xref: /openbmc/qemu/target/arm/tcg/translate-sve.c (revision 01dc65a3bc262ab1bec8fe89775e9bbfa627becb)
1  /*
2   * AArch64 SVE translation
3   *
4   * Copyright (c) 2018 Linaro, Ltd
5   *
6   * This library is free software; you can redistribute it and/or
7   * modify it under the terms of the GNU Lesser General Public
8   * License as published by the Free Software Foundation; either
9   * version 2.1 of the License, or (at your option) any later version.
10   *
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   *
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include "translate.h"
22  #include "translate-a64.h"
23  #include "fpu/softfloat.h"
24  
25  
26  typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
27                           TCGv_i64, uint32_t, uint32_t);
28  
29  typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
30                                       TCGv_ptr, TCGv_i32);
31  typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
32                                       TCGv_ptr, TCGv_ptr, TCGv_i32);
33  
34  typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
35  typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
36                                           TCGv_ptr, TCGv_i64, TCGv_i32);
37  
38  /*
39   * Helpers for extracting complex instruction fields.
40   */
41  
42  /* See e.g. ASR (immediate, predicated).
43   * Returns -1 for unallocated encoding; diagnose later.
44   */
tszimm_esz(DisasContext * s,int x)45  static int tszimm_esz(DisasContext *s, int x)
46  {
47      x >>= 3;  /* discard imm3 */
48      return 31 - clz32(x);
49  }
50  
/*
 * Decode the right-shift amount from a tsz:imm3 field.
 *
 * When tszimm_esz() reports -1 the result is never consumed (the trans
 * function checks for esz < 0), so any value may be returned in that
 * case provided no undefined behaviour is triggered; esz itself is used.
 */
static int tszimm_shr(DisasContext *s, int x)
{
    int esz = tszimm_esz(s, x);

    return esz < 0 ? esz : (16 << esz) - x;
}
64  
65  /* See e.g. LSL (immediate, predicated).  */
tszimm_shl(DisasContext * s,int x)66  static int tszimm_shl(DisasContext *s, int x)
67  {
68      /* As with tszimm_shr(), value will be unused if esz < 0 */
69      int esz = tszimm_esz(s, x);
70      if (esz < 0) {
71          return esz;
72      }
73      return x - (8 << esz);
74  }
75  
76  /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
expand_imm_sh8s(DisasContext * s,int x)77  static inline int expand_imm_sh8s(DisasContext *s, int x)
78  {
79      return (int8_t)x << (x & 0x100 ? 8 : 0);
80  }
81  
/*
 * Expand an unsigned 8-bit immediate with an optional shift: the low
 * 8 bits of @x are the value, shifted left by 8 when bit 8 is set.
 */
static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    int imm = (uint8_t)x;

    if (x & 0x100) {
        imm <<= 8;
    }
    return imm;
}
86  
87  /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
88   * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
89   */
msz_dtype(DisasContext * s,int msz)90  static inline int msz_dtype(DisasContext *s, int msz)
91  {
92      static const uint8_t dtype[4] = { 0, 5, 10, 15 };
93      return dtype[msz];
94  }
95  
96  /*
97   * Include the generated decoder.
98   */
99  
100  #include "decode-sve.c.inc"
101  
102  /*
103   * Implement all of the translator functions referenced by the decoder.
104   */
105  
106  /* Invoke an out-of-line helper on 2 Zregs. */
gen_gvec_ool_zz(DisasContext * s,gen_helper_gvec_2 * fn,int rd,int rn,int data)107  static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
108                              int rd, int rn, int data)
109  {
110      if (fn == NULL) {
111          return false;
112      }
113      if (sve_access_check(s)) {
114          unsigned vsz = vec_full_reg_size(s);
115          tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
116                             vec_full_reg_offset(s, rn),
117                             vsz, vsz, data, fn);
118      }
119      return true;
120  }
121  
/*
 * Invoke an out-of-line helper on 2 Zregs, plus float_status.
 *
 * Returns false when @fn is NULL; otherwise true.  The fp status
 * pointer for @flavour is only created, and the call only expanded,
 * once sve_access_check() has succeeded.
 */
static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    unsigned vsz;
    TCGv_ptr fpst;

    if (!fn) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    fpst = fpstatus_ptr(flavour);
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       fpst, vsz, vsz, data, fn);
    return true;
}
139  
/*
 * As gen_gvec_fpst_zz(), taking registers from @a and choosing the fp
 * status flavour from the element size: FPST_FPCR_F16 for MO_16,
 * FPST_FPCR for everything else.
 */
static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    ARMFPStatusFlavour flavour = FPST_FPCR;

    if (a->esz == MO_16) {
        flavour = FPST_FPCR_F16;
    }
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, flavour);
}
146  
147  /* Invoke an out-of-line helper on 3 Zregs. */
gen_gvec_ool_zzz(DisasContext * s,gen_helper_gvec_3 * fn,int rd,int rn,int rm,int data)148  static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
149                               int rd, int rn, int rm, int data)
150  {
151      if (fn == NULL) {
152          return false;
153      }
154      if (sve_access_check(s)) {
155          unsigned vsz = vec_full_reg_size(s);
156          tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
157                             vec_full_reg_offset(s, rn),
158                             vec_full_reg_offset(s, rm),
159                             vsz, vsz, data, fn);
160      }
161      return true;
162  }
163  
/* As gen_gvec_ool_zzz(), with rd/rn/rm taken from the decoded args @a. */
static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    int rd = a->rd;
    int rn = a->rn;
    int rm = a->rm;

    return gen_gvec_ool_zzz(s, fn, rd, rn, rm, data);
}
169  
170  /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
gen_gvec_fpst_zzz(DisasContext * s,gen_helper_gvec_3_ptr * fn,int rd,int rn,int rm,int data,ARMFPStatusFlavour flavour)171  static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
172                                int rd, int rn, int rm,
173                                int data, ARMFPStatusFlavour flavour)
174  {
175      if (fn == NULL) {
176          return false;
177      }
178      if (sve_access_check(s)) {
179          unsigned vsz = vec_full_reg_size(s);
180          TCGv_ptr status = fpstatus_ptr(flavour);
181  
182          tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
183                             vec_full_reg_offset(s, rn),
184                             vec_full_reg_offset(s, rm),
185                             status, vsz, vsz, data, fn);
186      }
187      return true;
188  }
189  
/*
 * As gen_gvec_fpst_zzz(), taking registers from @a; the fp status
 * flavour is FPST_FPCR_F16 for MO_16 elements and FPST_FPCR otherwise.
 */
static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    ARMFPStatusFlavour flavour = FPST_FPCR;

    if (a->esz == MO_16) {
        flavour = FPST_FPCR_F16;
    }
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, flavour);
}
196  
197  /* Invoke an out-of-line helper on 4 Zregs. */
gen_gvec_ool_zzzz(DisasContext * s,gen_helper_gvec_4 * fn,int rd,int rn,int rm,int ra,int data)198  static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
199                                int rd, int rn, int rm, int ra, int data)
200  {
201      if (fn == NULL) {
202          return false;
203      }
204      if (sve_access_check(s)) {
205          unsigned vsz = vec_full_reg_size(s);
206          tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
207                             vec_full_reg_offset(s, rn),
208                             vec_full_reg_offset(s, rm),
209                             vec_full_reg_offset(s, ra),
210                             vsz, vsz, data, fn);
211      }
212      return true;
213  }
214  
/* As gen_gvec_ool_zzzz(), with rd/rn/rm/ra taken from the decoded args @a. */
static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    int rd = a->rd;
    int rn = a->rn;
    int rm = a->rm;
    int ra = a->ra;

    return gen_gvec_ool_zzzz(s, fn, rd, rn, rm, ra, data);
}
220  
/*
 * As gen_gvec_ool_zzzz(), with registers taken from @a and the
 * element index a->index supplied as the helper's data argument.
 */
static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    int data = a->index;

    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}
226  
227  /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
gen_gvec_ptr_zzzz(DisasContext * s,gen_helper_gvec_4_ptr * fn,int rd,int rn,int rm,int ra,int data,TCGv_ptr ptr)228  static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
229                                int rd, int rn, int rm, int ra,
230                                int data, TCGv_ptr ptr)
231  {
232      if (fn == NULL) {
233          return false;
234      }
235      if (sve_access_check(s)) {
236          unsigned vsz = vec_full_reg_size(s);
237          tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
238                             vec_full_reg_offset(s, rn),
239                             vec_full_reg_offset(s, rm),
240                             vec_full_reg_offset(s, ra),
241                             ptr, vsz, vsz, data, fn);
242      }
243      return true;
244  }
245  
/*
 * Invoke an out-of-line helper on 4 Zregs, plus float_status.
 *
 * The status pointer for @flavour is created unconditionally, i.e.
 * before gen_gvec_ptr_zzzz() performs its NULL and access checks.
 */
static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data,
                             fpstatus_ptr(flavour));
}
254  
/* Invoke an out-of-line helper on 4 Zregs, passing tcg_env as the pointer. */
static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data)
{
    TCGv_ptr env = tcg_env;

    return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, env);
}
261  
/* As gen_gvec_env_zzzz(), with rd/rn/rm/ra taken from the decoded args @a. */
static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                  arg_rrrr_esz *a, int data)
{
    int rd = a->rd;
    int rn = a->rn;
    int rm = a->rm;
    int ra = a->ra;

    return gen_gvec_env_zzzz(s, fn, rd, rn, rm, ra, data);
}
267  
/*
 * As gen_gvec_env_zzzz(), with registers taken from @a and the
 * element index a->index supplied as the helper's data argument.
 */
static bool gen_gvec_env_arg_zzxz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                  arg_rrxr_esz *a)
{
    int data = a->index;

    return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}
273  
274  /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
gen_gvec_fpst_zzzzp(DisasContext * s,gen_helper_gvec_5_ptr * fn,int rd,int rn,int rm,int ra,int pg,int data,ARMFPStatusFlavour flavour)275  static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
276                                  int rd, int rn, int rm, int ra, int pg,
277                                  int data, ARMFPStatusFlavour flavour)
278  {
279      if (fn == NULL) {
280          return false;
281      }
282      if (sve_access_check(s)) {
283          unsigned vsz = vec_full_reg_size(s);
284          TCGv_ptr status = fpstatus_ptr(flavour);
285  
286          tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
287                             vec_full_reg_offset(s, rn),
288                             vec_full_reg_offset(s, rm),
289                             vec_full_reg_offset(s, ra),
290                             pred_full_reg_offset(s, pg),
291                             status, vsz, vsz, data, fn);
292      }
293      return true;
294  }
295  
296  /* Invoke an out-of-line helper on 2 Zregs and a predicate. */
gen_gvec_ool_zzp(DisasContext * s,gen_helper_gvec_3 * fn,int rd,int rn,int pg,int data)297  static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
298                               int rd, int rn, int pg, int data)
299  {
300      if (fn == NULL) {
301          return false;
302      }
303      if (sve_access_check(s)) {
304          unsigned vsz = vec_full_reg_size(s);
305          tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
306                             vec_full_reg_offset(s, rn),
307                             pred_full_reg_offset(s, pg),
308                             vsz, vsz, data, fn);
309      }
310      return true;
311  }
312  
/* As gen_gvec_ool_zzp(), with rd/rn/pg taken from the decoded args @a. */
static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    int rd = a->rd;
    int rn = a->rn;
    int pg = a->pg;

    return gen_gvec_ool_zzp(s, fn, rd, rn, pg, data);
}
318  
/*
 * As gen_gvec_ool_zzp(), with registers taken from @a and the
 * immediate a->imm supplied as the helper's data argument.
 */
static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    int data = a->imm;

    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}
324  
/*
 * Invoke an out-of-line helper on 2 Zregs and a predicate, plus
 * float_status.
 *
 * Returns false when @fn is NULL; otherwise true.  The status pointer
 * is created, and the call expanded, only after sve_access_check()
 * succeeds.
 */
static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    unsigned vsz;
    TCGv_ptr fpst;

    if (!fn) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    fpst = fpstatus_ptr(flavour);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       fpst, vsz, vsz, data, fn);
    return true;
}
343  
/*
 * As gen_gvec_fpst_zzp(), with rd/rn/pg taken from the decoded args @a.
 * The caller chooses the fp status flavour explicitly.
 */
static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    int rd = a->rd;
    int rn = a->rn;
    int pg = a->pg;

    return gen_gvec_fpst_zzp(s, fn, rd, rn, pg, data, flavour);
}
350  
351  /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
gen_gvec_ool_zzzp(DisasContext * s,gen_helper_gvec_4 * fn,int rd,int rn,int rm,int pg,int data)352  static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
353                                int rd, int rn, int rm, int pg, int data)
354  {
355      if (fn == NULL) {
356          return false;
357      }
358      if (sve_access_check(s)) {
359          unsigned vsz = vec_full_reg_size(s);
360          tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
361                             vec_full_reg_offset(s, rn),
362                             vec_full_reg_offset(s, rm),
363                             pred_full_reg_offset(s, pg),
364                             vsz, vsz, data, fn);
365      }
366      return true;
367  }
368  
/* As gen_gvec_ool_zzzp(), with rd/rn/rm/pg taken from the decoded args @a. */
static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    int rd = a->rd;
    int rn = a->rn;
    int rm = a->rm;
    int pg = a->pg;

    return gen_gvec_ool_zzzp(s, fn, rd, rn, rm, pg, data);
}
374  
375  /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
gen_gvec_fpst_zzzp(DisasContext * s,gen_helper_gvec_4_ptr * fn,int rd,int rn,int rm,int pg,int data,ARMFPStatusFlavour flavour)376  static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
377                                 int rd, int rn, int rm, int pg, int data,
378                                 ARMFPStatusFlavour flavour)
379  {
380      if (fn == NULL) {
381          return false;
382      }
383      if (sve_access_check(s)) {
384          unsigned vsz = vec_full_reg_size(s);
385          TCGv_ptr status = fpstatus_ptr(flavour);
386  
387          tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
388                             vec_full_reg_offset(s, rn),
389                             vec_full_reg_offset(s, rm),
390                             pred_full_reg_offset(s, pg),
391                             status, vsz, vsz, data, fn);
392      }
393      return true;
394  }
395  
/*
 * As gen_gvec_fpst_zzzp(), taking registers from @a with data fixed at
 * 0; the fp status flavour is FPST_FPCR_F16 for MO_16 elements and
 * FPST_FPCR otherwise.
 */
static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    ARMFPStatusFlavour flavour = FPST_FPCR;

    if (a->esz == MO_16) {
        flavour = FPST_FPCR_F16;
    }
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, flavour);
}
402  
403  /* Invoke a vector expander on two Zregs and an immediate.  */
gen_gvec_fn_zzi(DisasContext * s,GVecGen2iFn * gvec_fn,int esz,int rd,int rn,uint64_t imm)404  static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
405                              int esz, int rd, int rn, uint64_t imm)
406  {
407      if (gvec_fn == NULL) {
408          return false;
409      }
410      if (sve_access_check(s)) {
411          unsigned vsz = vec_full_reg_size(s);
412          gvec_fn(esz, vec_full_reg_offset(s, rd),
413                  vec_full_reg_offset(s, rn), imm, vsz, vsz);
414      }
415      return true;
416  }
417  
/*
 * As gen_gvec_fn_zzi(), with operands taken from the decoded args @a.
 * A negative a->esz marks an invalid tsz encoding (see tszimm_esz) and
 * makes this return false without calling the expander.
 */
static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    return a->esz >= 0
        && gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}
427  
428  /* Invoke a vector expander on three Zregs.  */
gen_gvec_fn_zzz(DisasContext * s,GVecGen3Fn * gvec_fn,int esz,int rd,int rn,int rm)429  static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
430                              int esz, int rd, int rn, int rm)
431  {
432      if (gvec_fn == NULL) {
433          return false;
434      }
435      if (sve_access_check(s)) {
436          unsigned vsz = vec_full_reg_size(s);
437          gvec_fn(esz, vec_full_reg_offset(s, rd),
438                  vec_full_reg_offset(s, rn),
439                  vec_full_reg_offset(s, rm), vsz, vsz);
440      }
441      return true;
442  }
443  
/* As gen_gvec_fn_zzz(), with esz/rd/rn/rm taken from the decoded args @a. */
static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    int esz = a->esz;
    int rd = a->rd;
    int rn = a->rn;
    int rm = a->rm;

    return gen_gvec_fn_zzz(s, fn, esz, rd, rn, rm);
}
449  
450  /* Invoke a vector expander on four Zregs.  */
gen_gvec_fn_arg_zzzz(DisasContext * s,GVecGen4Fn * gvec_fn,arg_rrrr_esz * a)451  static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
452                                   arg_rrrr_esz *a)
453  {
454      if (gvec_fn == NULL) {
455          return false;
456      }
457      if (sve_access_check(s)) {
458          unsigned vsz = vec_full_reg_size(s);
459          gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
460                  vec_full_reg_offset(s, a->rn),
461                  vec_full_reg_offset(s, a->rm),
462                  vec_full_reg_offset(s, a->ra), vsz, vsz);
463      }
464      return true;
465  }
466  
467  /* Invoke a vector move on two Zregs.  */
do_mov_z(DisasContext * s,int rd,int rn)468  static bool do_mov_z(DisasContext *s, int rd, int rn)
469  {
470      if (sve_access_check(s)) {
471          unsigned vsz = vec_full_reg_size(s);
472          tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
473                           vec_full_reg_offset(s, rn), vsz, vsz);
474      }
475      return true;
476  }
477  
478  /* Initialize a Zreg with replications of a 64-bit immediate.  */
do_dupi_z(DisasContext * s,int rd,uint64_t word)479  static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
480  {
481      unsigned vsz = vec_full_reg_size(s);
482      tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
483  }
484  
485  /* Invoke a vector expander on three Pregs.  */
gen_gvec_fn_ppp(DisasContext * s,GVecGen3Fn * gvec_fn,int rd,int rn,int rm)486  static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
487                              int rd, int rn, int rm)
488  {
489      if (sve_access_check(s)) {
490          unsigned psz = pred_gvec_reg_size(s);
491          gvec_fn(MO_64, pred_full_reg_offset(s, rd),
492                  pred_full_reg_offset(s, rn),
493                  pred_full_reg_offset(s, rm), psz, psz);
494      }
495      return true;
496  }
497  
498  /* Invoke a vector move on two Pregs.  */
do_mov_p(DisasContext * s,int rd,int rn)499  static bool do_mov_p(DisasContext *s, int rd, int rn)
500  {
501      if (sve_access_check(s)) {
502          unsigned psz = pred_gvec_reg_size(s);
503          tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
504                           pred_full_reg_offset(s, rn), psz, psz);
505      }
506      return true;
507  }
508  
509  /* Set the cpu flags as per a return from an SVE helper.  */
do_pred_flags(TCGv_i32 t)510  static void do_pred_flags(TCGv_i32 t)
511  {
512      tcg_gen_mov_i32(cpu_NF, t);
513      tcg_gen_andi_i32(cpu_ZF, t, 2);
514      tcg_gen_andi_i32(cpu_CF, t, 1);
515      tcg_gen_movi_i32(cpu_VF, 0);
516  }
517  
518  /* Subroutines computing the ARM PredTest psuedofunction.  */
do_predtest1(TCGv_i64 d,TCGv_i64 g)519  static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
520  {
521      TCGv_i32 t = tcg_temp_new_i32();
522  
523      gen_helper_sve_predtest1(t, d, g);
524      do_pred_flags(t);
525  }
526  
/*
 * PredTest on predicates held in the CPU state: @dofs and @gofs are
 * offsets from tcg_env and @words is passed to the sve_predtest helper
 * as a constant.  The helper's result is copied into the cpu flags.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr d_ptr = tcg_temp_new_ptr();
    TCGv_ptr g_ptr = tcg_temp_new_ptr();
    TCGv_i32 flags = tcg_temp_new_i32();

    /* Form the addresses of the two predicates within env. */
    tcg_gen_addi_ptr(d_ptr, tcg_env, dofs);
    tcg_gen_addi_ptr(g_ptr, tcg_env, gofs);

    gen_helper_sve_predtest(flags, d_ptr, g_ptr, tcg_constant_i32(words));
    do_pred_flags(flags);
}
540  
/*
 * For each element size, the bits within a predicate word that are active.
 * Entry i has one bit set every (1 << i) bits, starting at bit 0.
 */
const uint64_t pred_esz_masks[5] = {
    [0] = UINT64_C(0xffffffffffffffff),
    [1] = UINT64_C(0x5555555555555555),
    [2] = UINT64_C(0x1111111111111111),
    [3] = UINT64_C(0x0101010101010101),
    [4] = UINT64_C(0x0001000100010001),
};
547  
/*
 * Translate the INVALID pattern: report an unallocated encoding and
 * return true.
 */
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);

    return true;
}
553  
554  /*
555   *** SVE Logical - Unpredicated Group
556   */
557  
TRANS_FEAT(AND_zzz,aa64_sve,gen_gvec_fn_arg_zzz,tcg_gen_gvec_and,a)558  TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
559  TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
560  TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
561  TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
562  
563  static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
564  {
565      if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
566          return false;
567      }
568      if (sve_access_check(s)) {
569          unsigned vsz = vec_full_reg_size(s);
570          gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
571                       vec_full_reg_offset(s, a->rn),
572                       vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
573      }
574      return true;
575  }
576  
TRANS_FEAT(EOR3,aa64_sve2,gen_gvec_fn_arg_zzzz,gen_gvec_eor3,a)577  TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a)
578  TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a)
579  
580  static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
581                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
582  {
583      /* BSL differs from the generic bitsel in argument ordering. */
584      tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
585  }
586  
TRANS_FEAT(BSL,aa64_sve2,gen_gvec_fn_arg_zzzz,gen_bsl,a)587  TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
588  
589  static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
590  {
591      tcg_gen_andc_i64(n, k, n);
592      tcg_gen_andc_i64(m, m, k);
593      tcg_gen_or_i64(d, n, m);
594  }
595  
/*
 * Host-vector expansion of BSL1N.  When the host has bitsel, @n is
 * inverted and a single bitsel keyed on @k is used; otherwise the
 * result is built from two andc operations and an or.  @n (and, in the
 * fallback, @m) are overwritten.
 */
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (!TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    } else {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    }
}
608  
gen_bsl1n(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)609  static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
610                        uint32_t a, uint32_t oprsz, uint32_t maxsz)
611  {
612      static const GVecGen4 op = {
613          .fni8 = gen_bsl1n_i64,
614          .fniv = gen_bsl1n_vec,
615          .fno = gen_helper_sve2_bsl1n,
616          .vece = MO_64,
617          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
618      };
619      tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
620  }
621  
TRANS_FEAT(BSL1N,aa64_sve2,gen_gvec_fn_arg_zzzz,gen_bsl1n,a)622  TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
623  
624  static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
625  {
626      /*
627       * Z[dn] = (n & k) | (~m & ~k)
628       *       =         | ~(m | k)
629       */
630      tcg_gen_and_i64(n, n, k);
631      if (TCG_TARGET_HAS_orc_i64) {
632          tcg_gen_or_i64(m, m, k);
633          tcg_gen_orc_i64(d, n, m);
634      } else {
635          tcg_gen_nor_i64(m, m, k);
636          tcg_gen_or_i64(d, n, m);
637      }
638  }
639  
/*
 * Host-vector expansion of BSL2N.  When the host has bitsel, @m is
 * inverted and a single bitsel keyed on @k is used; otherwise the
 * result is (n & k) | ~(m | k), built from and, or and orc.
 * @m (and, in the fallback, @n) are overwritten.
 */
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (!TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    } else {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    }
}
652  
gen_bsl2n(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)653  static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
654                        uint32_t a, uint32_t oprsz, uint32_t maxsz)
655  {
656      static const GVecGen4 op = {
657          .fni8 = gen_bsl2n_i64,
658          .fniv = gen_bsl2n_vec,
659          .fno = gen_helper_sve2_bsl2n,
660          .vece = MO_64,
661          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
662      };
663      tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
664  }
665  
TRANS_FEAT(BSL2N,aa64_sve2,gen_gvec_fn_arg_zzzz,gen_bsl2n,a)666  TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
667  
668  static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
669  {
670      tcg_gen_and_i64(n, n, k);
671      tcg_gen_andc_i64(m, m, k);
672      tcg_gen_nor_i64(d, n, m);
673  }
674  
/*
 * Host-vector expansion of NBSL: a bitsel keyed on @k, followed by an
 * inversion of the result in place.
 */
static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);

    /* Invert the selected value. */
    tcg_gen_not_vec(vece, d, d);
}
681  
gen_nbsl(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)682  static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
683                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
684  {
685      static const GVecGen4 op = {
686          .fni8 = gen_nbsl_i64,
687          .fniv = gen_nbsl_vec,
688          .fno = gen_helper_sve2_nbsl,
689          .vece = MO_64,
690          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
691      };
692      tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
693  }
694  
TRANS_FEAT(NBSL,aa64_sve2,gen_gvec_fn_arg_zzzz,gen_nbsl,a)695  TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
696  
697  /*
698   *** SVE Integer Arithmetic - Unpredicated Group
699   */
700  
701  TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
702  TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
703  TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
704  TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
705  TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
706  TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
707  
708  /*
709   *** SVE Integer Arithmetic - Binary Predicated Group
710   */
711  
712  /* Select active elememnts from Zn and inactive elements from Zm,
713   * storing the result in Zd.
714   */
715  static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
716  {
717      static gen_helper_gvec_4 * const fns[4] = {
718          gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
719          gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
720      };
721      return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
722  }
723  
/*
 * Define the translator for a predicated two-source operation NAME,
 * gated on FEAT.  A per-element-size table of gen_helper_<name>_zpzz_{b,h,s,d}
 * helpers is created and indexed by a->esz; data is 0.
 */
#define DO_ZPZZ(NAME, FEAT, name)                                   \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {         \
        gen_helper_##name##_zpzz_b,                                 \
        gen_helper_##name##_zpzz_h,                                 \
        gen_helper_##name##_zpzz_s,                                 \
        gen_helper_##name##_zpzz_d,                                 \
    };                                                              \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                   \
               name##_zpzz_fns[a->esz], a, 0)
731  
732  DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
733  DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
734  DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
735  DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
736  
737  DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
738  DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
739  
740  DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
741  DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
742  DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
743  DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
744  DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
745  DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
746  
747  DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
748  DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
749  DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
750  
751  DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
752  DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
753  DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
754  
755  static gen_helper_gvec_4 * const sdiv_fns[4] = {
756      NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
757  };
758  TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
759  
760  static gen_helper_gvec_4 * const udiv_fns[4] = {
761      NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
762  };
763  TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
764  
765  TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
766  
767  /*
768   *** SVE Integer Arithmetic - Unary Predicated Group
769   */
770  
771  #define DO_ZPZ(NAME, FEAT, name) \
772      static gen_helper_gvec_3 * const name##_fns[4] = {              \
773          gen_helper_##name##_b, gen_helper_##name##_h,               \
774          gen_helper_##name##_s, gen_helper_##name##_d,               \
775      };                                                              \
776      TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
777  
778  DO_ZPZ(CLS, aa64_sve, sve_cls)
779  DO_ZPZ(CLZ, aa64_sve, sve_clz)
780  DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
781  DO_ZPZ(CNOT, aa64_sve, sve_cnot)
782  DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
783  DO_ZPZ(ABS, aa64_sve, sve_abs)
784  DO_ZPZ(NEG, aa64_sve, sve_neg)
785  DO_ZPZ(RBIT, aa64_sve, sve_rbit)
786  
787  static gen_helper_gvec_3 * const fabs_fns[4] = {
788      NULL,                  gen_helper_sve_fabs_h,
789      gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
790  };
791  TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
792  
793  static gen_helper_gvec_3 * const fneg_fns[4] = {
794      NULL,                  gen_helper_sve_fneg_h,
795      gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
796  };
797  TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
798  
799  static gen_helper_gvec_3 * const sxtb_fns[4] = {
800      NULL,                  gen_helper_sve_sxtb_h,
801      gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
802  };
803  TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
804  
805  static gen_helper_gvec_3 * const uxtb_fns[4] = {
806      NULL,                  gen_helper_sve_uxtb_h,
807      gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
808  };
809  TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
810  
811  static gen_helper_gvec_3 * const sxth_fns[4] = {
812      NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
813  };
814  TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
815  
816  static gen_helper_gvec_3 * const uxth_fns[4] = {
817      NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
818  };
819  TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
820  
821  TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
822             a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
823  TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
824             a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
825  
826  /*
827   *** SVE Integer Reduction Group
828   */
829  
830  typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
do_vpz_ool(DisasContext * s,arg_rpr_esz * a,gen_helper_gvec_reduc * fn)831  static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
832                         gen_helper_gvec_reduc *fn)
833  {
834      unsigned vsz = vec_full_reg_size(s);
835      TCGv_ptr t_zn, t_pg;
836      TCGv_i32 desc;
837      TCGv_i64 temp;
838  
839      if (fn == NULL) {
840          return false;
841      }
842      if (!sve_access_check(s)) {
843          return true;
844      }
845  
846      desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
847      temp = tcg_temp_new_i64();
848      t_zn = tcg_temp_new_ptr();
849      t_pg = tcg_temp_new_ptr();
850  
851      tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
852      tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
853      fn(temp, t_zn, t_pg, desc);
854  
855      write_fp_dreg(s, a->rd, temp);
856      return true;
857  }
858  
859  #define DO_VPZ(NAME, name) \
860      static gen_helper_gvec_reduc * const name##_fns[4] = {               \
861          gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
862          gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
863      };                                                                   \
864      TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])
865  
866  DO_VPZ(ORV, orv)
867  DO_VPZ(ANDV, andv)
868  DO_VPZ(EORV, eorv)
869  
870  DO_VPZ(UADDV, uaddv)
871  DO_VPZ(SMAXV, smaxv)
872  DO_VPZ(UMAXV, umaxv)
873  DO_VPZ(SMINV, sminv)
874  DO_VPZ(UMINV, uminv)
875  
876  static gen_helper_gvec_reduc * const saddv_fns[4] = {
877      gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
878      gen_helper_sve_saddv_s, NULL
879  };
TRANS_FEAT(SADDV,aa64_sve,do_vpz_ool,a,saddv_fns[a->esz])880  TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])
881  
882  #undef DO_VPZ
883  
884  /*
885   *** SVE Shift by Immediate - Predicated Group
886   */
887  
888  /*
889   * Copy Zn into Zd, storing zeros into inactive elements.
890   * If invert, store zeros into the active elements.
891   */
892  static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
893                          int esz, bool invert)
894  {
895      static gen_helper_gvec_3 * const fns[4] = {
896          gen_helper_sve_movz_b, gen_helper_sve_movz_h,
897          gen_helper_sve_movz_s, gen_helper_sve_movz_d,
898      };
899      return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
900  }
901  
/*
 * Expand a predicated shift by immediate.
 *
 * FNS holds one out-of-line helper per element size.  ASR selects the
 * treatment of an immediate that equals (or exceeds) the element width:
 * an arithmetic right shift clamps the count to width - 1, whereas every
 * other user zeroes the active elements of Zd via do_movz_zpz.
 *
 * Returns false for an invalid tsz encoding (a->esz < 0).
 * Note that a->imm may be modified in place.
 */
static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int esize;

    /* Invalid tsz encoding -- see tszimm_esz. */
    if (a->esz < 0) {
        return false;
    }

    /* Element width in bits; a shift by this much is still valid. */
    esize = 8 << a->esz;
    if (a->imm >= esize) {
        if (!asr) {
            /* Logical shifts and ASRD produce zero in active elements. */
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
        /* Arithmetic right shift: same result as shifting by one less. */
        a->imm = esize - 1;
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}
927  
928  static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
929      gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
930      gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
931  };
932  TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)
933  
934  static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
935      gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
936      gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
937  };
938  TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)
939  
940  static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
941      gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
942      gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
943  };
944  TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)
945  
946  static gen_helper_gvec_3 * const asrd_fns[4] = {
947      gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
948      gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
949  };
950  TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)
951  
952  static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
953      gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
954      gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
955  };
956  TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
957             a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)
958  
959  static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
960      gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
961      gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
962  };
963  TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
964             a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)
965  
966  static gen_helper_gvec_3 * const srshr_fns[4] = {
967      gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
968      gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
969  };
970  TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
971             a->esz < 0 ? NULL : srshr_fns[a->esz], a)
972  
973  static gen_helper_gvec_3 * const urshr_fns[4] = {
974      gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
975      gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
976  };
977  TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
978             a->esz < 0 ? NULL : urshr_fns[a->esz], a)
979  
980  static gen_helper_gvec_3 * const sqshlu_fns[4] = {
981      gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
982      gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
983  };
984  TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
985             a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
986  
987  /*
988   *** SVE Bitwise Shift - Predicated Group
989   */
990  
991  #define DO_ZPZW(NAME, name) \
992      static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
993          gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
994          gen_helper_sve_##name##_zpzw_s, NULL                              \
995      };                                                                    \
996      TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
997                 a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)
998  
DO_ZPZW(ASR,asr)999  DO_ZPZW(ASR, asr)
1000  DO_ZPZW(LSR, lsr)
1001  DO_ZPZW(LSL, lsl)
1002  
1003  #undef DO_ZPZW
1004  
1005  /*
1006   *** SVE Bitwise Shift - Unpredicated Group
1007   */
1008  
1009  static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1010                           void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1011                                           int64_t, uint32_t, uint32_t))
1012  {
1013      if (a->esz < 0) {
1014          /* Invalid tsz encoding -- see tszimm_esz. */
1015          return false;
1016      }
1017      if (sve_access_check(s)) {
1018          unsigned vsz = vec_full_reg_size(s);
1019          /* Shift by element size is architecturally valid.  For
1020             arithmetic right-shift, it's the same as by one less.
1021             Otherwise it is a zeroing operation.  */
1022          if (a->imm >= 8 << a->esz) {
1023              if (asr) {
1024                  a->imm = (8 << a->esz) - 1;
1025              } else {
1026                  do_dupi_z(s, a->rd, 0);
1027                  return true;
1028              }
1029          }
1030          gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1031                  vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1032      }
1033      return true;
1034  }
1035  
TRANS_FEAT(ASR_zzi,aa64_sve,do_shift_imm,a,true,tcg_gen_gvec_sari)1036  TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
1037  TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
1038  TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
1039  
1040  #define DO_ZZW(NAME, name) \
1041      static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
1042          gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
1043          gen_helper_sve_##name##_zzw_s, NULL                               \
1044      };                                                                    \
1045      TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
1046                 name##_zzw_fns[a->esz], a, 0)
1047  
1048  DO_ZZW(ASR_zzw, asr)
1049  DO_ZZW(LSR_zzw, lsr)
1050  DO_ZZW(LSL_zzw, lsl)
1051  
1052  #undef DO_ZZW
1053  
1054  /*
1055   *** SVE Integer Multiply-Add Group
1056   */
1057  
1058  static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1059                           gen_helper_gvec_5 *fn)
1060  {
1061      if (sve_access_check(s)) {
1062          unsigned vsz = vec_full_reg_size(s);
1063          tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1064                             vec_full_reg_offset(s, a->ra),
1065                             vec_full_reg_offset(s, a->rn),
1066                             vec_full_reg_offset(s, a->rm),
1067                             pred_full_reg_offset(s, a->pg),
1068                             vsz, vsz, 0, fn);
1069      }
1070      return true;
1071  }
1072  
1073  static gen_helper_gvec_5 * const mla_fns[4] = {
1074      gen_helper_sve_mla_b, gen_helper_sve_mla_h,
1075      gen_helper_sve_mla_s, gen_helper_sve_mla_d,
1076  };
1077  TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])
1078  
1079  static gen_helper_gvec_5 * const mls_fns[4] = {
1080      gen_helper_sve_mls_b, gen_helper_sve_mls_h,
1081      gen_helper_sve_mls_s, gen_helper_sve_mls_d,
1082  };
TRANS_FEAT(MLS,aa64_sve,do_zpzzz_ool,a,mls_fns[a->esz])1083  TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
1084  
1085  /*
1086   *** SVE Index Generation Group
1087   */
1088  
1089  static bool do_index(DisasContext *s, int esz, int rd,
1090                       TCGv_i64 start, TCGv_i64 incr)
1091  {
1092      unsigned vsz;
1093      TCGv_i32 desc;
1094      TCGv_ptr t_zd;
1095  
1096      if (!sve_access_check(s)) {
1097          return true;
1098      }
1099  
1100      vsz = vec_full_reg_size(s);
1101      desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1102      t_zd = tcg_temp_new_ptr();
1103  
1104      tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
1105      if (esz == 3) {
1106          gen_helper_sve_index_d(t_zd, start, incr, desc);
1107      } else {
1108          typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
1109          static index_fn * const fns[3] = {
1110              gen_helper_sve_index_b,
1111              gen_helper_sve_index_h,
1112              gen_helper_sve_index_s,
1113          };
1114          TCGv_i32 s32 = tcg_temp_new_i32();
1115          TCGv_i32 i32 = tcg_temp_new_i32();
1116  
1117          tcg_gen_extrl_i64_i32(s32, start);
1118          tcg_gen_extrl_i64_i32(i32, incr);
1119          fns[esz](t_zd, s32, i32, desc);
1120      }
1121      return true;
1122  }
1123  
1124  TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
1125             tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
1126  TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
1127             tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
1128  TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
1129             cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
1130  TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
1131             cpu_reg(s, a->rn), cpu_reg(s, a->rm))
1132  
1133  /*
1134   *** SVE Stack Allocation Group
1135   */
1136  
trans_ADDVL(DisasContext * s,arg_ADDVL * a)1137  static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
1138  {
1139      if (!dc_isar_feature(aa64_sve, s)) {
1140          return false;
1141      }
1142      if (sve_access_check(s)) {
1143          TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1144          TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1145          tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1146      }
1147      return true;
1148  }
1149  
trans_ADDSVL(DisasContext * s,arg_ADDSVL * a)1150  static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
1151  {
1152      if (!dc_isar_feature(aa64_sme, s)) {
1153          return false;
1154      }
1155      if (sme_enabled_check(s)) {
1156          TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1157          TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1158          tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
1159      }
1160      return true;
1161  }
1162  
trans_ADDPL(DisasContext * s,arg_ADDPL * a)1163  static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
1164  {
1165      if (!dc_isar_feature(aa64_sve, s)) {
1166          return false;
1167      }
1168      if (sve_access_check(s)) {
1169          TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1170          TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1171          tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1172      }
1173      return true;
1174  }
1175  
trans_ADDSPL(DisasContext * s,arg_ADDSPL * a)1176  static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
1177  {
1178      if (!dc_isar_feature(aa64_sme, s)) {
1179          return false;
1180      }
1181      if (sme_enabled_check(s)) {
1182          TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1183          TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1184          tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
1185      }
1186      return true;
1187  }
1188  
trans_RDVL(DisasContext * s,arg_RDVL * a)1189  static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
1190  {
1191      if (!dc_isar_feature(aa64_sve, s)) {
1192          return false;
1193      }
1194      if (sve_access_check(s)) {
1195          TCGv_i64 reg = cpu_reg(s, a->rd);
1196          tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1197      }
1198      return true;
1199  }
1200  
trans_RDSVL(DisasContext * s,arg_RDSVL * a)1201  static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
1202  {
1203      if (!dc_isar_feature(aa64_sme, s)) {
1204          return false;
1205      }
1206      if (sme_enabled_check(s)) {
1207          TCGv_i64 reg = cpu_reg(s, a->rd);
1208          tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
1209      }
1210      return true;
1211  }
1212  
1213  /*
1214   *** SVE Compute Vector Address Group
1215   */
1216  
do_adr(DisasContext * s,arg_rrri * a,gen_helper_gvec_3 * fn)1217  static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
1218  {
1219      return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
1220  }
1221  
1222  TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
1223  TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
1224  TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
1225  TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
1226  
1227  /*
1228   *** SVE Integer Misc - Unpredicated Group
1229   */
1230  
1231  static gen_helper_gvec_2 * const fexpa_fns[4] = {
1232      NULL,                   gen_helper_sve_fexpa_h,
1233      gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
1234  };
1235  TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
1236                          fexpa_fns[a->esz], a->rd, a->rn, 0)
1237  
1238  static gen_helper_gvec_3 * const ftssel_fns[4] = {
1239      NULL,                    gen_helper_sve_ftssel_h,
1240      gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
1241  };
1242  TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
1243                          ftssel_fns[a->esz], a, 0)
1244  
1245  /*
1246   *** SVE Predicate Logical Operations Group
1247   */
1248  
do_pppp_flags(DisasContext * s,arg_rprr_s * a,const GVecGen4 * gvec_op)1249  static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1250                            const GVecGen4 *gvec_op)
1251  {
1252      if (!sve_access_check(s)) {
1253          return true;
1254      }
1255  
1256      unsigned psz = pred_gvec_reg_size(s);
1257      int dofs = pred_full_reg_offset(s, a->rd);
1258      int nofs = pred_full_reg_offset(s, a->rn);
1259      int mofs = pred_full_reg_offset(s, a->rm);
1260      int gofs = pred_full_reg_offset(s, a->pg);
1261  
1262      if (!a->s) {
1263          tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1264          return true;
1265      }
1266  
1267      if (psz == 8) {
1268          /* Do the operation and the flags generation in temps.  */
1269          TCGv_i64 pd = tcg_temp_new_i64();
1270          TCGv_i64 pn = tcg_temp_new_i64();
1271          TCGv_i64 pm = tcg_temp_new_i64();
1272          TCGv_i64 pg = tcg_temp_new_i64();
1273  
1274          tcg_gen_ld_i64(pn, tcg_env, nofs);
1275          tcg_gen_ld_i64(pm, tcg_env, mofs);
1276          tcg_gen_ld_i64(pg, tcg_env, gofs);
1277  
1278          gvec_op->fni8(pd, pn, pm, pg);
1279          tcg_gen_st_i64(pd, tcg_env, dofs);
1280  
1281          do_predtest1(pd, pg);
1282      } else {
1283          /* The operation and flags generation is large.  The computation
1284           * of the flags depends on the original contents of the guarding
1285           * predicate.  If the destination overwrites the guarding predicate,
1286           * then the easiest way to get this right is to save a copy.
1287            */
1288          int tofs = gofs;
1289          if (a->rd == a->pg) {
1290              tofs = offsetof(CPUARMState, vfp.preg_tmp);
1291              tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1292          }
1293  
1294          tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1295          do_predtest(s, dofs, tofs, psz / 8);
1296      }
1297      return true;
1298  }
1299  
gen_and_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1300  static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1301  {
1302      tcg_gen_and_i64(pd, pn, pm);
1303      tcg_gen_and_i64(pd, pd, pg);
1304  }
1305  
gen_and_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1306  static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1307                             TCGv_vec pm, TCGv_vec pg)
1308  {
1309      tcg_gen_and_vec(vece, pd, pn, pm);
1310      tcg_gen_and_vec(vece, pd, pd, pg);
1311  }
1312  
trans_AND_pppp(DisasContext * s,arg_rprr_s * a)1313  static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1314  {
1315      static const GVecGen4 op = {
1316          .fni8 = gen_and_pg_i64,
1317          .fniv = gen_and_pg_vec,
1318          .fno = gen_helper_sve_and_pppp,
1319          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1320      };
1321  
1322      if (!dc_isar_feature(aa64_sve, s)) {
1323          return false;
1324      }
1325      if (!a->s) {
1326          if (a->rn == a->rm) {
1327              if (a->pg == a->rn) {
1328                  return do_mov_p(s, a->rd, a->rn);
1329              }
1330              return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1331          } else if (a->pg == a->rn || a->pg == a->rm) {
1332              return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1333          }
1334      }
1335      return do_pppp_flags(s, a, &op);
1336  }
1337  
gen_bic_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1338  static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1339  {
1340      tcg_gen_andc_i64(pd, pn, pm);
1341      tcg_gen_and_i64(pd, pd, pg);
1342  }
1343  
gen_bic_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1344  static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1345                             TCGv_vec pm, TCGv_vec pg)
1346  {
1347      tcg_gen_andc_vec(vece, pd, pn, pm);
1348      tcg_gen_and_vec(vece, pd, pd, pg);
1349  }
1350  
trans_BIC_pppp(DisasContext * s,arg_rprr_s * a)1351  static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1352  {
1353      static const GVecGen4 op = {
1354          .fni8 = gen_bic_pg_i64,
1355          .fniv = gen_bic_pg_vec,
1356          .fno = gen_helper_sve_bic_pppp,
1357          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1358      };
1359  
1360      if (!dc_isar_feature(aa64_sve, s)) {
1361          return false;
1362      }
1363      if (!a->s && a->pg == a->rn) {
1364          return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1365      }
1366      return do_pppp_flags(s, a, &op);
1367  }
1368  
gen_eor_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1369  static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1370  {
1371      tcg_gen_xor_i64(pd, pn, pm);
1372      tcg_gen_and_i64(pd, pd, pg);
1373  }
1374  
gen_eor_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1375  static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1376                             TCGv_vec pm, TCGv_vec pg)
1377  {
1378      tcg_gen_xor_vec(vece, pd, pn, pm);
1379      tcg_gen_and_vec(vece, pd, pd, pg);
1380  }
1381  
trans_EOR_pppp(DisasContext * s,arg_rprr_s * a)1382  static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1383  {
1384      static const GVecGen4 op = {
1385          .fni8 = gen_eor_pg_i64,
1386          .fniv = gen_eor_pg_vec,
1387          .fno = gen_helper_sve_eor_pppp,
1388          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1389      };
1390  
1391      if (!dc_isar_feature(aa64_sve, s)) {
1392          return false;
1393      }
1394      /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
1395      if (!a->s && a->pg == a->rm) {
1396          return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
1397      }
1398      return do_pppp_flags(s, a, &op);
1399  }
1400  
trans_SEL_pppp(DisasContext * s,arg_rprr_s * a)1401  static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1402  {
1403      if (a->s || !dc_isar_feature(aa64_sve, s)) {
1404          return false;
1405      }
1406      if (sve_access_check(s)) {
1407          unsigned psz = pred_gvec_reg_size(s);
1408          tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1409                              pred_full_reg_offset(s, a->pg),
1410                              pred_full_reg_offset(s, a->rn),
1411                              pred_full_reg_offset(s, a->rm), psz, psz);
1412      }
1413      return true;
1414  }
1415  
gen_orr_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1416  static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1417  {
1418      tcg_gen_or_i64(pd, pn, pm);
1419      tcg_gen_and_i64(pd, pd, pg);
1420  }
1421  
gen_orr_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1422  static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1423                             TCGv_vec pm, TCGv_vec pg)
1424  {
1425      tcg_gen_or_vec(vece, pd, pn, pm);
1426      tcg_gen_and_vec(vece, pd, pd, pg);
1427  }
1428  
trans_ORR_pppp(DisasContext * s,arg_rprr_s * a)1429  static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1430  {
1431      static const GVecGen4 op = {
1432          .fni8 = gen_orr_pg_i64,
1433          .fniv = gen_orr_pg_vec,
1434          .fno = gen_helper_sve_orr_pppp,
1435          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1436      };
1437  
1438      if (!dc_isar_feature(aa64_sve, s)) {
1439          return false;
1440      }
1441      if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1442          return do_mov_p(s, a->rd, a->rn);
1443      }
1444      return do_pppp_flags(s, a, &op);
1445  }
1446  
gen_orn_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1447  static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1448  {
1449      tcg_gen_orc_i64(pd, pn, pm);
1450      tcg_gen_and_i64(pd, pd, pg);
1451  }
1452  
gen_orn_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1453  static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1454                             TCGv_vec pm, TCGv_vec pg)
1455  {
1456      tcg_gen_orc_vec(vece, pd, pn, pm);
1457      tcg_gen_and_vec(vece, pd, pd, pg);
1458  }
1459  
trans_ORN_pppp(DisasContext * s,arg_rprr_s * a)1460  static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1461  {
1462      static const GVecGen4 op = {
1463          .fni8 = gen_orn_pg_i64,
1464          .fniv = gen_orn_pg_vec,
1465          .fno = gen_helper_sve_orn_pppp,
1466          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1467      };
1468  
1469      if (!dc_isar_feature(aa64_sve, s)) {
1470          return false;
1471      }
1472      return do_pppp_flags(s, a, &op);
1473  }
1474  
gen_nor_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1475  static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1476  {
1477      tcg_gen_or_i64(pd, pn, pm);
1478      tcg_gen_andc_i64(pd, pg, pd);
1479  }
1480  
gen_nor_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1481  static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1482                             TCGv_vec pm, TCGv_vec pg)
1483  {
1484      tcg_gen_or_vec(vece, pd, pn, pm);
1485      tcg_gen_andc_vec(vece, pd, pg, pd);
1486  }
1487  
trans_NOR_pppp(DisasContext * s,arg_rprr_s * a)1488  static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1489  {
1490      static const GVecGen4 op = {
1491          .fni8 = gen_nor_pg_i64,
1492          .fniv = gen_nor_pg_vec,
1493          .fno = gen_helper_sve_nor_pppp,
1494          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1495      };
1496  
1497      if (!dc_isar_feature(aa64_sve, s)) {
1498          return false;
1499      }
1500      return do_pppp_flags(s, a, &op);
1501  }
1502  
gen_nand_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1503  static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1504  {
1505      tcg_gen_and_i64(pd, pn, pm);
1506      tcg_gen_andc_i64(pd, pg, pd);
1507  }
1508  
gen_nand_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1509  static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1510                             TCGv_vec pm, TCGv_vec pg)
1511  {
1512      tcg_gen_and_vec(vece, pd, pn, pm);
1513      tcg_gen_andc_vec(vece, pd, pg, pd);
1514  }
1515  
trans_NAND_pppp(DisasContext * s,arg_rprr_s * a)1516  static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1517  {
1518      static const GVecGen4 op = {
1519          .fni8 = gen_nand_pg_i64,
1520          .fniv = gen_nand_pg_vec,
1521          .fno = gen_helper_sve_nand_pppp,
1522          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1523      };
1524  
1525      if (!dc_isar_feature(aa64_sve, s)) {
1526          return false;
1527      }
1528      return do_pppp_flags(s, a, &op);
1529  }
1530  
1531  /*
1532   *** SVE Predicate Misc Group
1533   */
1534  
trans_PTEST(DisasContext * s,arg_PTEST * a)1535  static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1536  {
1537      if (!dc_isar_feature(aa64_sve, s)) {
1538          return false;
1539      }
1540      if (sve_access_check(s)) {
1541          int nofs = pred_full_reg_offset(s, a->rn);
1542          int gofs = pred_full_reg_offset(s, a->pg);
1543          int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1544  
1545          if (words == 1) {
1546              TCGv_i64 pn = tcg_temp_new_i64();
1547              TCGv_i64 pg = tcg_temp_new_i64();
1548  
1549              tcg_gen_ld_i64(pn, tcg_env, nofs);
1550              tcg_gen_ld_i64(pg, tcg_env, gofs);
1551              do_predtest1(pn, pg);
1552          } else {
1553              do_predtest(s, nofs, gofs, words);
1554          }
1555      }
1556      return true;
1557  }
1558  
/*
 * Return the number of elements selected by a predicate constraint
 * PATTERN, for a vector of FULLSZ bytes and element size log2 ESZ.
 * See the ARM pseudocode DecodePredCount.
 *
 * Fixed-count patterns (VL1..VL8, VL16..VL256) yield their count only
 * if the vector holds at least that many elements, and 0 otherwise.
 * Unrecognised patterns yield 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value is the element count. */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        bound = 16 << (pattern - 9);
    } else {
        switch (pattern) {
        case 0x0:  /* POW2 */
            return pow2floor(elements);
        case 0x1d: /* MUL4 */
            return elements - elements % 4;
        case 0x1e: /* MUL3 */
            return elements - elements % 3;
        case 0x1f: /* ALL */
            return elements;
        default:   /* #uimm5 */
            return 0;
        }
    }

    return elements >= bound ? bound : 0;
}
1596  
1597  /* This handles all of the predicate initialization instructions,
1598   * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1599   * so that decode_pred_count returns 0.  For SETFFR, we will have
1600   * set RD == 16 == FFR.
1601   */
do_predset(DisasContext * s,int esz,int rd,int pat,bool setflag)1602  static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1603  {
1604      if (!sve_access_check(s)) {
1605          return true;
1606      }
1607  
1608      unsigned fullsz = vec_full_reg_size(s);
1609      unsigned ofs = pred_full_reg_offset(s, rd);
1610      unsigned numelem, setsz, i;
1611      uint64_t word, lastword;
1612      TCGv_i64 t;
1613  
1614      numelem = decode_pred_count(fullsz, pat, esz);
1615  
1616      /* Determine what we must store into each bit, and how many.  */
1617      if (numelem == 0) {
1618          lastword = word = 0;
1619          setsz = fullsz;
1620      } else {
1621          setsz = numelem << esz;
1622          lastword = word = pred_esz_masks[esz];
1623          if (setsz % 64) {
1624              lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1625          }
1626      }
1627  
1628      t = tcg_temp_new_i64();
1629      if (fullsz <= 64) {
1630          tcg_gen_movi_i64(t, lastword);
1631          tcg_gen_st_i64(t, tcg_env, ofs);
1632          goto done;
1633      }
1634  
1635      if (word == lastword) {
1636          unsigned maxsz = size_for_gvec(fullsz / 8);
1637          unsigned oprsz = size_for_gvec(setsz / 8);
1638  
1639          if (oprsz * 8 == setsz) {
1640              tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
1641              goto done;
1642          }
1643      }
1644  
1645      setsz /= 8;
1646      fullsz /= 8;
1647  
1648      tcg_gen_movi_i64(t, word);
1649      for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1650          tcg_gen_st_i64(t, tcg_env, ofs + i);
1651      }
1652      if (lastword != word) {
1653          tcg_gen_movi_i64(t, lastword);
1654          tcg_gen_st_i64(t, tcg_env, ofs + i);
1655          i += 8;
1656      }
1657      if (i < fullsz) {
1658          tcg_gen_movi_i64(t, 0);
1659          for (; i < fullsz; i += 8) {
1660              tcg_gen_st_i64(t, tcg_env, ofs + i);
1661          }
1662      }
1663  
1664   done:
1665      /* PTRUES */
1666      if (setflag) {
1667          tcg_gen_movi_i32(cpu_NF, -(word != 0));
1668          tcg_gen_movi_i32(cpu_CF, word == 0);
1669          tcg_gen_movi_i32(cpu_VF, 0);
1670          tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1671      }
1672      return true;
1673  }
1674  
1675  TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
1676  
1677  /* Note pat == 31 is #all, to set all elements.  */
1678  TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
1679                          do_predset, 0, FFR_PRED_NUM, 31, false)
1680  
1681  /* Note pat == 32 is #unimp, to set no elements.  */
1682  TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
1683  
/*
 * RDFFR (predicated): rather than duplicate the path through
 * do_pppp_flags, recast the instruction as a predicated AND whose two
 * source operands are both FFR and translate that instead.  The
 * instruction is marked non-streaming before the AND is translated.
 */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    arg_rprr_s and_args = {
        .rn = FFR_PRED_NUM,
        .rm = FFR_PRED_NUM,
        .pg = a->pg,
        .rd = a->rd,
        .s = a->s,
    };

    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &and_args);
}
1697  
1698  TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
1699  TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
1700  
/*
 * Shared expansion for PFIRST and PNEXT.  GEN_FN is called with a result
 * temporary, pointers to predicates a->rd and a->rn, and a PREDDESC
 * descriptor carrying the predicate size and element size.  The value
 * it produces is then passed to do_pred_flags.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    TCGv_ptr pd_ptr, pg_ptr;
    TCGv_i32 result;
    unsigned desc = 0;

    if (!sve_access_check(s)) {
        return true;
    }

    pd_ptr = tcg_temp_new_ptr();
    pg_ptr = tcg_temp_new_ptr();

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(pd_ptr, tcg_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(pg_ptr, tcg_env, pred_full_reg_offset(s, a->rn));
    result = tcg_temp_new_i32();

    gen_fn(result, pd_ptr, pg_ptr, tcg_constant_i32(desc));

    do_pred_flags(result);
    return true;
}
1726  
TRANS_FEAT(PFIRST,aa64_sve,do_pfirst_pnext,a,gen_helper_sve_pfirst)1727  TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
1728  TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
1729  
1730  /*
1731   *** SVE Element Count Group
1732   */
1733  
1734  /* Perform an inline saturating addition of a 32-bit value within
1735   * a 64-bit register.  The second operand is known to be positive,
1736   * which halves the comparisons we must perform to bound the result.
1737   */
1738  static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1739  {
1740      int64_t ibound;
1741  
1742      /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1743      if (u) {
1744          tcg_gen_ext32u_i64(reg, reg);
1745      } else {
1746          tcg_gen_ext32s_i64(reg, reg);
1747      }
1748      if (d) {
1749          tcg_gen_sub_i64(reg, reg, val);
1750          ibound = (u ? 0 : INT32_MIN);
1751          tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
1752      } else {
1753          tcg_gen_add_i64(reg, reg, val);
1754          ibound = (u ? UINT32_MAX : INT32_MAX);
1755          tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
1756      }
1757  }
1758  
1759  /* Similarly with 64-bit values.  */
do_sat_addsub_64(TCGv_i64 reg,TCGv_i64 val,bool u,bool d)1760  static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1761  {
1762      TCGv_i64 t0 = tcg_temp_new_i64();
1763      TCGv_i64 t2;
1764  
1765      if (u) {
1766          if (d) {
1767              tcg_gen_sub_i64(t0, reg, val);
1768              t2 = tcg_constant_i64(0);
1769              tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
1770          } else {
1771              tcg_gen_add_i64(t0, reg, val);
1772              t2 = tcg_constant_i64(-1);
1773              tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
1774          }
1775      } else {
1776          TCGv_i64 t1 = tcg_temp_new_i64();
1777          if (d) {
1778              /* Detect signed overflow for subtraction.  */
1779              tcg_gen_xor_i64(t0, reg, val);
1780              tcg_gen_sub_i64(t1, reg, val);
1781              tcg_gen_xor_i64(reg, reg, t1);
1782              tcg_gen_and_i64(t0, t0, reg);
1783  
1784              /* Bound the result.  */
1785              tcg_gen_movi_i64(reg, INT64_MIN);
1786              t2 = tcg_constant_i64(0);
1787              tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1788          } else {
1789              /* Detect signed overflow for addition.  */
1790              tcg_gen_xor_i64(t0, reg, val);
1791              tcg_gen_add_i64(reg, reg, val);
1792              tcg_gen_xor_i64(t1, reg, val);
1793              tcg_gen_andc_i64(t0, t1, t0);
1794  
1795              /* Bound the result.  */
1796              tcg_gen_movi_i64(t1, INT64_MAX);
1797              t2 = tcg_constant_i64(0);
1798              tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1799          }
1800      }
1801  }
1802  
1803  /* Similarly with a vector and a scalar operand.  */
do_sat_addsub_vec(DisasContext * s,int esz,int rd,int rn,TCGv_i64 val,bool u,bool d)1804  static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1805                                TCGv_i64 val, bool u, bool d)
1806  {
1807      unsigned vsz = vec_full_reg_size(s);
1808      TCGv_ptr dptr, nptr;
1809      TCGv_i32 t32, desc;
1810      TCGv_i64 t64;
1811  
1812      dptr = tcg_temp_new_ptr();
1813      nptr = tcg_temp_new_ptr();
1814      tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd));
1815      tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn));
1816      desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1817  
1818      switch (esz) {
1819      case MO_8:
1820          t32 = tcg_temp_new_i32();
1821          tcg_gen_extrl_i64_i32(t32, val);
1822          if (d) {
1823              tcg_gen_neg_i32(t32, t32);
1824          }
1825          if (u) {
1826              gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1827          } else {
1828              gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1829          }
1830          break;
1831  
1832      case MO_16:
1833          t32 = tcg_temp_new_i32();
1834          tcg_gen_extrl_i64_i32(t32, val);
1835          if (d) {
1836              tcg_gen_neg_i32(t32, t32);
1837          }
1838          if (u) {
1839              gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1840          } else {
1841              gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1842          }
1843          break;
1844  
1845      case MO_32:
1846          t64 = tcg_temp_new_i64();
1847          if (d) {
1848              tcg_gen_neg_i64(t64, val);
1849          } else {
1850              tcg_gen_mov_i64(t64, val);
1851          }
1852          if (u) {
1853              gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1854          } else {
1855              gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1856          }
1857          break;
1858  
1859      case MO_64:
1860          if (u) {
1861              if (d) {
1862                  gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1863              } else {
1864                  gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1865              }
1866          } else if (d) {
1867              t64 = tcg_temp_new_i64();
1868              tcg_gen_neg_i64(t64, val);
1869              gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1870          } else {
1871              gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1872          }
1873          break;
1874  
1875      default:
1876          g_assert_not_reached();
1877      }
1878  }
1879  
/*
 * Element count into a general register: the number of elements chosen
 * by the pattern, times the immediate, is a translation-time constant
 * and is moved directly into register a->rd.
 */
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    unsigned fullsz, numelem;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    fullsz = vec_full_reg_size(s);
    numelem = decode_pred_count(fullsz, a->pat, a->esz);
    tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    return true;
}
1892  
/*
 * Non-saturating increment/decrement of a general register by the
 * pattern's element count times the immediate.  The signed step is
 * computed at translation time (negated when a->d is set) and applied
 * with a single add-immediate.
 */
static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    unsigned fullsz, numelem;
    int step;
    TCGv_i64 reg;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    fullsz = vec_full_reg_size(s);
    numelem = decode_pred_count(fullsz, a->pat, a->esz);
    step = numelem * a->imm * (a->d ? -1 : 1);
    reg = cpu_reg(s, a->rd);

    tcg_gen_addi_i64(reg, reg, step);
    return true;
}
1908  
/*
 * Saturating increment/decrement of the 32-bit value in a general
 * register.  With a zero step only the 32-to-64-bit extension of the
 * register (zero-extend when a->u, else sign-extend) is emitted;
 * otherwise do_sat_addsub_32 does the extension and the bounded
 * arithmetic.
 */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    unsigned fullsz, numelem;
    int step;
    TCGv_i64 reg;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    fullsz = vec_full_reg_size(s);
    numelem = decode_pred_count(fullsz, a->pat, a->esz);
    step = numelem * a->imm;
    reg = cpu_reg(s, a->rd);

    if (step != 0) {
        do_sat_addsub_32(reg, tcg_constant_i64(step), a->u, a->d);
    } else if (a->u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    return true;
}
1935  
/*
 * Saturating increment/decrement of a 64-bit general register.  A zero
 * step leaves the register untouched, so nothing is emitted for it.
 */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    unsigned fullsz, numelem;
    int step;
    TCGv_i64 reg;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    fullsz = vec_full_reg_size(s);
    numelem = decode_pred_count(fullsz, a->pat, a->esz);
    step = numelem * a->imm;
    reg = cpu_reg(s, a->rd);

    if (step == 0) {
        return true;
    }
    do_sat_addsub_64(reg, tcg_constant_i64(step), a->u, a->d);
    return true;
}
1955  
/*
 * Non-saturating vector increment/decrement by the pattern's element
 * count times the immediate.  Element size 0 is rejected.  A zero step
 * reduces to a vector move from a->rn to a->rd; otherwise the (possibly
 * negated) step is added to every element.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    unsigned fullsz, numelem;
    int step;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    fullsz = vec_full_reg_size(s);
    numelem = decode_pred_count(fullsz, a->pat, a->esz);
    step = numelem * a->imm;

    if (step == 0) {
        do_mov_z(s, a->rd, a->rn);
        return true;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn),
                          tcg_constant_i64(a->d ? -step : step),
                          fullsz, fullsz);
    }
    return true;
}
1978  
/*
 * Saturating vector increment/decrement by the pattern's element count
 * times the immediate.  Element size 0 is rejected.  A zero step
 * reduces to a vector move from a->rn to a->rd; otherwise the work is
 * done by do_sat_addsub_vec.
 */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    unsigned fullsz, numelem;
    int step;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    fullsz = vec_full_reg_size(s);
    numelem = decode_pred_count(fullsz, a->pat, a->esz);
    step = numelem * a->imm;

    if (step == 0) {
        do_mov_z(s, a->rd, a->rn);
        return true;
    }
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(step), a->u, a->d);
    }
    return true;
}
1999  
2000  /*
2001   *** SVE Bitwise Immediate Group
2002   */
2003  
/*
 * Vector logical operation with a bitmask immediate.  The 13-bit dbm
 * field is split into bit 12, bits [5:0] and bits [11:6] and given to
 * logic_imm_decode_wmask; if that rejects the encoding the instruction
 * is not decoded.  Otherwise GVEC_FN is applied with the resulting
 * 64-bit mask at MO_64.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t mask;
    bool valid = logic_imm_decode_wmask(&mask, extract32(a->dbm, 12, 1),
                                        extract32(a->dbm, 0, 6),
                                        extract32(a->dbm, 6, 6));

    if (valid) {
        return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, mask);
    }
    return false;
}
2014  
TRANS_FEAT(AND_zzi,aa64_sve,do_zz_dbm,a,tcg_gen_gvec_andi)2015  TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
2016  TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
2017  TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
2018  
2019  static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
2020  {
2021      uint64_t imm;
2022  
2023      if (!dc_isar_feature(aa64_sve, s)) {
2024          return false;
2025      }
2026      if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2027                                  extract32(a->dbm, 0, 6),
2028                                  extract32(a->dbm, 6, 6))) {
2029          return false;
2030      }
2031      if (sve_access_check(s)) {
2032          do_dupi_z(s, a->rd, imm);
2033      }
2034      return true;
2035  }
2036  
2037  /*
2038   *** SVE Integer Wide Immediate - Predicated Group
2039   */
2040  
2041  /* Implement all merging copies.  This is used for CPY (immediate),
2042   * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2043   */
do_cpy_m(DisasContext * s,int esz,int rd,int rn,int pg,TCGv_i64 val)2044  static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2045                       TCGv_i64 val)
2046  {
2047      typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2048      static gen_cpy * const fns[4] = {
2049          gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2050          gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2051      };
2052      unsigned vsz = vec_full_reg_size(s);
2053      TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2054      TCGv_ptr t_zd = tcg_temp_new_ptr();
2055      TCGv_ptr t_zn = tcg_temp_new_ptr();
2056      TCGv_ptr t_pg = tcg_temp_new_ptr();
2057  
2058      tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
2059      tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn));
2060      tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
2061  
2062      fns[esz](t_zd, t_zn, t_pg, val, desc);
2063  }
2064  
/*
 * FCPY: expand the encoded VFP immediate for the element size and
 * perform a merging copy of it.  Element size 0 is rejected.
 */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    uint64_t expanded;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Decode the VFP immediate.  */
    expanded = vfp_expand_imm(a->esz, a->imm);
    do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(expanded));
    return true;
}
2077  
/* CPY (immediate, merging): merging copy of the decoded immediate.  */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    return true;
}
2088  
/*
 * CPY (immediate, zeroing): call the per-element-size out-of-line
 * helper with the destination vector, the governing predicate and the
 * immediate as a constant.
 */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const helpers[4] = {
        gen_helper_sve_cpy_z_b,
        gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s,
        gen_helper_sve_cpy_z_d,
    };
    unsigned vsz;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                        pred_full_reg_offset(s, a->pg),
                        tcg_constant_i64(a->imm),
                        vsz, vsz, 0, helpers[a->esz]);
    return true;
}
2108  
2109  /*
2110   *** SVE Permute Extract Group
2111   */
2112  
/*
 * EXT: the result is the bytes of RN from offset IMM upward, followed
 * by the low bytes of RM.  An IMM at or beyond the vector size is
 * treated as 0.
 *
 * When both pieces have sizes that gvec can move directly, and the
 * copies cannot clobber a source they still need (RM is not RD, and if
 * RN is RD the first copy's source and destination do not overlap),
 * two inline vector moves are used; otherwise the out-of-line helper.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    unsigned vsz, skip, keep, d, n, m;
    bool inline_ok;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    skip = imm >= vsz ? 0 : imm;
    keep = vsz - skip;
    d = vec_full_reg_offset(s, rd);
    n = vec_full_reg_offset(s, rn);
    m = vec_full_reg_offset(s, rm);

    inline_ok = m != d
                && skip == size_for_gvec(skip)
                && keep == size_for_gvec(keep)
                && (d != n || keep <= skip);

    if (!inline_ok) {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, skip, gen_helper_sve_ext);
        return true;
    }

    tcg_gen_gvec_mov(0, d, n + skip, keep, keep);
    if (skip != 0) {
        tcg_gen_gvec_mov(0, d + keep, m, skip, skip);
    }
    return true;
}
2142  
2143  TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2144  TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
2145  
2146  /*
2147   *** SVE Permute - Unpredicated Group
2148   */
2149  
/*
 * DUP (scalar): replicate the register obtained from cpu_reg_sp for
 * a->rn into every element of vector a->rd.
 */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    unsigned vsz;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, cpu_reg_sp(s, a->rn));
    return true;
}
2162  
/*
 * DUP (indexed): the immediate encodes both element size and index.
 * The position of its lowest set bit is the element size, and the bits
 * above that bit are the index.  An immediate whose low five bits are
 * all zero is rejected.  An index that falls outside the vector yields
 * an all-zero result.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    unsigned vsz, dofs, esz, index;

    if (!dc_isar_feature(aa64_sve, s) || (a->imm & 0x1f) == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    dofs = vec_full_reg_offset(s, a->rd);
    esz = ctz32(a->imm);
    index = a->imm >> (esz + 1);

    if ((index << esz) >= vsz) {
        /*
         * While dup_mem handles 128-bit elements, dup_imm does not.
         * Thankfully element size doesn't matter for splatting zero.
         */
        tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
    } else {
        unsigned nofs = vec_reg_offset(s, a->rn, index, esz);

        tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
    }
    return true;
}
2192  
/*
 * Common part of INSR: call the per-element-size helper with pointers
 * to vectors a->rd and a->rn and the 64-bit scalar VAL.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void insr_fn(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static insr_fn * const helpers[4] = {
        gen_helper_sve_insr_b,
        gen_helper_sve_insr_h,
        gen_helper_sve_insr_s,
        gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr zd_ptr = tcg_temp_new_ptr();
    TCGv_ptr zn_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(zd_ptr, tcg_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn_ptr, tcg_env, vec_full_reg_offset(s, a->rn));

    helpers[a->esz](zd_ptr, zn_ptr, val, desc);
}
2210  
/*
 * INSR (SIMD&FP scalar): the inserted value is the 64-bit element 0 of
 * vector register a->rm, loaded into a temporary.
 */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    TCGv_i64 scalar;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    scalar = tcg_temp_new_i64();
    tcg_gen_ld_i64(scalar, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64));
    do_insr_i64(s, a, scalar);
    return true;
}
2223  
/* INSR (scalar): the inserted value is general register a->rm.  */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    do_insr_i64(s, a, cpu_reg(s, a->rm));
    return true;
}
2234  
2235  static gen_helper_gvec_2 * const rev_fns[4] = {
2236      gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2237      gen_helper_sve_rev_s, gen_helper_sve_rev_d
2238  };
2239  TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
2240  
2241  static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2242      gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2243      gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2244  };
2245  TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
2246  
2247  static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2248      gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2249      gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2250  };
2251  TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2252             a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
2253  
2254  static gen_helper_gvec_3 * const tbx_fns[4] = {
2255      gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2256      gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2257  };
2258  TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
2259  
/*
 * Vector unpack: the helper is chosen by element size and by a->u
 * (second column of the table).  The source is vector a->rn, starting
 * at its midpoint when a->h is set and at its base otherwise.  Element
 * size 0 has no helper and is rejected.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const helpers[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };
    unsigned vsz;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn)
                       + (a->h ? vsz / 2 : 0),
                       vsz, vsz, 0, helpers[a->esz][a->u]);
    return true;
}
2281  
2282  /*
2283   *** SVE Permute - Predicates Group
2284   */
2285  
/*
 * Three-operand predicate permute.  FN receives pointers to predicates
 * a->rd, a->rn and a->rm plus a PREDDESC descriptor holding the
 * predicate size, the element size and HIGH_ODD in the DATA field.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    TCGv_ptr pd_ptr, pn_ptr, pm_ptr;
    unsigned psz;
    uint32_t desc = 0;

    if (!sve_access_check(s)) {
        return true;
    }

    psz = pred_full_reg_size(s);

    pd_ptr = tcg_temp_new_ptr();
    pn_ptr = tcg_temp_new_ptr();
    pm_ptr = tcg_temp_new_ptr();

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(pd_ptr, tcg_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(pn_ptr, tcg_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pm_ptr, tcg_env, pred_full_reg_offset(s, a->rm));

    fn(pd_ptr, pn_ptr, pm_ptr, tcg_constant_i32(desc));
    return true;
}
2311  
/*
 * Two-operand predicate permute.  FN receives pointers to predicates
 * a->rd and a->rn plus a PREDDESC descriptor holding the predicate
 * size, the element size and HIGH_ODD in the DATA field.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    TCGv_ptr pd_ptr, pn_ptr;
    unsigned psz;
    uint32_t desc = 0;

    if (!sve_access_check(s)) {
        return true;
    }

    psz = pred_full_reg_size(s);
    pd_ptr = tcg_temp_new_ptr();
    pn_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd_ptr, tcg_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(pn_ptr, tcg_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(pd_ptr, pn_ptr, tcg_constant_i32(desc));
    return true;
}
2334  
2335  TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2336  TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2337  TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2338  TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2339  TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2340  TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
2341  
2342  TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2343  TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2344  TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
2345  
2346  /*
2347   *** SVE Permute - Interleaving Group
2348   */
2349  
2350  static gen_helper_gvec_3 * const zip_fns[4] = {
2351      gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2352      gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2353  };
2354  TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2355             zip_fns[a->esz], a, 0)
2356  TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2357             zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2358  
2359  TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2360             gen_helper_sve2_zip_q, a, 0)
2361  TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2362             gen_helper_sve2_zip_q, a,
2363             QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
2364  
2365  static gen_helper_gvec_3 * const uzp_fns[4] = {
2366      gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2367      gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2368  };
2369  
2370  TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2371             uzp_fns[a->esz], a, 0)
2372  TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2373             uzp_fns[a->esz], a, 1 << a->esz)
2374  
2375  TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2376             gen_helper_sve2_uzp_q, a, 0)
2377  TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2378             gen_helper_sve2_uzp_q, a, 16)
2379  
2380  static gen_helper_gvec_3 * const trn_fns[4] = {
2381      gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2382      gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2383  };
2384  
2385  TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2386             trn_fns[a->esz], a, 0)
2387  TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2388             trn_fns[a->esz], a, 1 << a->esz)
2389  
2390  TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2391             gen_helper_sve2_trn_q, a, 0)
2392  TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2393             gen_helper_sve2_trn_q, a, 16)
2394  
2395  /*
2396   *** SVE Permute Vector - Predicated Group
2397   */
2398  
2399  static gen_helper_gvec_3 * const compact_fns[4] = {
2400      NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2401  };
2402  TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
2403                          compact_fns[a->esz], a, 0)
2404  
2405  /* Call the helper that computes the ARM LastActiveElement pseudocode
2406   * function, scaled by the element size.  This includes the not found
2407   * indication; e.g. not found for esz=3 is -8.
2408   */
find_last_active(DisasContext * s,TCGv_i32 ret,int esz,int pg)2409  static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2410  {
2411      /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2412       * round up, as we do elsewhere, because we need the exact size.
2413       */
2414      TCGv_ptr t_p = tcg_temp_new_ptr();
2415      unsigned desc = 0;
2416  
2417      desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2418      desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2419  
2420      tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg));
2421  
2422      gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
2423  }
2424  
2425  /* Increment LAST to the offset of the next element in the vector,
2426   * wrapping around to 0.
2427   */
incr_last_active(DisasContext * s,TCGv_i32 last,int esz)2428  static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2429  {
2430      unsigned vsz = vec_full_reg_size(s);
2431  
2432      tcg_gen_addi_i32(last, last, 1 << esz);
2433      if (is_power_of_2(vsz)) {
2434          tcg_gen_andi_i32(last, last, vsz - 1);
2435      } else {
2436          TCGv_i32 max = tcg_constant_i32(vsz);
2437          TCGv_i32 zero = tcg_constant_i32(0);
2438          tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2439      }
2440  }
2441  
2442  /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
wrap_last_active(DisasContext * s,TCGv_i32 last,int esz)2443  static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2444  {
2445      unsigned vsz = vec_full_reg_size(s);
2446  
2447      if (is_power_of_2(vsz)) {
2448          tcg_gen_andi_i32(last, last, vsz - 1);
2449      } else {
2450          TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2451          TCGv_i32 zero = tcg_constant_i32(0);
2452          tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2453      }
2454  }
2455  
2456  /* Load an unsigned element of ESZ from BASE+OFS.  */
load_esz(TCGv_ptr base,int ofs,int esz)2457  static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2458  {
2459      TCGv_i64 r = tcg_temp_new_i64();
2460  
2461      switch (esz) {
2462      case 0:
2463          tcg_gen_ld8u_i64(r, base, ofs);
2464          break;
2465      case 1:
2466          tcg_gen_ld16u_i64(r, base, ofs);
2467          break;
2468      case 2:
2469          tcg_gen_ld32u_i64(r, base, ofs);
2470          break;
2471      case 3:
2472          tcg_gen_ld_i64(r, base, ofs);
2473          break;
2474      default:
2475          g_assert_not_reached();
2476      }
2477      return r;
2478  }
2479  
2480  /* Load an unsigned element of ESZ from RM[LAST].  */
load_last_active(DisasContext * s,TCGv_i32 last,int rm,int esz)2481  static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2482                                   int rm, int esz)
2483  {
2484      TCGv_ptr p = tcg_temp_new_ptr();
2485  
2486      /* Convert offset into vector into offset into ENV.
2487       * The final adjustment for the vector register base
2488       * is added via constant offset to the load.
2489       */
2490  #if HOST_BIG_ENDIAN
2491      /* Adjust for element ordering.  See vec_reg_offset.  */
2492      if (esz < 3) {
2493          tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2494      }
2495  #endif
2496      tcg_gen_ext_i32_ptr(p, last);
2497      tcg_gen_add_ptr(p, p, tcg_env);
2498  
2499      return load_esz(p, vec_full_reg_offset(s, rm), esz);
2500  }
2501  
2502  /* Compute CLAST for a Zreg.  */
do_clast_vector(DisasContext * s,arg_rprr_esz * a,bool before)2503  static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2504  {
2505      TCGv_i32 last;
2506      TCGLabel *over;
2507      TCGv_i64 ele;
2508      unsigned vsz, esz = a->esz;
2509  
2510      if (!sve_access_check(s)) {
2511          return true;
2512      }
2513  
2514      last = tcg_temp_new_i32();
2515      over = gen_new_label();
2516  
2517      find_last_active(s, last, esz, a->pg);
2518  
2519      /* There is of course no movcond for a 2048-bit vector,
2520       * so we must branch over the actual store.
2521       */
2522      tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2523  
2524      if (!before) {
2525          incr_last_active(s, last, esz);
2526      }
2527  
2528      ele = load_last_active(s, last, a->rm, esz);
2529  
2530      vsz = vec_full_reg_size(s);
2531      tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2532  
2533      /* If this insn used MOVPRFX, we may need a second move.  */
2534      if (a->rd != a->rn) {
2535          TCGLabel *done = gen_new_label();
2536          tcg_gen_br(done);
2537  
2538          gen_set_label(over);
2539          do_mov_z(s, a->rd, a->rn);
2540  
2541          gen_set_label(done);
2542      } else {
2543          gen_set_label(over);
2544      }
2545      return true;
2546  }
2547  
TRANS_FEAT(CLASTA_z,aa64_sve,do_clast_vector,a,false)2548  TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2549  TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
2550  
2551  /* Compute CLAST for a scalar.  */
2552  static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2553                              bool before, TCGv_i64 reg_val)
2554  {
2555      TCGv_i32 last = tcg_temp_new_i32();
2556      TCGv_i64 ele, cmp;
2557  
2558      find_last_active(s, last, esz, pg);
2559  
2560      /* Extend the original value of last prior to incrementing.  */
2561      cmp = tcg_temp_new_i64();
2562      tcg_gen_ext_i32_i64(cmp, last);
2563  
2564      if (!before) {
2565          incr_last_active(s, last, esz);
2566      }
2567  
2568      /* The conceit here is that while last < 0 indicates not found, after
2569       * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address
2570       * from which we can load garbage.  We then discard the garbage with
2571       * a conditional move.
2572       */
2573      ele = load_last_active(s, last, rm, esz);
2574  
2575      tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2576                          ele, reg_val);
2577  }
2578  
2579  /* Compute CLAST for a Vreg.  */
do_clast_fp(DisasContext * s,arg_rpr_esz * a,bool before)2580  static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2581  {
2582      if (sve_access_check(s)) {
2583          int esz = a->esz;
2584          int ofs = vec_reg_offset(s, a->rd, 0, esz);
2585          TCGv_i64 reg = load_esz(tcg_env, ofs, esz);
2586  
2587          do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2588          write_fp_dreg(s, a->rd, reg);
2589      }
2590      return true;
2591  }
2592  
TRANS_FEAT(CLASTA_v,aa64_sve,do_clast_fp,a,false)2593  TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2594  TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
2595  
2596  /* Compute CLAST for a Xreg.  */
2597  static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2598  {
2599      TCGv_i64 reg;
2600  
2601      if (!sve_access_check(s)) {
2602          return true;
2603      }
2604  
2605      reg = cpu_reg(s, a->rd);
2606      switch (a->esz) {
2607      case 0:
2608          tcg_gen_ext8u_i64(reg, reg);
2609          break;
2610      case 1:
2611          tcg_gen_ext16u_i64(reg, reg);
2612          break;
2613      case 2:
2614          tcg_gen_ext32u_i64(reg, reg);
2615          break;
2616      case 3:
2617          break;
2618      default:
2619          g_assert_not_reached();
2620      }
2621  
2622      do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2623      return true;
2624  }
2625  
TRANS_FEAT(CLASTA_r,aa64_sve,do_clast_general,a,false)2626  TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2627  TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
2628  
2629  /* Compute LAST for a scalar.  */
2630  static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2631                                 int pg, int rm, bool before)
2632  {
2633      TCGv_i32 last = tcg_temp_new_i32();
2634  
2635      find_last_active(s, last, esz, pg);
2636      if (before) {
2637          wrap_last_active(s, last, esz);
2638      } else {
2639          incr_last_active(s, last, esz);
2640      }
2641  
2642      return load_last_active(s, last, rm, esz);
2643  }
2644  
2645  /* Compute LAST for a Vreg.  */
do_last_fp(DisasContext * s,arg_rpr_esz * a,bool before)2646  static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2647  {
2648      if (sve_access_check(s)) {
2649          TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2650          write_fp_dreg(s, a->rd, val);
2651      }
2652      return true;
2653  }
2654  
TRANS_FEAT(LASTA_v,aa64_sve,do_last_fp,a,false)2655  TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2656  TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
2657  
2658  /* Compute LAST for a Xreg.  */
2659  static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2660  {
2661      if (sve_access_check(s)) {
2662          TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2663          tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2664      }
2665      return true;
2666  }
2667  
TRANS_FEAT(LASTA_r,aa64_sve,do_last_general,a,false)2668  TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2669  TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
2670  
2671  static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2672  {
2673      if (!dc_isar_feature(aa64_sve, s)) {
2674          return false;
2675      }
2676      if (sve_access_check(s)) {
2677          do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2678      }
2679      return true;
2680  }
2681  
trans_CPY_m_v(DisasContext * s,arg_rpr_esz * a)2682  static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2683  {
2684      if (!dc_isar_feature(aa64_sve, s)) {
2685          return false;
2686      }
2687      if (sve_access_check(s)) {
2688          int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2689          TCGv_i64 t = load_esz(tcg_env, ofs, a->esz);
2690          do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2691      }
2692      return true;
2693  }
2694  
2695  static gen_helper_gvec_3 * const revb_fns[4] = {
2696      NULL,                  gen_helper_sve_revb_h,
2697      gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2698  };
2699  TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
2700  
2701  static gen_helper_gvec_3 * const revh_fns[4] = {
2702      NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2703  };
2704  TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
2705  
2706  TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2707             a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
2708  
2709  TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
2710  
2711  TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2712             gen_helper_sve_splice, a, a->esz)
2713  
2714  TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2715             a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
2716  
2717  /*
2718   *** SVE Integer Compare - Vectors Group
2719   */
2720  
do_ppzz_flags(DisasContext * s,arg_rprr_esz * a,gen_helper_gvec_flags_4 * gen_fn)2721  static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2722                            gen_helper_gvec_flags_4 *gen_fn)
2723  {
2724      TCGv_ptr pd, zn, zm, pg;
2725      unsigned vsz;
2726      TCGv_i32 t;
2727  
2728      if (gen_fn == NULL) {
2729          return false;
2730      }
2731      if (!sve_access_check(s)) {
2732          return true;
2733      }
2734  
2735      vsz = vec_full_reg_size(s);
2736      t = tcg_temp_new_i32();
2737      pd = tcg_temp_new_ptr();
2738      zn = tcg_temp_new_ptr();
2739      zm = tcg_temp_new_ptr();
2740      pg = tcg_temp_new_ptr();
2741  
2742      tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
2743      tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
2744      tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm));
2745      tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
2746  
2747      gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
2748  
2749      do_pred_flags(t);
2750      return true;
2751  }
2752  
2753  #define DO_PPZZ(NAME, name) \
2754      static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {       \
2755          gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2756          gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2757      };                                                                  \
2758      TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                    \
2759                 a, name##_ppzz_fns[a->esz])
2760  
DO_PPZZ(CMPEQ,cmpeq)2761  DO_PPZZ(CMPEQ, cmpeq)
2762  DO_PPZZ(CMPNE, cmpne)
2763  DO_PPZZ(CMPGT, cmpgt)
2764  DO_PPZZ(CMPGE, cmpge)
2765  DO_PPZZ(CMPHI, cmphi)
2766  DO_PPZZ(CMPHS, cmphs)
2767  
2768  #undef DO_PPZZ
2769  
2770  #define DO_PPZW(NAME, name) \
2771      static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {       \
2772          gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2773          gen_helper_sve_##name##_ppzw_s, NULL                            \
2774      };                                                                  \
2775      TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                    \
2776                 a, name##_ppzw_fns[a->esz])
2777  
2778  DO_PPZW(CMPEQ, cmpeq)
2779  DO_PPZW(CMPNE, cmpne)
2780  DO_PPZW(CMPGT, cmpgt)
2781  DO_PPZW(CMPGE, cmpge)
2782  DO_PPZW(CMPHI, cmphi)
2783  DO_PPZW(CMPHS, cmphs)
2784  DO_PPZW(CMPLT, cmplt)
2785  DO_PPZW(CMPLE, cmple)
2786  DO_PPZW(CMPLO, cmplo)
2787  DO_PPZW(CMPLS, cmpls)
2788  
2789  #undef DO_PPZW
2790  
2791  /*
2792   *** SVE Integer Compare - Immediate Groups
2793   */
2794  
2795  static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2796                            gen_helper_gvec_flags_3 *gen_fn)
2797  {
2798      TCGv_ptr pd, zn, pg;
2799      unsigned vsz;
2800      TCGv_i32 t;
2801  
2802      if (gen_fn == NULL) {
2803          return false;
2804      }
2805      if (!sve_access_check(s)) {
2806          return true;
2807      }
2808  
2809      vsz = vec_full_reg_size(s);
2810      t = tcg_temp_new_i32();
2811      pd = tcg_temp_new_ptr();
2812      zn = tcg_temp_new_ptr();
2813      pg = tcg_temp_new_ptr();
2814  
2815      tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
2816      tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
2817      tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
2818  
2819      gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
2820  
2821      do_pred_flags(t);
2822      return true;
2823  }
2824  
2825  #define DO_PPZI(NAME, name) \
2826      static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {         \
2827          gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2828          gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2829      };                                                                    \
2830      TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                   \
2831                 name##_ppzi_fns[a->esz])
2832  
DO_PPZI(CMPEQ,cmpeq)2833  DO_PPZI(CMPEQ, cmpeq)
2834  DO_PPZI(CMPNE, cmpne)
2835  DO_PPZI(CMPGT, cmpgt)
2836  DO_PPZI(CMPGE, cmpge)
2837  DO_PPZI(CMPHI, cmphi)
2838  DO_PPZI(CMPHS, cmphs)
2839  DO_PPZI(CMPLT, cmplt)
2840  DO_PPZI(CMPLE, cmple)
2841  DO_PPZI(CMPLO, cmplo)
2842  DO_PPZI(CMPLS, cmpls)
2843  
2844  #undef DO_PPZI
2845  
2846  /*
2847   *** SVE Partition Break Group
2848   */
2849  
2850  static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2851                      gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2852  {
2853      if (!sve_access_check(s)) {
2854          return true;
2855      }
2856  
2857      unsigned vsz = pred_full_reg_size(s);
2858  
2859      /* Predicate sizes may be smaller and cannot use simd_desc.  */
2860      TCGv_ptr d = tcg_temp_new_ptr();
2861      TCGv_ptr n = tcg_temp_new_ptr();
2862      TCGv_ptr m = tcg_temp_new_ptr();
2863      TCGv_ptr g = tcg_temp_new_ptr();
2864      TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
2865  
2866      tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
2867      tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
2868      tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm));
2869      tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
2870  
2871      if (a->s) {
2872          TCGv_i32 t = tcg_temp_new_i32();
2873          fn_s(t, d, n, m, g, desc);
2874          do_pred_flags(t);
2875      } else {
2876          fn(d, n, m, g, desc);
2877      }
2878      return true;
2879  }
2880  
do_brk2(DisasContext * s,arg_rpr_s * a,gen_helper_gvec_3 * fn,gen_helper_gvec_flags_3 * fn_s)2881  static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2882                      gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2883  {
2884      if (!sve_access_check(s)) {
2885          return true;
2886      }
2887  
2888      unsigned vsz = pred_full_reg_size(s);
2889  
2890      /* Predicate sizes may be smaller and cannot use simd_desc.  */
2891      TCGv_ptr d = tcg_temp_new_ptr();
2892      TCGv_ptr n = tcg_temp_new_ptr();
2893      TCGv_ptr g = tcg_temp_new_ptr();
2894      TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
2895  
2896      tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
2897      tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
2898      tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
2899  
2900      if (a->s) {
2901          TCGv_i32 t = tcg_temp_new_i32();
2902          fn_s(t, d, n, g, desc);
2903          do_pred_flags(t);
2904      } else {
2905          fn(d, n, g, desc);
2906      }
2907      return true;
2908  }
2909  
TRANS_FEAT(BRKPA,aa64_sve,do_brk3,a,gen_helper_sve_brkpa,gen_helper_sve_brkpas)2910  TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
2911             gen_helper_sve_brkpa, gen_helper_sve_brkpas)
2912  TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
2913             gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
2914  
2915  TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
2916             gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
2917  TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
2918             gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
2919  
2920  TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
2921             gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
2922  TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
2923             gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
2924  
2925  TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
2926             gen_helper_sve_brkn, gen_helper_sve_brkns)
2927  
2928  /*
2929   *** SVE Predicate Count Group
2930   */
2931  
2932  static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2933  {
2934      unsigned psz = pred_full_reg_size(s);
2935  
2936      if (psz <= 8) {
2937          uint64_t psz_mask;
2938  
2939          tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn));
2940          if (pn != pg) {
2941              TCGv_i64 g = tcg_temp_new_i64();
2942              tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg));
2943              tcg_gen_and_i64(val, val, g);
2944          }
2945  
2946          /* Reduce the pred_esz_masks value simply to reduce the
2947           * size of the code generated here.
2948           */
2949          psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2950          tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2951  
2952          tcg_gen_ctpop_i64(val, val);
2953      } else {
2954          TCGv_ptr t_pn = tcg_temp_new_ptr();
2955          TCGv_ptr t_pg = tcg_temp_new_ptr();
2956          unsigned desc = 0;
2957  
2958          desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
2959          desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2960  
2961          tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn));
2962          tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
2963  
2964          gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
2965      }
2966  }
2967  
trans_CNTP(DisasContext * s,arg_CNTP * a)2968  static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
2969  {
2970      if (!dc_isar_feature(aa64_sve, s)) {
2971          return false;
2972      }
2973      if (sve_access_check(s)) {
2974          do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
2975      }
2976      return true;
2977  }
2978  
trans_INCDECP_r(DisasContext * s,arg_incdec_pred * a)2979  static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
2980  {
2981      if (!dc_isar_feature(aa64_sve, s)) {
2982          return false;
2983      }
2984      if (sve_access_check(s)) {
2985          TCGv_i64 reg = cpu_reg(s, a->rd);
2986          TCGv_i64 val = tcg_temp_new_i64();
2987  
2988          do_cntp(s, val, a->esz, a->pg, a->pg);
2989          if (a->d) {
2990              tcg_gen_sub_i64(reg, reg, val);
2991          } else {
2992              tcg_gen_add_i64(reg, reg, val);
2993          }
2994      }
2995      return true;
2996  }
2997  
trans_INCDECP_z(DisasContext * s,arg_incdec2_pred * a)2998  static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
2999  {
3000      if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3001          return false;
3002      }
3003      if (sve_access_check(s)) {
3004          unsigned vsz = vec_full_reg_size(s);
3005          TCGv_i64 val = tcg_temp_new_i64();
3006          GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3007  
3008          do_cntp(s, val, a->esz, a->pg, a->pg);
3009          gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3010                  vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3011      }
3012      return true;
3013  }
3014  
trans_SINCDECP_r_32(DisasContext * s,arg_incdec_pred * a)3015  static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3016  {
3017      if (!dc_isar_feature(aa64_sve, s)) {
3018          return false;
3019      }
3020      if (sve_access_check(s)) {
3021          TCGv_i64 reg = cpu_reg(s, a->rd);
3022          TCGv_i64 val = tcg_temp_new_i64();
3023  
3024          do_cntp(s, val, a->esz, a->pg, a->pg);
3025          do_sat_addsub_32(reg, val, a->u, a->d);
3026      }
3027      return true;
3028  }
3029  
trans_SINCDECP_r_64(DisasContext * s,arg_incdec_pred * a)3030  static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3031  {
3032      if (!dc_isar_feature(aa64_sve, s)) {
3033          return false;
3034      }
3035      if (sve_access_check(s)) {
3036          TCGv_i64 reg = cpu_reg(s, a->rd);
3037          TCGv_i64 val = tcg_temp_new_i64();
3038  
3039          do_cntp(s, val, a->esz, a->pg, a->pg);
3040          do_sat_addsub_64(reg, val, a->u, a->d);
3041      }
3042      return true;
3043  }
3044  
trans_SINCDECP_z(DisasContext * s,arg_incdec2_pred * a)3045  static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3046  {
3047      if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3048          return false;
3049      }
3050      if (sve_access_check(s)) {
3051          TCGv_i64 val = tcg_temp_new_i64();
3052          do_cntp(s, val, a->esz, a->pg, a->pg);
3053          do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3054      }
3055      return true;
3056  }
3057  
3058  /*
3059   *** SVE Integer Compare Scalars Group
3060   */
3061  
trans_CTERM(DisasContext * s,arg_CTERM * a)3062  static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3063  {
3064      if (!dc_isar_feature(aa64_sve, s)) {
3065          return false;
3066      }
3067      if (!sve_access_check(s)) {
3068          return true;
3069      }
3070  
3071      TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3072      TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3073      TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3074      TCGv_i64 cmp = tcg_temp_new_i64();
3075  
3076      tcg_gen_setcond_i64(cond, cmp, rn, rm);
3077      tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3078  
3079      /* VF = !NF & !CF.  */
3080      tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3081      tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3082  
3083      /* Both NF and VF actually look at bit 31.  */
3084      tcg_gen_neg_i32(cpu_NF, cpu_NF);
3085      tcg_gen_neg_i32(cpu_VF, cpu_VF);
3086      return true;
3087  }
3088  
trans_WHILE(DisasContext * s,arg_WHILE * a)3089  static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3090  {
3091      TCGv_i64 op0, op1, t0, t1, tmax;
3092      TCGv_i32 t2;
3093      TCGv_ptr ptr;
3094      unsigned vsz = vec_full_reg_size(s);
3095      unsigned desc = 0;
3096      TCGCond cond;
3097      uint64_t maxval;
3098      /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3099      bool eq = a->eq == a->lt;
3100  
3101      /* The greater-than conditions are all SVE2. */
3102      if (a->lt
3103          ? !dc_isar_feature(aa64_sve, s)
3104          : !dc_isar_feature(aa64_sve2, s)) {
3105          return false;
3106      }
3107      if (!sve_access_check(s)) {
3108          return true;
3109      }
3110  
3111      op0 = read_cpu_reg(s, a->rn, 1);
3112      op1 = read_cpu_reg(s, a->rm, 1);
3113  
3114      if (!a->sf) {
3115          if (a->u) {
3116              tcg_gen_ext32u_i64(op0, op0);
3117              tcg_gen_ext32u_i64(op1, op1);
3118          } else {
3119              tcg_gen_ext32s_i64(op0, op0);
3120              tcg_gen_ext32s_i64(op1, op1);
3121          }
3122      }
3123  
3124      /* For the helper, compress the different conditions into a computation
3125       * of how many iterations for which the condition is true.
3126       */
3127      t0 = tcg_temp_new_i64();
3128      t1 = tcg_temp_new_i64();
3129  
3130      if (a->lt) {
3131          tcg_gen_sub_i64(t0, op1, op0);
3132          if (a->u) {
3133              maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3134              cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3135          } else {
3136              maxval = a->sf ? INT64_MAX : INT32_MAX;
3137              cond = eq ? TCG_COND_LE : TCG_COND_LT;
3138          }
3139      } else {
3140          tcg_gen_sub_i64(t0, op0, op1);
3141          if (a->u) {
3142              maxval = 0;
3143              cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3144          } else {
3145              maxval = a->sf ? INT64_MIN : INT32_MIN;
3146              cond = eq ? TCG_COND_GE : TCG_COND_GT;
3147          }
3148      }
3149  
3150      tmax = tcg_constant_i64(vsz >> a->esz);
3151      if (eq) {
3152          /* Equality means one more iteration.  */
3153          tcg_gen_addi_i64(t0, t0, 1);
3154  
3155          /*
3156           * For the less-than while, if op1 is maxval (and the only time
3157           * the addition above could overflow), then we produce an all-true
3158           * predicate by setting the count to the vector length.  This is
3159           * because the pseudocode is described as an increment + compare
3160           * loop, and the maximum integer would always compare true.
3161           * Similarly, the greater-than while has the same issue with the
3162           * minimum integer due to the decrement + compare loop.
3163           */
3164          tcg_gen_movi_i64(t1, maxval);
3165          tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3166      }
3167  
3168      /* Bound to the maximum.  */
3169      tcg_gen_umin_i64(t0, t0, tmax);
3170  
3171      /* Set the count to zero if the condition is false.  */
3172      tcg_gen_movi_i64(t1, 0);
3173      tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3174  
3175      /* Since we're bounded, pass as a 32-bit type.  */
3176      t2 = tcg_temp_new_i32();
3177      tcg_gen_extrl_i64_i32(t2, t0);
3178  
3179      /* Scale elements to bits.  */
3180      tcg_gen_shli_i32(t2, t2, a->esz);
3181  
3182      desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3183      desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3184  
3185      ptr = tcg_temp_new_ptr();
3186      tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
3187  
3188      if (a->lt) {
3189          gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
3190      } else {
3191          gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
3192      }
3193      do_pred_flags(t2);
3194      return true;
3195  }
3196  
trans_WHILE_ptr(DisasContext * s,arg_WHILE_ptr * a)3197  static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3198  {
3199      TCGv_i64 op0, op1, diff, t1, tmax;
3200      TCGv_i32 t2;
3201      TCGv_ptr ptr;
3202      unsigned vsz = vec_full_reg_size(s);
3203      unsigned desc = 0;
3204  
3205      if (!dc_isar_feature(aa64_sve2, s)) {
3206          return false;
3207      }
3208      if (!sve_access_check(s)) {
3209          return true;
3210      }
3211  
3212      op0 = read_cpu_reg(s, a->rn, 1);
3213      op1 = read_cpu_reg(s, a->rm, 1);
3214  
3215      tmax = tcg_constant_i64(vsz);
3216      diff = tcg_temp_new_i64();
3217  
3218      if (a->rw) {
3219          /* WHILERW */
3220          /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3221          t1 = tcg_temp_new_i64();
3222          tcg_gen_sub_i64(diff, op0, op1);
3223          tcg_gen_sub_i64(t1, op1, op0);
3224          tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3225          /* Round down to a multiple of ESIZE.  */
3226          tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3227          /* If op1 == op0, diff == 0, and the condition is always true. */
3228          tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3229      } else {
3230          /* WHILEWR */
3231          tcg_gen_sub_i64(diff, op1, op0);
3232          /* Round down to a multiple of ESIZE.  */
3233          tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3234          /* If op0 >= op1, diff <= 0, the condition is always true. */
3235          tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3236      }
3237  
3238      /* Bound to the maximum.  */
3239      tcg_gen_umin_i64(diff, diff, tmax);
3240  
3241      /* Since we're bounded, pass as a 32-bit type.  */
3242      t2 = tcg_temp_new_i32();
3243      tcg_gen_extrl_i64_i32(t2, diff);
3244  
3245      desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3246      desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3247  
3248      ptr = tcg_temp_new_ptr();
3249      tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
3250  
3251      gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
3252      do_pred_flags(t2);
3253      return true;
3254  }
3255  
3256  /*
3257   *** SVE Integer Wide Immediate - Unpredicated Group
3258   */
3259  
trans_FDUP(DisasContext * s,arg_FDUP * a)3260  static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3261  {
3262      if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3263          return false;
3264      }
3265      if (sve_access_check(s)) {
3266          unsigned vsz = vec_full_reg_size(s);
3267          int dofs = vec_full_reg_offset(s, a->rd);
3268          uint64_t imm;
3269  
3270          /* Decode the VFP immediate.  */
3271          imm = vfp_expand_imm(a->esz, a->imm);
3272          tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3273      }
3274      return true;
3275  }
3276  
trans_DUP_i(DisasContext * s,arg_DUP_i * a)3277  static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3278  {
3279      if (!dc_isar_feature(aa64_sve, s)) {
3280          return false;
3281      }
3282      if (sve_access_check(s)) {
3283          unsigned vsz = vec_full_reg_size(s);
3284          int dofs = vec_full_reg_offset(s, a->rd);
3285          tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3286      }
3287      return true;
3288  }
3289  
TRANS_FEAT(ADD_zzi,aa64_sve,gen_gvec_fn_arg_zzi,tcg_gen_gvec_addi,a)3290  TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
3291  
3292  static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3293  {
3294      a->imm = -a->imm;
3295      return trans_ADD_zzi(s, a);
3296  }
3297  
trans_SUBR_zzi(DisasContext * s,arg_rri_esz * a)3298  static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3299  {
3300      static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3301      static const GVecGen2s op[4] = {
3302          { .fni8 = tcg_gen_vec_sub8_i64,
3303            .fniv = tcg_gen_sub_vec,
3304            .fno = gen_helper_sve_subri_b,
3305            .opt_opc = vecop_list,
3306            .vece = MO_8,
3307            .scalar_first = true },
3308          { .fni8 = tcg_gen_vec_sub16_i64,
3309            .fniv = tcg_gen_sub_vec,
3310            .fno = gen_helper_sve_subri_h,
3311            .opt_opc = vecop_list,
3312            .vece = MO_16,
3313            .scalar_first = true },
3314          { .fni4 = tcg_gen_sub_i32,
3315            .fniv = tcg_gen_sub_vec,
3316            .fno = gen_helper_sve_subri_s,
3317            .opt_opc = vecop_list,
3318            .vece = MO_32,
3319            .scalar_first = true },
3320          { .fni8 = tcg_gen_sub_i64,
3321            .fniv = tcg_gen_sub_vec,
3322            .fno = gen_helper_sve_subri_d,
3323            .opt_opc = vecop_list,
3324            .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3325            .vece = MO_64,
3326            .scalar_first = true }
3327      };
3328  
3329      if (!dc_isar_feature(aa64_sve, s)) {
3330          return false;
3331      }
3332      if (sve_access_check(s)) {
3333          unsigned vsz = vec_full_reg_size(s);
3334          tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3335                          vec_full_reg_offset(s, a->rn),
3336                          vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
3337      }
3338      return true;
3339  }
3340  
TRANS_FEAT(MUL_zzi,aa64_sve,gen_gvec_fn_arg_zzi,tcg_gen_gvec_muli,a)3341  TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
3342  
3343  static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3344  {
3345      if (sve_access_check(s)) {
3346          do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3347                            tcg_constant_i64(a->imm), u, d);
3348      }
3349      return true;
3350  }
3351  
TRANS_FEAT(SQADD_zzi,aa64_sve,do_zzi_sat,a,false,false)3352  TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
3353  TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
3354  TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
3355  TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
3356  
3357  static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3358  {
3359      if (sve_access_check(s)) {
3360          unsigned vsz = vec_full_reg_size(s);
3361          tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3362                              vec_full_reg_offset(s, a->rn),
3363                              tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
3364      }
3365      return true;
3366  }
3367  
3368  #define DO_ZZI(NAME, name) \
3369      static gen_helper_gvec_2i * const name##i_fns[4] = {                \
3370          gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3371          gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3372      };                                                                  \
3373      TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
3374  
3375  DO_ZZI(SMAX, smax)
3376  DO_ZZI(UMAX, umax)
3377  DO_ZZI(SMIN, smin)
3378  DO_ZZI(UMIN, umin)
3379  
3380  #undef DO_ZZI
3381  
3382  static gen_helper_gvec_4 * const dot_fns[2][2] = {
3383      { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3384      { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3385  };
3386  TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
3387             dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
3388  
3389  /*
3390   * SVE Multiply - Indexed
3391   */
3392  
TRANS_FEAT(SDOT_zzxw_s,aa64_sve,gen_gvec_ool_arg_zzxz,gen_helper_gvec_sdot_idx_b,a)3393  TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3394             gen_helper_gvec_sdot_idx_b, a)
3395  TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3396             gen_helper_gvec_sdot_idx_h, a)
3397  TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3398             gen_helper_gvec_udot_idx_b, a)
3399  TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3400             gen_helper_gvec_udot_idx_h, a)
3401  
3402  TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3403             gen_helper_gvec_sudot_idx_b, a)
3404  TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3405             gen_helper_gvec_usdot_idx_b, a)
3406  
3407  #define DO_SVE2_RRX(NAME, FUNC) \
3408      TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC,          \
3409                 a->rd, a->rn, a->rm, a->index)
3410  
3411  DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3412  DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3413  DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
3414  
3415  DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3416  DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3417  DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
3418  
3419  DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3420  DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3421  DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
3422  
3423  #undef DO_SVE2_RRX
3424  
3425  #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
3426      TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC,          \
3427                 a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3428  
3429  DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3430  DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3431  DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3432  DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3433  
3434  DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3435  DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3436  DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3437  DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3438  
3439  DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3440  DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3441  DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3442  DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
3443  
3444  #undef DO_SVE2_RRX_TB
3445  
3446  #define DO_SVE2_RRXR(NAME, FUNC) \
3447      TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
3448  
3449  DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
3450  DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
3451  DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
3452  
3453  DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
3454  DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
3455  DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
3456  
3457  DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
3458  DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
3459  DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
3460  
3461  DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
3462  DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
3463  DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
3464  
3465  #undef DO_SVE2_RRXR
3466  
3467  #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
3468      TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,        \
3469                 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
3470  
3471  DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
3472  DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
3473  DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
3474  DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
3475  
3476  DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
3477  DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
3478  DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
3479  DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
3480  
3481  DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
3482  DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
3483  DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
3484  DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
3485  
3486  DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
3487  DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
3488  DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
3489  DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
3490  
3491  DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
3492  DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
3493  DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
3494  DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
3495  
3496  DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
3497  DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
3498  DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
3499  DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
3500  
3501  #undef DO_SVE2_RRXR_TB
3502  
3503  #define DO_SVE2_RRXR_ROT(NAME, FUNC) \
3504      TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,           \
3505                 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
3506  
3507  DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
3508  DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
3509  
3510  DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
3511  DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
3512  
3513  DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
3514  DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
3515  
3516  #undef DO_SVE2_RRXR_ROT
3517  
3518  /*
3519   *** SVE Floating Point Multiply-Add Indexed Group
3520   */
3521  
3522  static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
3523  {
3524      static gen_helper_gvec_4_ptr * const fns[4] = {
3525          NULL,
3526          gen_helper_gvec_fmla_idx_h,
3527          gen_helper_gvec_fmla_idx_s,
3528          gen_helper_gvec_fmla_idx_d,
3529      };
3530      return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
3531                                (a->index << 1) | sub,
3532                                a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3533  }
3534  
3535  TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
3536  TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
3537  
3538  /*
3539   *** SVE Floating Point Multiply Indexed Group
3540   */
3541  
3542  static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
3543      NULL,                       gen_helper_gvec_fmul_idx_h,
3544      gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
3545  };
3546  TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
3547             fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
3548             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3549  
3550  /*
3551   *** SVE Floating Point Fast Reduction Group
3552   */
3553  
3554  typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3555                                    TCGv_ptr, TCGv_i32);
3556  
do_reduce(DisasContext * s,arg_rpr_esz * a,gen_helper_fp_reduce * fn)3557  static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
3558                        gen_helper_fp_reduce *fn)
3559  {
3560      unsigned vsz, p2vsz;
3561      TCGv_i32 t_desc;
3562      TCGv_ptr t_zn, t_pg, status;
3563      TCGv_i64 temp;
3564  
3565      if (fn == NULL) {
3566          return false;
3567      }
3568      if (!sve_access_check(s)) {
3569          return true;
3570      }
3571  
3572      vsz = vec_full_reg_size(s);
3573      p2vsz = pow2ceil(vsz);
3574      t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
3575      temp = tcg_temp_new_i64();
3576      t_zn = tcg_temp_new_ptr();
3577      t_pg = tcg_temp_new_ptr();
3578  
3579      tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
3580      tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
3581      status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3582  
3583      fn(temp, t_zn, t_pg, status, t_desc);
3584  
3585      write_fp_dreg(s, a->rd, temp);
3586      return true;
3587  }
3588  
3589  #define DO_VPZ(NAME, name) \
3590      static gen_helper_fp_reduce * const name##_fns[4] = {                \
3591          NULL,                      gen_helper_sve_##name##_h,            \
3592          gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
3593      };                                                                   \
3594      TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
3595  
3596  DO_VPZ(FADDV, faddv)
3597  DO_VPZ(FMINNMV, fminnmv)
3598  DO_VPZ(FMAXNMV, fmaxnmv)
3599  DO_VPZ(FMINV, fminv)
3600  DO_VPZ(FMAXV, fmaxv)
3601  
3602  #undef DO_VPZ
3603  
3604  /*
3605   *** SVE Floating Point Unary Operations - Unpredicated Group
3606   */
3607  
3608  static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
3609      NULL,                     gen_helper_gvec_frecpe_h,
3610      gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
3611  };
3612  TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
3613  
3614  static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
3615      NULL,                      gen_helper_gvec_frsqrte_h,
3616      gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
3617  };
3618  TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3619  
3620  /*
3621   *** SVE Floating Point Compare with Zero Group
3622   */
3623  
do_ppz_fp(DisasContext * s,arg_rpr_esz * a,gen_helper_gvec_3_ptr * fn)3624  static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3625                        gen_helper_gvec_3_ptr *fn)
3626  {
3627      if (fn == NULL) {
3628          return false;
3629      }
3630      if (sve_access_check(s)) {
3631          unsigned vsz = vec_full_reg_size(s);
3632          TCGv_ptr status =
3633              fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3634  
3635          tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3636                             vec_full_reg_offset(s, a->rn),
3637                             pred_full_reg_offset(s, a->pg),
3638                             status, vsz, vsz, 0, fn);
3639      }
3640      return true;
3641  }
3642  
3643  #define DO_PPZ(NAME, name) \
3644      static gen_helper_gvec_3_ptr * const name##_fns[] = {         \
3645          NULL,                      gen_helper_sve_##name##_h,     \
3646          gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
3647      };                                                            \
3648      TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
3649  
3650  DO_PPZ(FCMGE_ppz0, fcmge0)
3651  DO_PPZ(FCMGT_ppz0, fcmgt0)
3652  DO_PPZ(FCMLE_ppz0, fcmle0)
3653  DO_PPZ(FCMLT_ppz0, fcmlt0)
3654  DO_PPZ(FCMEQ_ppz0, fcmeq0)
3655  DO_PPZ(FCMNE_ppz0, fcmne0)
3656  
3657  #undef DO_PPZ
3658  
3659  /*
3660   *** SVE floating-point trig multiply-add coefficient
3661   */
3662  
3663  static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
3664      NULL,                   gen_helper_sve_ftmad_h,
3665      gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
3666  };
3667  TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
3668                          ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
3669                          a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3670  
3671  /*
3672   *** SVE Floating Point Accumulating Reduction Group
3673   */
3674  
trans_FADDA(DisasContext * s,arg_rprr_esz * a)3675  static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3676  {
3677      typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3678                            TCGv_ptr, TCGv_ptr, TCGv_i32);
3679      static fadda_fn * const fns[3] = {
3680          gen_helper_sve_fadda_h,
3681          gen_helper_sve_fadda_s,
3682          gen_helper_sve_fadda_d,
3683      };
3684      unsigned vsz = vec_full_reg_size(s);
3685      TCGv_ptr t_rm, t_pg, t_fpst;
3686      TCGv_i64 t_val;
3687      TCGv_i32 t_desc;
3688  
3689      if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3690          return false;
3691      }
3692      s->is_nonstreaming = true;
3693      if (!sve_access_check(s)) {
3694          return true;
3695      }
3696  
3697      t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3698      t_rm = tcg_temp_new_ptr();
3699      t_pg = tcg_temp_new_ptr();
3700      tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
3701      tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
3702      t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3703      t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3704  
3705      fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3706  
3707      write_fp_dreg(s, a->rd, t_val);
3708      return true;
3709  }
3710  
3711  /*
3712   *** SVE Floating Point Arithmetic - Unpredicated Group
3713   */
3714  
3715  #define DO_FP3(NAME, name) \
3716      static gen_helper_gvec_3_ptr * const name##_fns[4] = {          \
3717          NULL, gen_helper_gvec_##name##_h,                           \
3718          gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3719      };                                                              \
3720      TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
3721  
3722  DO_FP3(FADD_zzz, fadd)
3723  DO_FP3(FSUB_zzz, fsub)
3724  DO_FP3(FMUL_zzz, fmul)
3725  DO_FP3(FRECPS, recps)
3726  DO_FP3(FRSQRTS, rsqrts)
3727  
3728  #undef DO_FP3
3729  
3730  static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
3731      NULL,                     gen_helper_gvec_ftsmul_h,
3732      gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
3733  };
3734  TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
3735                          ftsmul_fns[a->esz], a, 0)
3736  
3737  /*
3738   *** SVE Floating Point Arithmetic - Predicated Group
3739   */
3740  
3741  #define DO_ZPZZ_FP(NAME, FEAT, name) \
3742      static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
3743          NULL,                  gen_helper_##name##_h,           \
3744          gen_helper_##name##_s, gen_helper_##name##_d            \
3745      };                                                          \
3746      TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
3747  
3748  DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
3749  DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
3750  DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
3751  DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
3752  DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
3753  DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
3754  DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
3755  DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
3756  DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
3757  DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
3758  DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
3759  
3760  typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3761                                        TCGv_i64, TCGv_ptr, TCGv_i32);
3762  
do_fp_scalar(DisasContext * s,int zd,int zn,int pg,bool is_fp16,TCGv_i64 scalar,gen_helper_sve_fp2scalar * fn)3763  static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3764                           TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3765  {
3766      unsigned vsz = vec_full_reg_size(s);
3767      TCGv_ptr t_zd, t_zn, t_pg, status;
3768      TCGv_i32 desc;
3769  
3770      t_zd = tcg_temp_new_ptr();
3771      t_zn = tcg_temp_new_ptr();
3772      t_pg = tcg_temp_new_ptr();
3773      tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd));
3774      tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
3775      tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
3776  
3777      status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
3778      desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3779      fn(t_zd, t_zn, t_pg, scalar, status, desc);
3780  }
3781  
do_fp_imm(DisasContext * s,arg_rpri_esz * a,uint64_t imm,gen_helper_sve_fp2scalar * fn)3782  static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3783                        gen_helper_sve_fp2scalar *fn)
3784  {
3785      if (fn == NULL) {
3786          return false;
3787      }
3788      if (sve_access_check(s)) {
3789          do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
3790                       tcg_constant_i64(imm), fn);
3791      }
3792      return true;
3793  }
3794  
3795  #define DO_FP_IMM(NAME, name, const0, const1)                           \
3796      static gen_helper_sve_fp2scalar * const name##_fns[4] = {           \
3797          NULL, gen_helper_sve_##name##_h,                                \
3798          gen_helper_sve_##name##_s,                                      \
3799          gen_helper_sve_##name##_d                                       \
3800      };                                                                  \
3801      static uint64_t const name##_const[4][2] = {                        \
3802          { -1, -1 },                                                     \
3803          { float16_##const0, float16_##const1 },                         \
3804          { float32_##const0, float32_##const1 },                         \
3805          { float64_##const0, float64_##const1 },                         \
3806      };                                                                  \
3807      TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a,                     \
3808                 name##_const[a->esz][a->imm], name##_fns[a->esz])
3809  
DO_FP_IMM(FADD,fadds,half,one)3810  DO_FP_IMM(FADD, fadds, half, one)
3811  DO_FP_IMM(FSUB, fsubs, half, one)
3812  DO_FP_IMM(FMUL, fmuls, half, two)
3813  DO_FP_IMM(FSUBR, fsubrs, half, one)
3814  DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3815  DO_FP_IMM(FMINNM, fminnms, zero, one)
3816  DO_FP_IMM(FMAX, fmaxs, zero, one)
3817  DO_FP_IMM(FMIN, fmins, zero, one)
3818  
3819  #undef DO_FP_IMM
3820  
3821  static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3822                        gen_helper_gvec_4_ptr *fn)
3823  {
3824      if (fn == NULL) {
3825          return false;
3826      }
3827      if (sve_access_check(s)) {
3828          unsigned vsz = vec_full_reg_size(s);
3829          TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3830          tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3831                             vec_full_reg_offset(s, a->rn),
3832                             vec_full_reg_offset(s, a->rm),
3833                             pred_full_reg_offset(s, a->pg),
3834                             status, vsz, vsz, 0, fn);
3835      }
3836      return true;
3837  }
3838  
3839  #define DO_FPCMP(NAME, name) \
3840      static gen_helper_gvec_4_ptr * const name##_fns[4] = {            \
3841          NULL, gen_helper_sve_##name##_h,                              \
3842          gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3843      };                                                                \
3844      TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])
3845  
3846  DO_FPCMP(FCMGE, fcmge)
3847  DO_FPCMP(FCMGT, fcmgt)
3848  DO_FPCMP(FCMEQ, fcmeq)
3849  DO_FPCMP(FCMNE, fcmne)
3850  DO_FPCMP(FCMUO, fcmuo)
3851  DO_FPCMP(FACGE, facge)
3852  DO_FPCMP(FACGT, facgt)
3853  
3854  #undef DO_FPCMP
3855  
3856  static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
3857      NULL,                   gen_helper_sve_fcadd_h,
3858      gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
3859  };
3860  TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
3861             a->rd, a->rn, a->rm, a->pg, a->rot,
3862             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3863  
3864  #define DO_FMLA(NAME, name) \
3865      static gen_helper_gvec_5_ptr * const name##_fns[4] = {              \
3866          NULL, gen_helper_sve_##name##_h,                                \
3867          gen_helper_sve_##name##_s, gen_helper_sve_##name##_d            \
3868      };                                                                  \
3869      TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
3870                 a->rd, a->rn, a->rm, a->ra, a->pg, 0,                    \
3871                 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3872  
3873  DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3874  DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3875  DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3876  DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3877  
3878  #undef DO_FMLA
3879  
3880  static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
3881      NULL,                         gen_helper_sve_fcmla_zpzzz_h,
3882      gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
3883  };
3884  TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
3885             a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
3886             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3887  
3888  static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
3889      NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
3890  };
3891  TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
3892             a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
3893             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3894  
3895  /*
3896   *** SVE Floating Point Unary Operations Predicated Group
3897   */
3898  
3899  TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
3900             gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
3901  TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
3902             gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
3903  
3904  TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
3905             gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
3906  
3907  TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
3908             gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
3909  TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
3910             gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
3911  TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
3912             gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
3913  TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
3914             gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
3915  
3916  TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
3917             gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
3918  TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
3919             gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
3920  TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
3921             gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
3922  TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
3923             gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
3924  TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
3925             gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
3926  TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
3927             gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
3928  
3929  TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
3930             gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
3931  TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
3932             gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
3933  TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
3934             gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
3935  TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
3936             gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
3937  TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
3938             gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
3939  TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
3940             gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
3941  
3942  TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
3943             gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
3944  TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
3945             gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
3946  
3947  static gen_helper_gvec_3_ptr * const frint_fns[] = {
3948      NULL,
3949      gen_helper_sve_frint_h,
3950      gen_helper_sve_frint_s,
3951      gen_helper_sve_frint_d
3952  };
3953  TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
3954             a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3955  
3956  static gen_helper_gvec_3_ptr * const frintx_fns[] = {
3957      NULL,
3958      gen_helper_sve_frintx_h,
3959      gen_helper_sve_frintx_s,
3960      gen_helper_sve_frintx_d
3961  };
3962  TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
3963             a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3964  
do_frint_mode(DisasContext * s,arg_rpr_esz * a,ARMFPRounding mode,gen_helper_gvec_3_ptr * fn)3965  static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
3966                            ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
3967  {
3968      unsigned vsz;
3969      TCGv_i32 tmode;
3970      TCGv_ptr status;
3971  
3972      if (fn == NULL) {
3973          return false;
3974      }
3975      if (!sve_access_check(s)) {
3976          return true;
3977      }
3978  
3979      vsz = vec_full_reg_size(s);
3980      status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3981      tmode = gen_set_rmode(mode, status);
3982  
3983      tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3984                         vec_full_reg_offset(s, a->rn),
3985                         pred_full_reg_offset(s, a->pg),
3986                         status, vsz, vsz, 0, fn);
3987  
3988      gen_restore_rmode(tmode, status);
3989      return true;
3990  }
3991  
3992  TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
3993             FPROUNDING_TIEEVEN, frint_fns[a->esz])
3994  TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
3995             FPROUNDING_POSINF, frint_fns[a->esz])
3996  TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
3997             FPROUNDING_NEGINF, frint_fns[a->esz])
3998  TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
3999             FPROUNDING_ZERO, frint_fns[a->esz])
4000  TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
4001             FPROUNDING_TIEAWAY, frint_fns[a->esz])
4002  
4003  static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
4004      NULL,                    gen_helper_sve_frecpx_h,
4005      gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
4006  };
4007  TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
4008             a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4009  
4010  static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
4011      NULL,                   gen_helper_sve_fsqrt_h,
4012      gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
4013  };
4014  TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
4015             a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4016  
/*
 * SCVTF / UCVTF: one translator per size pairing, each bound to its own
 * gen_helper_sve_{s,u}cvt_XY helper through gen_gvec_fpst_arg_zpz.
 * Variants whose suffix ends in 'h' pass FPST_FPCR_F16; all others pass
 * FPST_FPCR.
 */
4017  TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4018             gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
4019  TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4020             gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
4021  TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4022             gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
4023  
4024  TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4025             gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
4026  TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4027             gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
4028  
4029  TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4030             gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
4031  TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4032             gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
4033  
4034  TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4035             gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
4036  TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4037             gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
4038  TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4039             gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
4040  
4041  TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4042             gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
4043  TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4044             gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
4045  TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4046             gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
4047  
4048  TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4049             gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
4050  
4051  /*
4052   *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4053   */
4054  
4055  /* Subroutine loading a vector register at VOFS of LEN bytes.
4056   * The load should begin at the address Rn + IMM.
4057   */
4058  
gen_sve_ldr(DisasContext * s,TCGv_ptr base,int vofs,int len,int rn,int imm)4059  void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
4060                   int len, int rn, int imm)
4061  {
4062      int len_align = QEMU_ALIGN_DOWN(len, 16);
4063      int len_remain = len % 16;
4064      int nparts = len / 16 + ctpop8(len_remain);
4065      int midx = get_mem_index(s);
4066      TCGv_i64 dirty_addr, clean_addr, t0, t1;
4067      TCGv_i128 t16;
4068  
4069      dirty_addr = tcg_temp_new_i64();
4070      tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4071      clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4072  
4073      /*
4074       * Note that unpredicated load/store of vector/predicate registers
4075       * are defined as a stream of bytes, which equates to little-endian
4076       * operations on larger quantities.
4077       * Attempt to keep code expansion to a minimum by limiting the
4078       * amount of unrolling done.
4079       */
4080      if (nparts <= 4) {
4081          int i;
4082  
4083          t0 = tcg_temp_new_i64();
4084          t1 = tcg_temp_new_i64();
4085          t16 = tcg_temp_new_i128();
4086  
4087          for (i = 0; i < len_align; i += 16) {
4088              tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
4089                                   MO_LE | MO_128 | MO_ATOM_NONE);
4090              tcg_gen_extr_i128_i64(t0, t1, t16);
4091              tcg_gen_st_i64(t0, base, vofs + i);
4092              tcg_gen_st_i64(t1, base, vofs + i + 8);
4093              tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4094          }
4095      } else {
4096          TCGLabel *loop = gen_new_label();
4097          TCGv_ptr tp, i = tcg_temp_new_ptr();
4098  
4099          tcg_gen_movi_ptr(i, 0);
4100          gen_set_label(loop);
4101  
4102          t16 = tcg_temp_new_i128();
4103          tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
4104                               MO_LE | MO_128 | MO_ATOM_NONE);
4105          tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4106  
4107          tp = tcg_temp_new_ptr();
4108          tcg_gen_add_ptr(tp, base, i);
4109          tcg_gen_addi_ptr(i, i, 16);
4110  
4111          t0 = tcg_temp_new_i64();
4112          t1 = tcg_temp_new_i64();
4113          tcg_gen_extr_i128_i64(t0, t1, t16);
4114  
4115          tcg_gen_st_i64(t0, tp, vofs);
4116          tcg_gen_st_i64(t1, tp, vofs + 8);
4117  
4118          tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4119      }
4120  
4121      /*
4122       * Predicate register loads can be any multiple of 2.
4123       * Note that we still store the entire 64-bit unit into tcg_env.
4124       */
4125      if (len_remain >= 8) {
4126          t0 = tcg_temp_new_i64();
4127          tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
4128          tcg_gen_st_i64(t0, base, vofs + len_align);
4129          len_remain -= 8;
4130          len_align += 8;
4131          if (len_remain) {
4132              tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4133          }
4134      }
4135      if (len_remain) {
4136          t0 = tcg_temp_new_i64();
4137          switch (len_remain) {
4138          case 2:
4139          case 4:
4140          case 8:
4141              tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4142                                  MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
4143              break;
4144  
4145          case 6:
4146              t1 = tcg_temp_new_i64();
4147              tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
4148              tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4149              tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
4150              tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4151              break;
4152  
4153          default:
4154              g_assert_not_reached();
4155          }
4156          tcg_gen_st_i64(t0, base, vofs + len_align);
4157      }
4158  }
4159  
4160  /* Similarly for stores.  */
gen_sve_str(DisasContext * s,TCGv_ptr base,int vofs,int len,int rn,int imm)4161  void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
4162                   int len, int rn, int imm)
4163  {
4164      int len_align = QEMU_ALIGN_DOWN(len, 16);
4165      int len_remain = len % 16;
4166      int nparts = len / 16 + ctpop8(len_remain);
4167      int midx = get_mem_index(s);
4168      TCGv_i64 dirty_addr, clean_addr, t0, t1;
4169      TCGv_i128 t16;
4170  
4171      dirty_addr = tcg_temp_new_i64();
4172      tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4173      clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4174  
4175      /* Note that unpredicated load/store of vector/predicate registers
4176       * are defined as a stream of bytes, which equates to little-endian
4177       * operations on larger quantities.  There is no nice way to force
4178       * a little-endian store for aarch64_be-linux-user out of line.
4179       *
4180       * Attempt to keep code expansion to a minimum by limiting the
4181       * amount of unrolling done.
4182       */
4183      if (nparts <= 4) {
4184          int i;
4185  
4186          t0 = tcg_temp_new_i64();
4187          t1 = tcg_temp_new_i64();
4188          t16 = tcg_temp_new_i128();
4189          for (i = 0; i < len_align; i += 16) {
4190              tcg_gen_ld_i64(t0, base, vofs + i);
4191              tcg_gen_ld_i64(t1, base, vofs + i + 8);
4192              tcg_gen_concat_i64_i128(t16, t0, t1);
4193              tcg_gen_qemu_st_i128(t16, clean_addr, midx,
4194                                   MO_LE | MO_128 | MO_ATOM_NONE);
4195              tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4196          }
4197      } else {
4198          TCGLabel *loop = gen_new_label();
4199          TCGv_ptr tp, i = tcg_temp_new_ptr();
4200  
4201          tcg_gen_movi_ptr(i, 0);
4202          gen_set_label(loop);
4203  
4204          t0 = tcg_temp_new_i64();
4205          t1 = tcg_temp_new_i64();
4206          tp = tcg_temp_new_ptr();
4207          tcg_gen_add_ptr(tp, base, i);
4208          tcg_gen_ld_i64(t0, tp, vofs);
4209          tcg_gen_ld_i64(t1, tp, vofs + 8);
4210          tcg_gen_addi_ptr(i, i, 16);
4211  
4212          t16 = tcg_temp_new_i128();
4213          tcg_gen_concat_i64_i128(t16, t0, t1);
4214  
4215          tcg_gen_qemu_st_i128(t16, clean_addr, midx,
4216                               MO_LE | MO_128 | MO_ATOM_NONE);
4217          tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4218  
4219          tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4220      }
4221  
4222      /* Predicate register stores can be any multiple of 2.  */
4223      if (len_remain >= 8) {
4224          t0 = tcg_temp_new_i64();
4225          tcg_gen_ld_i64(t0, base, vofs + len_align);
4226          tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
4227          len_remain -= 8;
4228          len_align += 8;
4229          if (len_remain) {
4230              tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4231          }
4232      }
4233      if (len_remain) {
4234          t0 = tcg_temp_new_i64();
4235          tcg_gen_ld_i64(t0, base, vofs + len_align);
4236  
4237          switch (len_remain) {
4238          case 2:
4239          case 4:
4240          case 8:
4241              tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4242                                  MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
4243              break;
4244  
4245          case 6:
4246              tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
4247              tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4248              tcg_gen_shri_i64(t0, t0, 32);
4249              tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
4250              break;
4251  
4252          default:
4253              g_assert_not_reached();
4254          }
4255      }
4256  }
4257  
/*
 * LDR (vector register): load a full vector register from
 * Rn + imm * (vector register size in bytes).
 *
 * Returns false when the aa64_sve feature is absent; otherwise true,
 * emitting the load only if sve_access_check() succeeds.
 */
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    int vsz, zofs;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    zofs = vec_full_reg_offset(s, a->rd);
    gen_sve_ldr(s, tcg_env, zofs, vsz, a->rn, a->imm * vsz);
    return true;
}
4270  
/*
 * LDR (predicate register): load a full predicate register from
 * Rn + imm * (predicate register size in bytes).
 *
 * Returns false when the aa64_sve feature is absent; otherwise true,
 * emitting the load only if sve_access_check() succeeds.
 */
static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    int psz, pofs;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    psz = pred_full_reg_size(s);
    pofs = pred_full_reg_offset(s, a->rd);
    gen_sve_ldr(s, tcg_env, pofs, psz, a->rn, a->imm * psz);
    return true;
}
4283  
/*
 * STR (vector register): store a full vector register to
 * Rn + imm * (vector register size in bytes).
 *
 * Returns false when the aa64_sve feature is absent; otherwise true,
 * emitting the store only if sve_access_check() succeeds.
 */
static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    int vsz, zofs;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    zofs = vec_full_reg_offset(s, a->rd);
    gen_sve_str(s, tcg_env, zofs, vsz, a->rn, a->imm * vsz);
    return true;
}
4296  
/*
 * STR (predicate register): store a full predicate register to
 * Rn + imm * (predicate register size in bytes).
 *
 * Returns false when the aa64_sve feature is absent; otherwise true,
 * emitting the store only if sve_access_check() succeeds.
 */
static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    int psz, pofs;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    psz = pred_full_reg_size(s);
    pofs = pred_full_reg_offset(s, a->rd);
    gen_sve_str(s, tcg_env, pofs, psz, a->rn, a->imm * psz);
    return true;
}
4309  
4310  /*
4311   *** SVE Memory - Contiguous Load Group
4312   */
4313  
4314  /* The memory mode of the dtype.  */
4315  static const MemOp dtype_mop[16] = {
4316      MO_UB, MO_UB, MO_UB, MO_UB,
4317      MO_SL, MO_UW, MO_UW, MO_UW,
4318      MO_SW, MO_SW, MO_UL, MO_UL,
4319      MO_SB, MO_SB, MO_SB, MO_UQ
4320  };
4321  
4322  #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4323  
/* Vector element size (as a shift count) for each of the 16 dtype encodings. */
static const uint8_t dtype_esz[16] = {
    [0]  = 0, [1]  = 1, [2]  = 2, [3]  = 3,
    [4]  = 3, [5]  = 1, [6]  = 2, [7]  = 3,
    [8]  = 3, [9]  = 2, [10] = 2, [11] = 3,
    [12] = 3, [13] = 2, [14] = 1, [15] = 3,
};
4331  
/*
 * Build the simd descriptor passed to the SVE memory helpers.
 *
 * DATA occupies the bits below SVE_MTEDESC_SHIFT.  When MTE is active
 * for this context, an MTEDESC word (mem index, TBI, TCMA, write flag
 * and total access size minus one) is placed above it; otherwise those
 * upper bits are zero.  VSZ is used for both size arguments of
 * simd_desc().
 */
uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
                          uint32_t msz, bool is_write, uint32_t data)
{
    uint32_t mte = 0;
    uint32_t sizem1;

    /* Everything must fit in the descriptor whether or not MTE is on. */
    assert(nregs >= 1 && nregs <= 4);
    sizem1 = (nregs << msz) - 1;
    assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT);
    assert(data < 1u << SVE_MTEDESC_SHIFT);

    if (s->mte_active[0]) {
        mte = FIELD_DP32(mte, MTEDESC, MIDX, get_mem_index(s));
        mte = FIELD_DP32(mte, MTEDESC, TBI, s->tbid);
        mte = FIELD_DP32(mte, MTEDESC, TCMA, s->tcma);
        mte = FIELD_DP32(mte, MTEDESC, WRITE, is_write);
        mte = FIELD_DP32(mte, MTEDESC, SIZEM1, sizem1);
        mte <<= SVE_MTEDESC_SHIFT;
    }

    return simd_desc(vsz, vsz, mte | data);
}
4354  
/*
 * Emit a call to the predicated contiguous memory helper FN.
 *
 * ADDR is the guest address; it is passed through clean_data_tbi()
 * here only when MTE is inactive.  PG selects the governing predicate,
 * DTYPE the memory element size, NREGS the number of registers moved
 * and IS_WRITE the access direction recorded in the descriptor.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t nregs, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    uint32_t desc;
    TCGv_ptr pred;

    if (!s->mte_active[0]) {
        addr = clean_data_tbi(s, addr);
    }

    /*
     * Instructions such as LD4 have more registers than there are
     * helper arguments, so the first register number travels in the
     * descriptor's data field instead.  LD1 does the same, to keep a
     * single calling convention.
     */
    desc = make_svemte_desc(s, vec_full_reg_size(s), nregs,
                            dtype_msz(dtype), is_write, zt);

    pred = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(pred, tcg_env, pred_full_reg_offset(s, pg));

    fn(tcg_env, pred, addr, tcg_constant_i32(desc));
}
4378  
4379  /* Indexed by [mte][be][dtype][nreg] */
4380  static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
4381      { /* mte inactive, little-endian */
4382        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4383            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4384          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4385          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4386          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4387  
4388          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4389          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4390            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4391          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4392          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4393  
4394          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4395          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4396          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4397            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4398          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4399  
4400          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4401          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4402          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4403          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4404            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4405  
4406        /* mte inactive, big-endian */
4407        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4408            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4409          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4410          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4411          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4412  
4413          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4414          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4415            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4416          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4417          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4418  
4419          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4420          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4421          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4422            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4423          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4424  
4425          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4426          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4427          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4428          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4429            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4430  
4431      { /* mte active, little-endian */
4432        { { gen_helper_sve_ld1bb_r_mte,
4433            gen_helper_sve_ld2bb_r_mte,
4434            gen_helper_sve_ld3bb_r_mte,
4435            gen_helper_sve_ld4bb_r_mte },
4436          { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4437          { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4438          { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4439  
4440          { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4441          { gen_helper_sve_ld1hh_le_r_mte,
4442            gen_helper_sve_ld2hh_le_r_mte,
4443            gen_helper_sve_ld3hh_le_r_mte,
4444            gen_helper_sve_ld4hh_le_r_mte },
4445          { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4446          { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4447  
4448          { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4449          { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4450          { gen_helper_sve_ld1ss_le_r_mte,
4451            gen_helper_sve_ld2ss_le_r_mte,
4452            gen_helper_sve_ld3ss_le_r_mte,
4453            gen_helper_sve_ld4ss_le_r_mte },
4454          { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4455  
4456          { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4457          { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4458          { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4459          { gen_helper_sve_ld1dd_le_r_mte,
4460            gen_helper_sve_ld2dd_le_r_mte,
4461            gen_helper_sve_ld3dd_le_r_mte,
4462            gen_helper_sve_ld4dd_le_r_mte } },
4463  
4464        /* mte active, big-endian */
4465        { { gen_helper_sve_ld1bb_r_mte,
4466            gen_helper_sve_ld2bb_r_mte,
4467            gen_helper_sve_ld3bb_r_mte,
4468            gen_helper_sve_ld4bb_r_mte },
4469          { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4470          { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4471          { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4472  
4473          { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4474          { gen_helper_sve_ld1hh_be_r_mte,
4475            gen_helper_sve_ld2hh_be_r_mte,
4476            gen_helper_sve_ld3hh_be_r_mte,
4477            gen_helper_sve_ld4hh_be_r_mte },
4478          { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4479          { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4480  
4481          { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4482          { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4483          { gen_helper_sve_ld1ss_be_r_mte,
4484            gen_helper_sve_ld2ss_be_r_mte,
4485            gen_helper_sve_ld3ss_be_r_mte,
4486            gen_helper_sve_ld4ss_be_r_mte },
4487          { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4488  
4489          { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4490          { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4491          { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4492          { gen_helper_sve_ld1dd_be_r_mte,
4493            gen_helper_sve_ld2dd_be_r_mte,
4494            gen_helper_sve_ld3dd_be_r_mte,
4495            gen_helper_sve_ld4dd_be_r_mte } } },
4496  };
4497  
/*
 * Emit a predicated contiguous load of NREG + 1 registers starting at
 * ZT, using the ldr_fns helper that matches the current MTE state,
 * data endianness, DTYPE and NREG.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    const int be = (s->be_data == MO_BE);
    gen_helper_gvec_mem *fn = ldr_fns[s->mte_active[0]][be][dtype][nreg];

    /*
     * The table has NULL entries, but the instruction encoding cannot
     * select any of them.
     */
    assert(fn != NULL);

    do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn);
}
4511  
/*
 * Contiguous load, scalar plus scalar: the address is
 * Rn + (Rm << msz), with msz taken from the dtype.
 *
 * Returns false when Rm is 31 or the aa64_sve feature is absent;
 * otherwise true, emitting the load only if sve_access_check() succeeds.
 */
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    TCGv_i64 addr;

    if (a->rm == 31) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    addr = tcg_temp_new_i64();
    tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
    tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
    do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    return true;
}
4525  
/*
 * Contiguous load, scalar plus immediate: the immediate counts whole
 * register groups, so the byte offset is
 * (imm * elements * (nreg + 1)) << msz.
 *
 * Returns false when the aa64_sve feature is absent; otherwise true,
 * emitting the load only if sve_access_check() succeeds.
 */
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
    int vsz, elements, off;
    TCGv_i64 addr;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    elements = vsz >> dtype_esz[a->dtype];
    off = (a->imm * elements * (a->nreg + 1)) << dtype_msz(a->dtype);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
    do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    return true;
}
4543  
/*
 * LDFF1, scalar plus scalar: the address is Rn + (Rm << msz).
 *
 * The instruction is flagged as non-streaming before the access check.
 * Returns false when the aa64_sve feature is absent; otherwise true,
 * emitting the load only if sve_access_check() succeeds.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    /* Indexed by [mte][be][dtype]. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        [0] = {     /* MTE inactive */
            [0] = {     /* little-endian */
                gen_helper_sve_ldff1bb_r,
                gen_helper_sve_ldff1bhu_r,
                gen_helper_sve_ldff1bsu_r,
                gen_helper_sve_ldff1bdu_r,

                gen_helper_sve_ldff1sds_le_r,
                gen_helper_sve_ldff1hh_le_r,
                gen_helper_sve_ldff1hsu_le_r,
                gen_helper_sve_ldff1hdu_le_r,

                gen_helper_sve_ldff1hds_le_r,
                gen_helper_sve_ldff1hss_le_r,
                gen_helper_sve_ldff1ss_le_r,
                gen_helper_sve_ldff1sdu_le_r,

                gen_helper_sve_ldff1bds_r,
                gen_helper_sve_ldff1bss_r,
                gen_helper_sve_ldff1bhs_r,
                gen_helper_sve_ldff1dd_le_r,
            },
            [1] = {     /* big-endian */
                gen_helper_sve_ldff1bb_r,
                gen_helper_sve_ldff1bhu_r,
                gen_helper_sve_ldff1bsu_r,
                gen_helper_sve_ldff1bdu_r,

                gen_helper_sve_ldff1sds_be_r,
                gen_helper_sve_ldff1hh_be_r,
                gen_helper_sve_ldff1hsu_be_r,
                gen_helper_sve_ldff1hdu_be_r,

                gen_helper_sve_ldff1hds_be_r,
                gen_helper_sve_ldff1hss_be_r,
                gen_helper_sve_ldff1ss_be_r,
                gen_helper_sve_ldff1sdu_be_r,

                gen_helper_sve_ldff1bds_r,
                gen_helper_sve_ldff1bss_r,
                gen_helper_sve_ldff1bhs_r,
                gen_helper_sve_ldff1dd_be_r,
            },
        },
        [1] = {     /* MTE active */
            [0] = {     /* little-endian */
                gen_helper_sve_ldff1bb_r_mte,
                gen_helper_sve_ldff1bhu_r_mte,
                gen_helper_sve_ldff1bsu_r_mte,
                gen_helper_sve_ldff1bdu_r_mte,

                gen_helper_sve_ldff1sds_le_r_mte,
                gen_helper_sve_ldff1hh_le_r_mte,
                gen_helper_sve_ldff1hsu_le_r_mte,
                gen_helper_sve_ldff1hdu_le_r_mte,

                gen_helper_sve_ldff1hds_le_r_mte,
                gen_helper_sve_ldff1hss_le_r_mte,
                gen_helper_sve_ldff1ss_le_r_mte,
                gen_helper_sve_ldff1sdu_le_r_mte,

                gen_helper_sve_ldff1bds_r_mte,
                gen_helper_sve_ldff1bss_r_mte,
                gen_helper_sve_ldff1bhs_r_mte,
                gen_helper_sve_ldff1dd_le_r_mte,
            },
            [1] = {     /* big-endian */
                gen_helper_sve_ldff1bb_r_mte,
                gen_helper_sve_ldff1bhu_r_mte,
                gen_helper_sve_ldff1bsu_r_mte,
                gen_helper_sve_ldff1bdu_r_mte,

                gen_helper_sve_ldff1sds_be_r_mte,
                gen_helper_sve_ldff1hh_be_r_mte,
                gen_helper_sve_ldff1hsu_be_r_mte,
                gen_helper_sve_ldff1hdu_be_r_mte,

                gen_helper_sve_ldff1hds_be_r_mte,
                gen_helper_sve_ldff1hss_be_r_mte,
                gen_helper_sve_ldff1ss_be_r_mte,
                gen_helper_sve_ldff1sdu_be_r_mte,

                gen_helper_sve_ldff1bds_r_mte,
                gen_helper_sve_ldff1bss_r_mte,
                gen_helper_sve_ldff1bhs_r_mte,
                gen_helper_sve_ldff1dd_be_r_mte,
            },
        },
    };
    gen_helper_gvec_mem *fn;
    TCGv_i64 addr;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    addr = tcg_temp_new_i64();
    tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
    tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));

    fn = fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype];
    do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, fn);
    return true;
}
4645  
/*
 * LDNF1, scalar plus immediate: the immediate counts whole vectors, so
 * the byte offset is (imm * elements) << msz.
 *
 * The instruction is flagged as non-streaming before the access check.
 * Returns false when the aa64_sve feature is absent; otherwise true,
 * emitting the load only if sve_access_check() succeeds.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    /* Indexed by [mte][be][dtype]. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        [0] = {     /* MTE inactive */
            [0] = {     /* little-endian */
                gen_helper_sve_ldnf1bb_r,
                gen_helper_sve_ldnf1bhu_r,
                gen_helper_sve_ldnf1bsu_r,
                gen_helper_sve_ldnf1bdu_r,

                gen_helper_sve_ldnf1sds_le_r,
                gen_helper_sve_ldnf1hh_le_r,
                gen_helper_sve_ldnf1hsu_le_r,
                gen_helper_sve_ldnf1hdu_le_r,

                gen_helper_sve_ldnf1hds_le_r,
                gen_helper_sve_ldnf1hss_le_r,
                gen_helper_sve_ldnf1ss_le_r,
                gen_helper_sve_ldnf1sdu_le_r,

                gen_helper_sve_ldnf1bds_r,
                gen_helper_sve_ldnf1bss_r,
                gen_helper_sve_ldnf1bhs_r,
                gen_helper_sve_ldnf1dd_le_r,
            },
            [1] = {     /* big-endian */
                gen_helper_sve_ldnf1bb_r,
                gen_helper_sve_ldnf1bhu_r,
                gen_helper_sve_ldnf1bsu_r,
                gen_helper_sve_ldnf1bdu_r,

                gen_helper_sve_ldnf1sds_be_r,
                gen_helper_sve_ldnf1hh_be_r,
                gen_helper_sve_ldnf1hsu_be_r,
                gen_helper_sve_ldnf1hdu_be_r,

                gen_helper_sve_ldnf1hds_be_r,
                gen_helper_sve_ldnf1hss_be_r,
                gen_helper_sve_ldnf1ss_be_r,
                gen_helper_sve_ldnf1sdu_be_r,

                gen_helper_sve_ldnf1bds_r,
                gen_helper_sve_ldnf1bss_r,
                gen_helper_sve_ldnf1bhs_r,
                gen_helper_sve_ldnf1dd_be_r,
            },
        },
        [1] = {     /* MTE active */
            [0] = {     /* little-endian */
                gen_helper_sve_ldnf1bb_r_mte,
                gen_helper_sve_ldnf1bhu_r_mte,
                gen_helper_sve_ldnf1bsu_r_mte,
                gen_helper_sve_ldnf1bdu_r_mte,

                gen_helper_sve_ldnf1sds_le_r_mte,
                gen_helper_sve_ldnf1hh_le_r_mte,
                gen_helper_sve_ldnf1hsu_le_r_mte,
                gen_helper_sve_ldnf1hdu_le_r_mte,

                gen_helper_sve_ldnf1hds_le_r_mte,
                gen_helper_sve_ldnf1hss_le_r_mte,
                gen_helper_sve_ldnf1ss_le_r_mte,
                gen_helper_sve_ldnf1sdu_le_r_mte,

                gen_helper_sve_ldnf1bds_r_mte,
                gen_helper_sve_ldnf1bss_r_mte,
                gen_helper_sve_ldnf1bhs_r_mte,
                gen_helper_sve_ldnf1dd_le_r_mte,
            },
            [1] = {     /* big-endian */
                gen_helper_sve_ldnf1bb_r_mte,
                gen_helper_sve_ldnf1bhu_r_mte,
                gen_helper_sve_ldnf1bsu_r_mte,
                gen_helper_sve_ldnf1bdu_r_mte,

                gen_helper_sve_ldnf1sds_be_r_mte,
                gen_helper_sve_ldnf1hh_be_r_mte,
                gen_helper_sve_ldnf1hsu_be_r_mte,
                gen_helper_sve_ldnf1hdu_be_r_mte,

                gen_helper_sve_ldnf1hds_be_r_mte,
                gen_helper_sve_ldnf1hss_be_r_mte,
                gen_helper_sve_ldnf1ss_be_r_mte,
                gen_helper_sve_ldnf1sdu_be_r_mte,

                gen_helper_sve_ldnf1bds_r_mte,
                gen_helper_sve_ldnf1bss_r_mte,
                gen_helper_sve_ldnf1bhs_r_mte,
                gen_helper_sve_ldnf1dd_be_r_mte,
            },
        },
    };
    gen_helper_gvec_mem *fn;
    int vsz, elements, off;
    TCGv_i64 addr;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    elements = vsz >> dtype_esz[a->dtype];
    off = (a->imm * elements) << dtype_msz(a->dtype);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);

    fn = fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype];
    do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, fn);
    return true;
}
4750  
/*
 * Load one quadword (16 bytes) under predicate PG into the bottom of
 * ZT, then replicate it across the rest of the vector.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    gen_helper_gvec_mem *fn;
    TCGv_ptr t_pg;
    uint32_t desc;
    int poff;

    /* The first quadword goes through the ordinary predicated load helper. */
    if (!s->mte_active[0]) {
        addr = clean_data_tbi(s, addr);
    }

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * The helper is invoked with a 16-byte vector size, and it
         * asserts that no predicate bits are set beyond that size.
         * Copy the low 16 predicate bits, zero-extended, into a scratch
         * predicate and use that instead.
         */
        TCGv_i64 p16 = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(p16, tcg_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(p16, tcg_env, poff);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, tcg_env, poff);

    fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt);
    fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));

    if (vsz <= 16) {
        return;
    }

    /* Broadcast the quadword just loaded to the remaining vsz - 16 bytes. */
    {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
4795  
/*
 * LD1RQ, register-offset form.
 *
 * The address is the value read via cpu_reg_sp() for rn plus the rm
 * register shifted left by the memory element size.  rm == 31 is not a
 * valid encoding and is rejected, as is a CPU without the SVE feature.
 */
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        /* The access check has dealt with the instruction.  */
        return true;
    }

    int shift = dtype_msz(a->dtype);
    TCGv_i64 ea = tcg_temp_new_i64();

    tcg_gen_shli_i64(ea, cpu_reg(s, a->rm), shift);
    tcg_gen_add_i64(ea, ea, cpu_reg_sp(s, a->rn));
    do_ldrq(s, a->rd, a->pg, ea, a->dtype);
    return true;
}
4810  
/*
 * LD1RQ, immediate-offset form.
 *
 * The immediate counts 16-byte quadwords, so the byte offset added to
 * the base register is imm * 16.
 */
static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 ea = tcg_temp_new_i64();

    tcg_gen_addi_i64(ea, cpu_reg_sp(s, a->rn), a->imm * 16);
    do_ldrq(s, a->rd, a->pg, ea, a->dtype);
    return true;
}
4823  
/*
 * Emit a load-and-replicate-octaword.
 *
 * The low 32 bytes of vector register @zt are filled by the ordinary
 * predicated contiguous-load helper selected by @dtype, governed by
 * predicate register @pg and reading from @addr.  The octaword is then
 * replicated in 32-byte units; any part of the vector beyond the last
 * whole unit is zeroed.
 *
 * A vector shorter than 32 bytes makes the instruction UNDEFINED.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    const unsigned vec_bytes = vec_full_reg_size(s);
    unsigned whole_bytes, tail_bytes;
    gen_helper_gvec_mem *load_fn;
    TCGv_ptr pred_ptr;
    int pred_off, zt_off;
    uint32_t desc;

    if (vec_bytes < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  The caller must therefore not return false
         * once we get here.
         */
        unallocated_encoding(s);
        return;
    }

    /* Without MTE the address goes through clean_data_tbi() first.  */
    if (!s->mte_active[0]) {
        addr = clean_data_tbi(s, addr);
    }

    pred_off = pred_full_reg_offset(s, pg);
    if (vec_bytes > 32) {
        /*
         * The helper below is invoked as if the vector were two quadwords
         * long, and there is an assert elsewhere that no predicate bits
         * are set beyond VQ.  Hand the helper a scratch predicate that
         * holds only the low 32 predicate bits, zero-extended.
         */
        TCGv_i64 low_bits = tcg_temp_new_i64();

#if HOST_BIG_ENDIAN
        pred_off += 4;
#endif
        tcg_gen_ld32u_i64(low_bits, tcg_env, pred_off);

        pred_off = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(low_bits, tcg_env, pred_off);
    }

    pred_ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(pred_ptr, tcg_env, pred_off);

    /* Load the first octaword with the normal predicated load helper.  */
    load_fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt);
    load_fn(tcg_env, pred_ptr, addr, tcg_constant_i32(desc));

    /*
     * Replicate that first octaword in units of 32 bytes, then clear
     * whatever is left when the vector size is not a multiple of 32.
     */
    zt_off = vec_full_reg_offset(s, zt);
    whole_bytes = QEMU_ALIGN_DOWN(vec_bytes, 32);
    tail_bytes = vec_bytes - whole_bytes;

    if (vec_bytes >= 64) {
        tcg_gen_gvec_dup_mem(5, zt_off + 32, zt_off,
                             whole_bytes - 32, whole_bytes - 32);
    }
    if (tail_bytes != 0) {
        tcg_gen_gvec_dup_imm(MO_64, zt_off + whole_bytes,
                             tail_bytes, tail_bytes, 0);
    }
}
4888  
/*
 * LD1RO, register-offset form.
 *
 * Requires the aa64_sve_f64mm feature; rm == 31 is not a valid encoding.
 * The instruction is marked non-streaming before the access check.
 * The address is the value read via cpu_reg_sp() for rn plus the rm
 * register shifted left by the memory element size.
 */
static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s) || a->rm == 31) {
        return false;
    }

    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 ea = tcg_temp_new_i64();

    tcg_gen_shli_i64(ea, cpu_reg(s, a->rm), dtype_msz(a->dtype));
    tcg_gen_add_i64(ea, ea, cpu_reg_sp(s, a->rn));
    do_ldro(s, a->rd, a->pg, ea, a->dtype);
    return true;
}
4906  
/*
 * LD1RO, immediate-offset form.
 *
 * Requires the aa64_sve_f64mm feature and is marked non-streaming before
 * the access check.  The immediate counts 32-byte octawords, so the byte
 * offset added to the base register is imm * 32.
 */
static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }

    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 ea = tcg_temp_new_i64();

    tcg_gen_addi_i64(ea, cpu_reg_sp(s, a->rn), a->imm * 32);
    do_ldro(s, a->rd, a->pg, ea, a->dtype);
    return true;
}
4920  
4921  /* Load and broadcast element.  */
trans_LD1R_zpri(DisasContext * s,arg_rpri_load * a)4922  static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4923  {
4924      unsigned vsz = vec_full_reg_size(s);
4925      unsigned psz = pred_full_reg_size(s);
4926      unsigned esz = dtype_esz[a->dtype];
4927      unsigned msz = dtype_msz(a->dtype);
4928      TCGLabel *over;
4929      TCGv_i64 temp, clean_addr;
4930      MemOp memop;
4931  
4932      if (!dc_isar_feature(aa64_sve, s)) {
4933          return false;
4934      }
4935      if (!sve_access_check(s)) {
4936          return true;
4937      }
4938  
4939      over = gen_new_label();
4940  
4941      /* If the guarding predicate has no bits set, no load occurs.  */
4942      if (psz <= 8) {
4943          /* Reduce the pred_esz_masks value simply to reduce the
4944           * size of the code generated here.
4945           */
4946          uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4947          temp = tcg_temp_new_i64();
4948          tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg));
4949          tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4950          tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4951      } else {
4952          TCGv_i32 t32 = tcg_temp_new_i32();
4953          find_last_active(s, t32, esz, a->pg);
4954          tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4955      }
4956  
4957      /* Load the data.  */
4958      temp = tcg_temp_new_i64();
4959      tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4960  
4961      memop = finalize_memop(s, dtype_mop[a->dtype]);
4962      clean_addr = gen_mte_check1(s, temp, false, true, memop);
4963      tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop);
4964  
4965      /* Broadcast to *all* elements.  */
4966      tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4967                           vsz, vsz, temp);
4968  
4969      /* Zero the inactive elements.  */
4970      gen_set_label(over);
4971      return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
4972  }
4973  
/*
 * Emit a predicated contiguous store of @nreg + 1 vector registers
 * starting at @zt, governed by predicate @pg, to @addr.
 *
 * @msz is the memory element size and @esz the vector element size.
 * For a single-register store (nreg == 0) any msz <= esz is available;
 * for the multi-register stores msz must equal esz.  The helper is
 * picked according to MTE state and data endianness and handed to
 * do_mem_zpa().
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1: indexed by [mte][be][msz][esz]; NULL wherever msz > esz.  */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        [0] = { /* MTE inactive */
            [0] = { /* little-endian */
                { gen_helper_sve_st1bb_r,
                  gen_helper_sve_st1bh_r,
                  gen_helper_sve_st1bs_r,
                  gen_helper_sve_st1bd_r },
                { NULL,
                  gen_helper_sve_st1hh_le_r,
                  gen_helper_sve_st1hs_le_r,
                  gen_helper_sve_st1hd_le_r },
                { NULL, NULL,
                  gen_helper_sve_st1ss_le_r,
                  gen_helper_sve_st1sd_le_r },
                { NULL, NULL, NULL,
                  gen_helper_sve_st1dd_le_r },
            },
            [1] = { /* big-endian */
                { gen_helper_sve_st1bb_r,
                  gen_helper_sve_st1bh_r,
                  gen_helper_sve_st1bs_r,
                  gen_helper_sve_st1bd_r },
                { NULL,
                  gen_helper_sve_st1hh_be_r,
                  gen_helper_sve_st1hs_be_r,
                  gen_helper_sve_st1hd_be_r },
                { NULL, NULL,
                  gen_helper_sve_st1ss_be_r,
                  gen_helper_sve_st1sd_be_r },
                { NULL, NULL, NULL,
                  gen_helper_sve_st1dd_be_r },
            },
        },
        [1] = { /* MTE active */
            [0] = { /* little-endian */
                { gen_helper_sve_st1bb_r_mte,
                  gen_helper_sve_st1bh_r_mte,
                  gen_helper_sve_st1bs_r_mte,
                  gen_helper_sve_st1bd_r_mte },
                { NULL,
                  gen_helper_sve_st1hh_le_r_mte,
                  gen_helper_sve_st1hs_le_r_mte,
                  gen_helper_sve_st1hd_le_r_mte },
                { NULL, NULL,
                  gen_helper_sve_st1ss_le_r_mte,
                  gen_helper_sve_st1sd_le_r_mte },
                { NULL, NULL, NULL,
                  gen_helper_sve_st1dd_le_r_mte },
            },
            [1] = { /* big-endian */
                { gen_helper_sve_st1bb_r_mte,
                  gen_helper_sve_st1bh_r_mte,
                  gen_helper_sve_st1bs_r_mte,
                  gen_helper_sve_st1bd_r_mte },
                { NULL,
                  gen_helper_sve_st1hh_be_r_mte,
                  gen_helper_sve_st1hs_be_r_mte,
                  gen_helper_sve_st1hd_be_r_mte },
                { NULL, NULL,
                  gen_helper_sve_st1ss_be_r_mte,
                  gen_helper_sve_st1sd_be_r_mte },
                { NULL, NULL, NULL,
                  gen_helper_sve_st1dd_be_r_mte },
            },
        },
    };
    /* ST2/ST3/ST4: indexed by [mte][be][nreg - 1][msz].  */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        [0] = { /* MTE inactive */
            [0] = { /* little-endian */
                { gen_helper_sve_st2bb_r,
                  gen_helper_sve_st2hh_le_r,
                  gen_helper_sve_st2ss_le_r,
                  gen_helper_sve_st2dd_le_r },
                { gen_helper_sve_st3bb_r,
                  gen_helper_sve_st3hh_le_r,
                  gen_helper_sve_st3ss_le_r,
                  gen_helper_sve_st3dd_le_r },
                { gen_helper_sve_st4bb_r,
                  gen_helper_sve_st4hh_le_r,
                  gen_helper_sve_st4ss_le_r,
                  gen_helper_sve_st4dd_le_r },
            },
            [1] = { /* big-endian */
                { gen_helper_sve_st2bb_r,
                  gen_helper_sve_st2hh_be_r,
                  gen_helper_sve_st2ss_be_r,
                  gen_helper_sve_st2dd_be_r },
                { gen_helper_sve_st3bb_r,
                  gen_helper_sve_st3hh_be_r,
                  gen_helper_sve_st3ss_be_r,
                  gen_helper_sve_st3dd_be_r },
                { gen_helper_sve_st4bb_r,
                  gen_helper_sve_st4hh_be_r,
                  gen_helper_sve_st4ss_be_r,
                  gen_helper_sve_st4dd_be_r },
            },
        },
        [1] = { /* MTE active */
            [0] = { /* little-endian */
                { gen_helper_sve_st2bb_r_mte,
                  gen_helper_sve_st2hh_le_r_mte,
                  gen_helper_sve_st2ss_le_r_mte,
                  gen_helper_sve_st2dd_le_r_mte },
                { gen_helper_sve_st3bb_r_mte,
                  gen_helper_sve_st3hh_le_r_mte,
                  gen_helper_sve_st3ss_le_r_mte,
                  gen_helper_sve_st3dd_le_r_mte },
                { gen_helper_sve_st4bb_r_mte,
                  gen_helper_sve_st4hh_le_r_mte,
                  gen_helper_sve_st4ss_le_r_mte,
                  gen_helper_sve_st4dd_le_r_mte },
            },
            [1] = { /* big-endian */
                { gen_helper_sve_st2bb_r_mte,
                  gen_helper_sve_st2hh_be_r_mte,
                  gen_helper_sve_st2ss_be_r_mte,
                  gen_helper_sve_st2dd_be_r_mte },
                { gen_helper_sve_st3bb_r_mte,
                  gen_helper_sve_st3hh_be_r_mte,
                  gen_helper_sve_st3ss_be_r_mte,
                  gen_helper_sve_st3dd_be_r_mte },
                { gen_helper_sve_st4bb_r_mte,
                  gen_helper_sve_st4hh_be_r_mte,
                  gen_helper_sve_st4ss_be_r_mte,
                  gen_helper_sve_st4dd_be_r_mte },
            },
        },
    };
    const int big_endian = s->be_data == MO_BE;
    gen_helper_gvec_mem *store_fn;

    if (nreg != 0) {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        store_fn = fn_multiple[s->mte_active[0]][big_endian][nreg - 1][msz];
    } else {
        /* ST1 */
        store_fn = fn_single[s->mte_active[0]][big_endian][msz][esz];
    }
    assert(store_fn != NULL);

    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, store_fn);
}
5096  
/*
 * Contiguous store, register-offset form.
 *
 * Rejected without the SVE feature, when rm == 31, or when the memory
 * element is wider than the vector element.  The address is the value
 * read via cpu_reg_sp() for rn plus the rm register shifted left by msz.
 */
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (a->rm == 31) {
        return false;
    }
    if (a->msz > a->esz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 ea = tcg_temp_new_i64();

    tcg_gen_shli_i64(ea, cpu_reg(s, a->rm), a->msz);
    tcg_gen_add_i64(ea, ea, cpu_reg_sp(s, a->rn));
    do_st_zpa(s, a->rd, a->pg, ea, a->msz, a->esz, a->nreg);
    return true;
}
5113  
/*
 * Contiguous store, immediate-offset form.
 *
 * Rejected without the SVE feature or when the memory element is wider
 * than the vector element.  The byte offset added to the base register
 * is (imm * elements * (nreg + 1)) << msz, where elements is the vector
 * size in bytes shifted right by esz.
 */
static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
    if (!dc_isar_feature(aa64_sve, s) || a->msz > a->esz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    int vec_bytes = vec_full_reg_size(s);
    int elements = vec_bytes >> a->esz;
    TCGv_i64 ea = tcg_temp_new_i64();

    tcg_gen_addi_i64(ea, cpu_reg_sp(s, a->rn),
                     (a->imm * elements * (a->nreg + 1)) << a->msz);
    do_st_zpa(s, a->rd, a->pg, ea, a->msz, a->esz, a->nreg);
    return true;
}
5133  
5134  /*
5135   *** SVE gather loads / scatter stores
5136   */
5137  
/*
 * Common code for the gather loads and scatter stores.
 *
 * Builds env-relative pointers to vector register @zt, predicate
 * register @pg and vector register @zm, packs an SVE/MTE descriptor
 * from the full vector size, @msz, @is_write and @scale, and calls @fn
 * with those pointers plus the @scalar operand.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    TCGv_ptr zm_ptr, pg_ptr, zt_ptr;
    uint32_t desc;

    zm_ptr = tcg_temp_new_ptr();
    pg_ptr = tcg_temp_new_ptr();
    zt_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pg_ptr, tcg_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(zm_ptr, tcg_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(zt_ptr, tcg_env, vec_full_reg_offset(s, zt));

    desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale);
    fn(tcg_env, zt_ptr, pg_ptr, zm_ptr, scalar, tcg_constant_i32(desc));
}
5154  
5155  /* Indexed by [mte][be][ff][xs][u][msz].  */
5156  static gen_helper_gvec_mem_scatter * const
5157  gather_load_fn32[2][2][2][2][2][3] = {
5158      { /* MTE Inactive */
5159          { /* Little-endian */
5160              { { { gen_helper_sve_ldbss_zsu,
5161                    gen_helper_sve_ldhss_le_zsu,
5162                    NULL, },
5163                  { gen_helper_sve_ldbsu_zsu,
5164                    gen_helper_sve_ldhsu_le_zsu,
5165                    gen_helper_sve_ldss_le_zsu, } },
5166                { { gen_helper_sve_ldbss_zss,
5167                    gen_helper_sve_ldhss_le_zss,
5168                    NULL, },
5169                  { gen_helper_sve_ldbsu_zss,
5170                    gen_helper_sve_ldhsu_le_zss,
5171                    gen_helper_sve_ldss_le_zss, } } },
5172  
5173              /* First-fault */
5174              { { { gen_helper_sve_ldffbss_zsu,
5175                    gen_helper_sve_ldffhss_le_zsu,
5176                    NULL, },
5177                  { gen_helper_sve_ldffbsu_zsu,
5178                    gen_helper_sve_ldffhsu_le_zsu,
5179                    gen_helper_sve_ldffss_le_zsu, } },
5180                { { gen_helper_sve_ldffbss_zss,
5181                    gen_helper_sve_ldffhss_le_zss,
5182                    NULL, },
5183                  { gen_helper_sve_ldffbsu_zss,
5184                    gen_helper_sve_ldffhsu_le_zss,
5185                    gen_helper_sve_ldffss_le_zss, } } } },
5186  
5187          { /* Big-endian */
5188              { { { gen_helper_sve_ldbss_zsu,
5189                    gen_helper_sve_ldhss_be_zsu,
5190                    NULL, },
5191                  { gen_helper_sve_ldbsu_zsu,
5192                    gen_helper_sve_ldhsu_be_zsu,
5193                    gen_helper_sve_ldss_be_zsu, } },
5194                { { gen_helper_sve_ldbss_zss,
5195                    gen_helper_sve_ldhss_be_zss,
5196                    NULL, },
5197                  { gen_helper_sve_ldbsu_zss,
5198                    gen_helper_sve_ldhsu_be_zss,
5199                    gen_helper_sve_ldss_be_zss, } } },
5200  
5201              /* First-fault */
5202              { { { gen_helper_sve_ldffbss_zsu,
5203                    gen_helper_sve_ldffhss_be_zsu,
5204                    NULL, },
5205                  { gen_helper_sve_ldffbsu_zsu,
5206                    gen_helper_sve_ldffhsu_be_zsu,
5207                    gen_helper_sve_ldffss_be_zsu, } },
5208                { { gen_helper_sve_ldffbss_zss,
5209                    gen_helper_sve_ldffhss_be_zss,
5210                    NULL, },
5211                  { gen_helper_sve_ldffbsu_zss,
5212                    gen_helper_sve_ldffhsu_be_zss,
5213                    gen_helper_sve_ldffss_be_zss, } } } } },
5214      { /* MTE Active */
5215          { /* Little-endian */
5216              { { { gen_helper_sve_ldbss_zsu_mte,
5217                    gen_helper_sve_ldhss_le_zsu_mte,
5218                    NULL, },
5219                  { gen_helper_sve_ldbsu_zsu_mte,
5220                    gen_helper_sve_ldhsu_le_zsu_mte,
5221                    gen_helper_sve_ldss_le_zsu_mte, } },
5222                { { gen_helper_sve_ldbss_zss_mte,
5223                    gen_helper_sve_ldhss_le_zss_mte,
5224                    NULL, },
5225                  { gen_helper_sve_ldbsu_zss_mte,
5226                    gen_helper_sve_ldhsu_le_zss_mte,
5227                    gen_helper_sve_ldss_le_zss_mte, } } },
5228  
5229              /* First-fault */
5230              { { { gen_helper_sve_ldffbss_zsu_mte,
5231                    gen_helper_sve_ldffhss_le_zsu_mte,
5232                    NULL, },
5233                  { gen_helper_sve_ldffbsu_zsu_mte,
5234                    gen_helper_sve_ldffhsu_le_zsu_mte,
5235                    gen_helper_sve_ldffss_le_zsu_mte, } },
5236                { { gen_helper_sve_ldffbss_zss_mte,
5237                    gen_helper_sve_ldffhss_le_zss_mte,
5238                    NULL, },
5239                  { gen_helper_sve_ldffbsu_zss_mte,
5240                    gen_helper_sve_ldffhsu_le_zss_mte,
5241                    gen_helper_sve_ldffss_le_zss_mte, } } } },
5242  
5243          { /* Big-endian */
5244              { { { gen_helper_sve_ldbss_zsu_mte,
5245                    gen_helper_sve_ldhss_be_zsu_mte,
5246                    NULL, },
5247                  { gen_helper_sve_ldbsu_zsu_mte,
5248                    gen_helper_sve_ldhsu_be_zsu_mte,
5249                    gen_helper_sve_ldss_be_zsu_mte, } },
5250                { { gen_helper_sve_ldbss_zss_mte,
5251                    gen_helper_sve_ldhss_be_zss_mte,
5252                    NULL, },
5253                  { gen_helper_sve_ldbsu_zss_mte,
5254                    gen_helper_sve_ldhsu_be_zss_mte,
5255                    gen_helper_sve_ldss_be_zss_mte, } } },
5256  
5257              /* First-fault */
5258              { { { gen_helper_sve_ldffbss_zsu_mte,
5259                    gen_helper_sve_ldffhss_be_zsu_mte,
5260                    NULL, },
5261                  { gen_helper_sve_ldffbsu_zsu_mte,
5262                    gen_helper_sve_ldffhsu_be_zsu_mte,
5263                    gen_helper_sve_ldffss_be_zsu_mte, } },
5264                { { gen_helper_sve_ldffbss_zss_mte,
5265                    gen_helper_sve_ldffhss_be_zss_mte,
5266                    NULL, },
5267                  { gen_helper_sve_ldffbsu_zss_mte,
5268                    gen_helper_sve_ldffhsu_be_zss_mte,
5269                    gen_helper_sve_ldffss_be_zss_mte, } } } } },
5270  };
5271  
5272  /* Note that we overload xs=2 to indicate 64-bit offset.  */
5273  static gen_helper_gvec_mem_scatter * const
5274  gather_load_fn64[2][2][2][3][2][4] = {
5275      { /* MTE Inactive */
5276          { /* Little-endian */
5277              { { { gen_helper_sve_ldbds_zsu,
5278                    gen_helper_sve_ldhds_le_zsu,
5279                    gen_helper_sve_ldsds_le_zsu,
5280                    NULL, },
5281                  { gen_helper_sve_ldbdu_zsu,
5282                    gen_helper_sve_ldhdu_le_zsu,
5283                    gen_helper_sve_ldsdu_le_zsu,
5284                    gen_helper_sve_lddd_le_zsu, } },
5285                { { gen_helper_sve_ldbds_zss,
5286                    gen_helper_sve_ldhds_le_zss,
5287                    gen_helper_sve_ldsds_le_zss,
5288                    NULL, },
5289                  { gen_helper_sve_ldbdu_zss,
5290                    gen_helper_sve_ldhdu_le_zss,
5291                    gen_helper_sve_ldsdu_le_zss,
5292                    gen_helper_sve_lddd_le_zss, } },
5293                { { gen_helper_sve_ldbds_zd,
5294                    gen_helper_sve_ldhds_le_zd,
5295                    gen_helper_sve_ldsds_le_zd,
5296                    NULL, },
5297                  { gen_helper_sve_ldbdu_zd,
5298                    gen_helper_sve_ldhdu_le_zd,
5299                    gen_helper_sve_ldsdu_le_zd,
5300                    gen_helper_sve_lddd_le_zd, } } },
5301  
5302              /* First-fault */
5303              { { { gen_helper_sve_ldffbds_zsu,
5304                    gen_helper_sve_ldffhds_le_zsu,
5305                    gen_helper_sve_ldffsds_le_zsu,
5306                    NULL, },
5307                  { gen_helper_sve_ldffbdu_zsu,
5308                    gen_helper_sve_ldffhdu_le_zsu,
5309                    gen_helper_sve_ldffsdu_le_zsu,
5310                    gen_helper_sve_ldffdd_le_zsu, } },
5311                { { gen_helper_sve_ldffbds_zss,
5312                    gen_helper_sve_ldffhds_le_zss,
5313                    gen_helper_sve_ldffsds_le_zss,
5314                    NULL, },
5315                  { gen_helper_sve_ldffbdu_zss,
5316                    gen_helper_sve_ldffhdu_le_zss,
5317                    gen_helper_sve_ldffsdu_le_zss,
5318                    gen_helper_sve_ldffdd_le_zss, } },
5319                { { gen_helper_sve_ldffbds_zd,
5320                    gen_helper_sve_ldffhds_le_zd,
5321                    gen_helper_sve_ldffsds_le_zd,
5322                    NULL, },
5323                  { gen_helper_sve_ldffbdu_zd,
5324                    gen_helper_sve_ldffhdu_le_zd,
5325                    gen_helper_sve_ldffsdu_le_zd,
5326                    gen_helper_sve_ldffdd_le_zd, } } } },
5327          { /* Big-endian */
5328              { { { gen_helper_sve_ldbds_zsu,
5329                    gen_helper_sve_ldhds_be_zsu,
5330                    gen_helper_sve_ldsds_be_zsu,
5331                    NULL, },
5332                  { gen_helper_sve_ldbdu_zsu,
5333                    gen_helper_sve_ldhdu_be_zsu,
5334                    gen_helper_sve_ldsdu_be_zsu,
5335                    gen_helper_sve_lddd_be_zsu, } },
5336                { { gen_helper_sve_ldbds_zss,
5337                    gen_helper_sve_ldhds_be_zss,
5338                    gen_helper_sve_ldsds_be_zss,
5339                    NULL, },
5340                  { gen_helper_sve_ldbdu_zss,
5341                    gen_helper_sve_ldhdu_be_zss,
5342                    gen_helper_sve_ldsdu_be_zss,
5343                    gen_helper_sve_lddd_be_zss, } },
5344                { { gen_helper_sve_ldbds_zd,
5345                    gen_helper_sve_ldhds_be_zd,
5346                    gen_helper_sve_ldsds_be_zd,
5347                    NULL, },
5348                  { gen_helper_sve_ldbdu_zd,
5349                    gen_helper_sve_ldhdu_be_zd,
5350                    gen_helper_sve_ldsdu_be_zd,
5351                    gen_helper_sve_lddd_be_zd, } } },
5352  
5353              /* First-fault */
5354              { { { gen_helper_sve_ldffbds_zsu,
5355                    gen_helper_sve_ldffhds_be_zsu,
5356                    gen_helper_sve_ldffsds_be_zsu,
5357                    NULL, },
5358                  { gen_helper_sve_ldffbdu_zsu,
5359                    gen_helper_sve_ldffhdu_be_zsu,
5360                    gen_helper_sve_ldffsdu_be_zsu,
5361                    gen_helper_sve_ldffdd_be_zsu, } },
5362                { { gen_helper_sve_ldffbds_zss,
5363                    gen_helper_sve_ldffhds_be_zss,
5364                    gen_helper_sve_ldffsds_be_zss,
5365                    NULL, },
5366                  { gen_helper_sve_ldffbdu_zss,
5367                    gen_helper_sve_ldffhdu_be_zss,
5368                    gen_helper_sve_ldffsdu_be_zss,
5369                    gen_helper_sve_ldffdd_be_zss, } },
5370                { { gen_helper_sve_ldffbds_zd,
5371                    gen_helper_sve_ldffhds_be_zd,
5372                    gen_helper_sve_ldffsds_be_zd,
5373                    NULL, },
5374                  { gen_helper_sve_ldffbdu_zd,
5375                    gen_helper_sve_ldffhdu_be_zd,
5376                    gen_helper_sve_ldffsdu_be_zd,
5377                    gen_helper_sve_ldffdd_be_zd, } } } } },
5378      { /* MTE Active */
5379          { /* Little-endian */
5380              { { { gen_helper_sve_ldbds_zsu_mte,
5381                    gen_helper_sve_ldhds_le_zsu_mte,
5382                    gen_helper_sve_ldsds_le_zsu_mte,
5383                    NULL, },
5384                  { gen_helper_sve_ldbdu_zsu_mte,
5385                    gen_helper_sve_ldhdu_le_zsu_mte,
5386                    gen_helper_sve_ldsdu_le_zsu_mte,
5387                    gen_helper_sve_lddd_le_zsu_mte, } },
5388                { { gen_helper_sve_ldbds_zss_mte,
5389                    gen_helper_sve_ldhds_le_zss_mte,
5390                    gen_helper_sve_ldsds_le_zss_mte,
5391                    NULL, },
5392                  { gen_helper_sve_ldbdu_zss_mte,
5393                    gen_helper_sve_ldhdu_le_zss_mte,
5394                    gen_helper_sve_ldsdu_le_zss_mte,
5395                    gen_helper_sve_lddd_le_zss_mte, } },
5396                { { gen_helper_sve_ldbds_zd_mte,
5397                    gen_helper_sve_ldhds_le_zd_mte,
5398                    gen_helper_sve_ldsds_le_zd_mte,
5399                    NULL, },
5400                  { gen_helper_sve_ldbdu_zd_mte,
5401                    gen_helper_sve_ldhdu_le_zd_mte,
5402                    gen_helper_sve_ldsdu_le_zd_mte,
5403                    gen_helper_sve_lddd_le_zd_mte, } } },
5404  
5405              /* First-fault */
5406              { { { gen_helper_sve_ldffbds_zsu_mte,
5407                    gen_helper_sve_ldffhds_le_zsu_mte,
5408                    gen_helper_sve_ldffsds_le_zsu_mte,
5409                    NULL, },
5410                  { gen_helper_sve_ldffbdu_zsu_mte,
5411                    gen_helper_sve_ldffhdu_le_zsu_mte,
5412                    gen_helper_sve_ldffsdu_le_zsu_mte,
5413                    gen_helper_sve_ldffdd_le_zsu_mte, } },
5414                { { gen_helper_sve_ldffbds_zss_mte,
5415                    gen_helper_sve_ldffhds_le_zss_mte,
5416                    gen_helper_sve_ldffsds_le_zss_mte,
5417                    NULL, },
5418                  { gen_helper_sve_ldffbdu_zss_mte,
5419                    gen_helper_sve_ldffhdu_le_zss_mte,
5420                    gen_helper_sve_ldffsdu_le_zss_mte,
5421                    gen_helper_sve_ldffdd_le_zss_mte, } },
5422                { { gen_helper_sve_ldffbds_zd_mte,
5423                    gen_helper_sve_ldffhds_le_zd_mte,
5424                    gen_helper_sve_ldffsds_le_zd_mte,
5425                    NULL, },
5426                  { gen_helper_sve_ldffbdu_zd_mte,
5427                    gen_helper_sve_ldffhdu_le_zd_mte,
5428                    gen_helper_sve_ldffsdu_le_zd_mte,
5429                    gen_helper_sve_ldffdd_le_zd_mte, } } } },
5430          { /* Big-endian */
5431              { { { gen_helper_sve_ldbds_zsu_mte,
5432                    gen_helper_sve_ldhds_be_zsu_mte,
5433                    gen_helper_sve_ldsds_be_zsu_mte,
5434                    NULL, },
5435                  { gen_helper_sve_ldbdu_zsu_mte,
5436                    gen_helper_sve_ldhdu_be_zsu_mte,
5437                    gen_helper_sve_ldsdu_be_zsu_mte,
5438                    gen_helper_sve_lddd_be_zsu_mte, } },
5439                { { gen_helper_sve_ldbds_zss_mte,
5440                    gen_helper_sve_ldhds_be_zss_mte,
5441                    gen_helper_sve_ldsds_be_zss_mte,
5442                    NULL, },
5443                  { gen_helper_sve_ldbdu_zss_mte,
5444                    gen_helper_sve_ldhdu_be_zss_mte,
5445                    gen_helper_sve_ldsdu_be_zss_mte,
5446                    gen_helper_sve_lddd_be_zss_mte, } },
5447                { { gen_helper_sve_ldbds_zd_mte,
5448                    gen_helper_sve_ldhds_be_zd_mte,
5449                    gen_helper_sve_ldsds_be_zd_mte,
5450                    NULL, },
5451                  { gen_helper_sve_ldbdu_zd_mte,
5452                    gen_helper_sve_ldhdu_be_zd_mte,
5453                    gen_helper_sve_ldsdu_be_zd_mte,
5454                    gen_helper_sve_lddd_be_zd_mte, } } },
5455  
5456              /* First-fault */
5457              { { { gen_helper_sve_ldffbds_zsu_mte,
5458                    gen_helper_sve_ldffhds_be_zsu_mte,
5459                    gen_helper_sve_ldffsds_be_zsu_mte,
5460                    NULL, },
5461                  { gen_helper_sve_ldffbdu_zsu_mte,
5462                    gen_helper_sve_ldffhdu_be_zsu_mte,
5463                    gen_helper_sve_ldffsdu_be_zsu_mte,
5464                    gen_helper_sve_ldffdd_be_zsu_mte, } },
5465                { { gen_helper_sve_ldffbds_zss_mte,
5466                    gen_helper_sve_ldffhds_be_zss_mte,
5467                    gen_helper_sve_ldffsds_be_zss_mte,
5468                    NULL, },
5469                  { gen_helper_sve_ldffbdu_zss_mte,
5470                    gen_helper_sve_ldffhdu_be_zss_mte,
5471                    gen_helper_sve_ldffsdu_be_zss_mte,
5472                    gen_helper_sve_ldffdd_be_zss_mte, } },
5473                { { gen_helper_sve_ldffbds_zd_mte,
5474                    gen_helper_sve_ldffhds_be_zd_mte,
5475                    gen_helper_sve_ldffsds_be_zd_mte,
5476                    NULL, },
5477                  { gen_helper_sve_ldffbdu_zd_mte,
5478                    gen_helper_sve_ldffhdu_be_zd_mte,
5479                    gen_helper_sve_ldffsdu_be_zd_mte,
5480                    gen_helper_sve_ldffdd_be_zd_mte, } } } } },
5481  };
5482  
/*
 * Gather load, scalar base plus vector of offsets.
 *
 * The helper comes from gather_load_fn32 or gather_load_fn64 according
 * to the vector element size, indexed by MTE state, endianness and the
 * ff/xs/u/msz fields of the instruction.  The instruction is marked
 * non-streaming before the access check.  do_mem_zpz() receives
 * scale * msz as its scale argument and the base register as the scalar
 * operand.
 */
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    const bool big_endian = s->be_data == MO_BE;
    const bool mte_on = s->mte_active[0];
    gen_helper_gvec_mem_scatter *gather = NULL;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    if (a->esz == MO_32) {
        gather = gather_load_fn32[mte_on][big_endian]
                                 [a->ff][a->xs][a->u][a->msz];
    } else if (a->esz == MO_64) {
        gather = gather_load_fn64[mte_on][big_endian]
                                 [a->ff][a->xs][a->u][a->msz];
    }
    /* Any other element size, or an empty table slot, is a bug.  */
    assert(gather != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, gather);
    return true;
}
5511  
/*
 * Gather load with a vector base (Zn) and an immediate offset.
 *
 * The encoding is rejected when the element is narrower than the memory
 * access, or when it is the same width and the load is signed (u == 0).
 * Returns false for a rejected encoding or missing SVE, true once the
 * insn has been consumed (including a failed sve_access_check()).
 */
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    const bool big = (s->be_data == MO_BE);
    const bool tagged = s->mte_active[0];
    /* A signed load needs an element strictly wider than the access. */
    const int min_esz = a->msz + !a->u;
    gen_helper_gvec_mem_scatter *helper = NULL;

    if (a->esz < min_esz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Flag the insn as non-streaming before performing the access check. */
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    /*
     * The xs slot is fixed here: 0 for 32-bit elements, 2 for 64-bit
     * elements (the third group of gather_load_fn64 holds the _zd helpers).
     */
    if (a->esz == MO_32) {
        helper = gather_load_fn32[tagged][big][a->ff][0][a->u][a->msz];
    } else if (a->esz == MO_64) {
        helper = gather_load_fn64[tagged][big][a->ff][2][a->u][a->msz];
    }
    assert(helper != NULL);

    /*
     * Handle LD1_zpiz (zn[x] + imm) like LD1_zprz (rn + zm[x]): the
     * immediate, scaled by the access size, takes the scalar's place.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, helper);
    return true;
}
5546  
/*
 * SVE2 non-temporal gather load: vector base (Zn) plus scalar offset (Xm).
 *
 * Reuses the gather_load_fn32/fn64 tables with the ff slot fixed at 0
 * and the xs slot fixed at 0 (32-bit elements) or 2 (64-bit elements).
 * Returns false for a rejected encoding or missing SVE2, true once the
 * insn has been consumed (including a failed sve_access_check()).
 */
static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    const bool big = (s->be_data == MO_BE);
    const bool tagged = s->mte_active[0];
    gen_helper_gvec_mem_scatter *helper = NULL;

    /* Element must be wider than the access, or equal if unsigned. */
    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    /* Flag the insn as non-streaming before performing the access check. */
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    if (a->esz == MO_32) {
        helper = gather_load_fn32[tagged][big][0][0][a->u][a->msz];
    } else if (a->esz == MO_64) {
        helper = gather_load_fn64[tagged][big][0][2][a->u][a->msz];
    }
    assert(helper != NULL);

    /* Here Zn is the vector operand and Xm (via cpu_reg) the scalar. */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, helper);
    return true;
}
5578  
/*
 * Scatter-store helpers for 32-bit vector elements.
 * Indexed by [mte][be][xs][msz].
 *
 * Within each endianness row, xs == 0 selects the _zsu helpers and
 * xs == 1 the _zss helpers.  The byte-sized (msz == 0) helpers have no
 * _le/_be variant, so the same function appears in both endianness rows.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
5612  
/*
 * Scatter-store helpers for 64-bit vector elements.
 * Indexed by [mte][be][xs][msz].
 *
 * Note that we overload xs=2 to indicate 64-bit offset: xs == 0 selects
 * the _zsu helpers, xs == 1 the _zss helpers and xs == 2 the _zd helpers.
 * The byte-sized (msz == 0) helpers have no _le/_be variant, so the same
 * function appears in both endianness rows.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
         { /* Little-endian */
             { gen_helper_sve_stbd_zsu,
               gen_helper_sve_sthd_le_zsu,
               gen_helper_sve_stsd_le_zsu,
               gen_helper_sve_stdd_le_zsu, },
             { gen_helper_sve_stbd_zss,
               gen_helper_sve_sthd_le_zss,
               gen_helper_sve_stsd_le_zss,
               gen_helper_sve_stdd_le_zss, },
             { gen_helper_sve_stbd_zd,
               gen_helper_sve_sthd_le_zd,
               gen_helper_sve_stsd_le_zd,
               gen_helper_sve_stdd_le_zd, } },
         { /* Big-endian */
             { gen_helper_sve_stbd_zsu,
               gen_helper_sve_sthd_be_zsu,
               gen_helper_sve_stsd_be_zsu,
               gen_helper_sve_stdd_be_zsu, },
             { gen_helper_sve_stbd_zss,
               gen_helper_sve_sthd_be_zss,
               gen_helper_sve_stsd_be_zss,
               gen_helper_sve_stdd_be_zss, },
             { gen_helper_sve_stbd_zd,
               gen_helper_sve_sthd_be_zd,
               gen_helper_sve_stsd_be_zd,
               gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
         { /* Little-endian */
             { gen_helper_sve_stbd_zsu_mte,
               gen_helper_sve_sthd_le_zsu_mte,
               gen_helper_sve_stsd_le_zsu_mte,
               gen_helper_sve_stdd_le_zsu_mte, },
             { gen_helper_sve_stbd_zss_mte,
               gen_helper_sve_sthd_le_zss_mte,
               gen_helper_sve_stsd_le_zss_mte,
               gen_helper_sve_stdd_le_zss_mte, },
             { gen_helper_sve_stbd_zd_mte,
               gen_helper_sve_sthd_le_zd_mte,
               gen_helper_sve_stsd_le_zd_mte,
               gen_helper_sve_stdd_le_zd_mte, } },
         { /* Big-endian */
             { gen_helper_sve_stbd_zsu_mte,
               gen_helper_sve_sthd_be_zsu_mte,
               gen_helper_sve_stsd_be_zsu_mte,
               gen_helper_sve_stdd_be_zsu_mte, },
             { gen_helper_sve_stbd_zss_mte,
               gen_helper_sve_sthd_be_zss_mte,
               gen_helper_sve_stsd_be_zss_mte,
               gen_helper_sve_stdd_be_zss_mte, },
             { gen_helper_sve_stbd_zd_mte,
               gen_helper_sve_sthd_be_zd_mte,
               gen_helper_sve_stsd_be_zd_mte,
               gen_helper_sve_stdd_be_zd_mte, } } },
};
5670  
/*
 * Scatter store with a scalar base (Xn|SP) and a vector of offsets (Zm).
 *
 * The encoding is rejected when the element is narrower than the memory
 * access, or when a scaled offset is requested for byte accesses.
 * Returns false for a rejected encoding or missing SVE, true once the
 * insn has been consumed (including a failed sve_access_check()).
 */
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    const bool big = (s->be_data == MO_BE);
    const bool tagged = s->mte_active[0];
    gen_helper_gvec_mem_scatter *helper;

    if (a->esz < a->msz) {
        return false;
    }
    if (a->msz == 0 && a->scale) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Flag the insn as non-streaming before performing the access check. */
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    if (a->esz == MO_32) {
        helper = scatter_store_fn32[tagged][big][a->xs][a->msz];
    } else if (a->esz == MO_64) {
        helper = scatter_store_fn64[tagged][big][a->xs][a->msz];
    } else {
        g_assert_not_reached();
    }

    /* The bool argument is true for stores (the load translators pass false). */
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, helper);
    return true;
}
5701  
/*
 * Scatter store with a vector base (Zn) and an immediate offset.
 *
 * Returns false when the element is narrower than the memory access or
 * SVE is missing, true once the insn has been consumed (including a
 * failed sve_access_check()).
 */
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    const bool big = (s->be_data == MO_BE);
    const bool tagged = s->mte_active[0];
    gen_helper_gvec_mem_scatter *helper = NULL;

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Flag the insn as non-streaming before performing the access check. */
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    /* xs slot is fixed: 0 for 32-bit elements, 2 (64-bit offset) for 64-bit. */
    if (a->esz == MO_32) {
        helper = scatter_store_fn32[tagged][big][0][a->msz];
    } else if (a->esz == MO_64) {
        helper = scatter_store_fn64[tagged][big][2][a->msz];
    }
    assert(helper != NULL);

    /*
     * Handle ST1_zpiz (zn[x] + imm) like ST1_zprz (rn + zm[x]): the
     * immediate, scaled by the access size, takes the scalar's place.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, helper);
    return true;
}
5736  
/*
 * SVE2 non-temporal scatter store: vector base (Zn) plus scalar offset (Xm).
 *
 * Reuses the scatter_store_fn32/fn64 tables with the xs slot fixed at 0
 * (32-bit elements) or 2 (64-bit elements).  Returns false when the
 * element is narrower than the access or SVE2 is missing, true once the
 * insn has been consumed (including a failed sve_access_check()).
 */
static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    const bool big = (s->be_data == MO_BE);
    const bool tagged = s->mte_active[0];
    gen_helper_gvec_mem_scatter *helper;

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    /* Flag the insn as non-streaming before performing the access check. */
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    if (a->esz == MO_32) {
        helper = scatter_store_fn32[tagged][big][0][a->msz];
    } else if (a->esz == MO_64) {
        helper = scatter_store_fn64[tagged][big][2][a->msz];
    } else {
        g_assert_not_reached();
    }

    /* Here Zn is the vector operand and Xm (via cpu_reg) the scalar. */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, helper);
    return true;
}
5769  
5770  /*
5771   * Prefetches
5772   */
5773  
/*
 * SVE prefetch.  No code is generated for the prefetch itself; only the
 * SVE access check is performed and its result is deliberately ignored.
 * Returns false when SVE is not implemented.
 */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (dc_isar_feature(aa64_sve, s)) {
        /* Prefetch is a nop within QEMU.  */
        (void)sve_access_check(s);
        return true;
    }
    return false;
}
5783  
/*
 * SVE prefetch, register-offset form.  Rm == 31 is rejected, as is a CPU
 * without SVE.  Otherwise only the access check runs (result ignored).
 */
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm != 31 && dc_isar_feature(aa64_sve, s)) {
        /* Prefetch is a nop within QEMU.  */
        (void)sve_access_check(s);
        return true;
    }
    return false;
}
5793  
/*
 * SVE prefetch, non-streaming form.  Identical to trans_PRF except that
 * the insn is flagged as non-streaming before the access check runs.
 */
static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (dc_isar_feature(aa64_sve, s)) {
        /* Prefetch is a nop within QEMU.  */
        s->is_nonstreaming = true;
        (void)sve_access_check(s);
        return true;
    }
    return false;
}
5804  
5805  /*
5806   * Move Prefix
5807   *
5808   * TODO: The implementation so far could handle predicated merging movprfx.
5809   * The helper functions as written take an extra source register to
5810   * use in the operation, but the result is only written when predication
5811   * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5812   * to allow the final write back to the destination to be unconditional.
5813   * For predicated zeroing movprfx, we need to rearrange the helpers to
5814   * allow the final write back to zero inactives.
5815   *
5816   * In the meantime, just emit the moves.
5817   */
5818  
/* Unpredicated form: expanded through do_mov_z(rd, rn). */
TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
/* Merging form: do_sel_z with Zd passed as both destination and 2nd source. */
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
/* Zeroing form: do_movz_zpz with its final flag argument false. */
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5822  
5823  /*
5824   * SVE2 Integer Multiply - Unpredicated
5825   */
5826  
5827  TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5828  
5829  static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
5830      gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
5831      gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
5832  };
5833  TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5834             smulh_zzz_fns[a->esz], a, 0)
5835  
5836  static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
5837      gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
5838      gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
5839  };
5840  TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5841             umulh_zzz_fns[a->esz], a, 0)
5842  
5843  TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5844             gen_helper_gvec_pmul_b, a, 0)
5845  
5846  static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
5847      gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
5848      gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
5849  };
5850  TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5851             sqdmulh_zzz_fns[a->esz], a, 0)
5852  
5853  static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
5854      gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
5855      gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
5856  };
5857  TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5858             sqrdmulh_zzz_fns[a->esz], a, 0)
5859  
5860  /*
5861   * SVE2 Integer - Predicated
5862   */
5863  
5864  static gen_helper_gvec_4 * const sadlp_fns[4] = {
5865      NULL,                          gen_helper_sve2_sadalp_zpzz_h,
5866      gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
5867  };
5868  TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5869             sadlp_fns[a->esz], a, 0)
5870  
5871  static gen_helper_gvec_4 * const uadlp_fns[4] = {
5872      NULL,                          gen_helper_sve2_uadalp_zpzz_h,
5873      gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
5874  };
5875  TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5876             uadlp_fns[a->esz], a, 0)
5877  
5878  /*
5879   * SVE2 integer unary operations (predicated)
5880   */
5881  
5882  TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
5883             a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
5884  
5885  TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
5886             a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
5887  
5888  static gen_helper_gvec_3 * const sqabs_fns[4] = {
5889      gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
5890      gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
5891  };
5892  TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
5893  
5894  static gen_helper_gvec_3 * const sqneg_fns[4] = {
5895      gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
5896      gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
5897  };
5898  TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
5899  
/*
 * Predicated two-source SVE2 operations.  Each line instantiates a
 * translator through DO_ZPZZ(insn, feature, helper-name stem); the macro
 * itself is defined outside this part of the file.
 */

/* Signed shift-left family. */
DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

/* Unsigned shift-left family. */
DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

/* Signed halving add/subtract. */
DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

/* Unsigned halving add/subtract. */
DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

/* Pairwise add / max / min. */
DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

/* Saturating add/subtract, including the mixed-sign forms. */
DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
5928  
5929  /*
5930   * SVE2 Widening Integer Arithmetic
5931   */
5932  
5933  static gen_helper_gvec_3 * const saddl_fns[4] = {
5934      NULL,                    gen_helper_sve2_saddl_h,
5935      gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
5936  };
5937  TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5938             saddl_fns[a->esz], a, 0)
5939  TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5940             saddl_fns[a->esz], a, 3)
5941  TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5942             saddl_fns[a->esz], a, 2)
5943  
5944  static gen_helper_gvec_3 * const ssubl_fns[4] = {
5945      NULL,                    gen_helper_sve2_ssubl_h,
5946      gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
5947  };
5948  TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5949             ssubl_fns[a->esz], a, 0)
5950  TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5951             ssubl_fns[a->esz], a, 3)
5952  TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5953             ssubl_fns[a->esz], a, 2)
5954  TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
5955             ssubl_fns[a->esz], a, 1)
5956  
5957  static gen_helper_gvec_3 * const sabdl_fns[4] = {
5958      NULL,                    gen_helper_sve2_sabdl_h,
5959      gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
5960  };
5961  TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5962             sabdl_fns[a->esz], a, 0)
5963  TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5964             sabdl_fns[a->esz], a, 3)
5965  
5966  static gen_helper_gvec_3 * const uaddl_fns[4] = {
5967      NULL,                    gen_helper_sve2_uaddl_h,
5968      gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
5969  };
5970  TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5971             uaddl_fns[a->esz], a, 0)
5972  TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5973             uaddl_fns[a->esz], a, 3)
5974  
5975  static gen_helper_gvec_3 * const usubl_fns[4] = {
5976      NULL,                    gen_helper_sve2_usubl_h,
5977      gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
5978  };
5979  TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5980             usubl_fns[a->esz], a, 0)
5981  TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5982             usubl_fns[a->esz], a, 3)
5983  
5984  static gen_helper_gvec_3 * const uabdl_fns[4] = {
5985      NULL,                    gen_helper_sve2_uabdl_h,
5986      gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
5987  };
5988  TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5989             uabdl_fns[a->esz], a, 0)
5990  TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5991             uabdl_fns[a->esz], a, 3)
5992  
5993  static gen_helper_gvec_3 * const sqdmull_fns[4] = {
5994      NULL,                          gen_helper_sve2_sqdmull_zzz_h,
5995      gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
5996  };
5997  TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5998             sqdmull_fns[a->esz], a, 0)
5999  TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6000             sqdmull_fns[a->esz], a, 3)
6001  
6002  static gen_helper_gvec_3 * const smull_fns[4] = {
6003      NULL,                        gen_helper_sve2_smull_zzz_h,
6004      gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
6005  };
6006  TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6007             smull_fns[a->esz], a, 0)
6008  TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6009             smull_fns[a->esz], a, 3)
6010  
6011  static gen_helper_gvec_3 * const umull_fns[4] = {
6012      NULL,                        gen_helper_sve2_umull_zzz_h,
6013      gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
6014  };
6015  TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6016             umull_fns[a->esz], a, 0)
6017  TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6018             umull_fns[a->esz], a, 3)
6019  
6020  static gen_helper_gvec_3 * const eoril_fns[4] = {
6021      gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6022      gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6023  };
6024  TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6025  TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
6026  
do_trans_pmull(DisasContext * s,arg_rrr_esz * a,bool sel)6027  static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6028  {
6029      static gen_helper_gvec_3 * const fns[4] = {
6030          gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6031          NULL,                    gen_helper_sve2_pmull_d,
6032      };
6033  
6034      if (a->esz == 0) {
6035          if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
6036              return false;
6037          }
6038          s->is_nonstreaming = true;
6039      } else if (!dc_isar_feature(aa64_sve, s)) {
6040          return false;
6041      }
6042      return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
6043  }
6044  
6045  TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6046  TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
6047  
6048  static gen_helper_gvec_3 * const saddw_fns[4] = {
6049      NULL,                    gen_helper_sve2_saddw_h,
6050      gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6051  };
6052  TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6053  TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
6054  
6055  static gen_helper_gvec_3 * const ssubw_fns[4] = {
6056      NULL,                    gen_helper_sve2_ssubw_h,
6057      gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6058  };
6059  TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6060  TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
6061  
6062  static gen_helper_gvec_3 * const uaddw_fns[4] = {
6063      NULL,                    gen_helper_sve2_uaddw_h,
6064      gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6065  };
6066  TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6067  TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
6068  
6069  static gen_helper_gvec_3 * const usubw_fns[4] = {
6070      NULL,                    gen_helper_sve2_usubw_h,
6071      gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6072  };
6073  TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6074  TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
6075  
/*
 * Vector expansion of the signed shift-left-long operations.
 *
 * imm packs two fields: bit 0 selects the top (1) or bottom (0) half of
 * each element as the source, and the remaining bits give the left
 * shift.  "half" is 4 << vece bits.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    const int half = 4 << vece;
    const int shl = imm >> 1;
    const bool use_top = imm & 1;

    if (!use_top) {
        /* Move the bottom half up, then shift back down arithmetically. */
        tcg_gen_shli_vec(vece, d, n, half);
        tcg_gen_sari_vec(vece, d, d, half - shl);
        return;
    }

    if (shl != half) {
        /* Sign-extend the top half into place, then apply the shift. */
        tcg_gen_sari_vec(vece, d, n, half);
        tcg_gen_shli_vec(vece, d, d, shl);
        return;
    }

    /* Shifting the top half by exactly "half" just keeps the top bits. */
    tcg_gen_and_vec(vece, d, n,
                    tcg_constant_vec_matching(d, vece,
                        MAKE_64BIT_MASK(half, half)));
}
6096  
/*
 * Integer (64-bit lane) expansion of the unsigned shift-left-long
 * operations for elements of size vece.
 *
 * imm packs the top/bottom selector in bit 0 and the left shift in the
 * remaining bits.  The whole 64-bit value is shifted by the net amount
 * and a replicated per-element mask then clears the bits that do not
 * belong to the shifted half.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    const int half = 4 << vece;
    const int shl = imm >> 1;
    /* For the top half, the source first has to come down by "half" bits. */
    const int net = (imm & 1) ? shl - half : shl;
    uint64_t keep = MAKE_64BIT_MASK(0, half);

    keep <<= shl;
    keep = dup_const(vece, keep);

    if (net >= 0) {
        tcg_gen_shli_i64(d, n, net);
    } else {
        tcg_gen_shri_i64(d, n, -net);
    }
    tcg_gen_andi_i64(d, d, keep);
}
6117  
/* gen_ushll_i64 for MO_16 elements; used as the .fni8 hook in ushll_ops. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    /* gen_ushll_i64 takes the packed immediate as a plain int. */
    int packed = imm;

    gen_ushll_i64(MO_16, d, n, packed);
}
6122  
/* gen_ushll_i64 for MO_32 elements; used as the .fni8 hook in ushll_ops. */
static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    /* gen_ushll_i64 takes the packed immediate as a plain int. */
    int packed = imm;

    gen_ushll_i64(MO_32, d, n, packed);
}
6127  
/* gen_ushll_i64 for MO_64 elements; used as the .fni8 hook in ushll_ops. */
static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    /* gen_ushll_i64 takes the packed immediate as a plain int. */
    int packed = imm;

    gen_ushll_i64(MO_64, d, n, packed);
}
6132  
/*
 * Vector expansion of the unsigned shift-left-long operations.
 *
 * imm packs the top/bottom selector in bit 0 and the left shift in the
 * remaining bits.  Two cases need no shifting at all and reduce to a
 * single AND: top half shifted by exactly "half" bits, and bottom half
 * shifted by zero.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    const int half = 4 << vece;
    const int shl = imm >> 1;
    uint64_t keep;

    if (imm & 1) {
        if (shl != half) {
            /* Bring the top half down, then apply the shift. */
            tcg_gen_shri_vec(vece, d, n, half);
            tcg_gen_shli_vec(vece, d, d, shl);
            return;
        }
        keep = MAKE_64BIT_MASK(half, half);
    } else {
        if (shl != 0) {
            /* Move the bottom half up, then shift back down logically. */
            tcg_gen_shli_vec(vece, d, n, half);
            tcg_gen_shri_vec(vece, d, d, half - shl);
            return;
        }
        keep = MAKE_64BIT_MASK(0, half);
    }

    tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, keep));
}
6159  
/*
 * Common expansion for the shift-left-long bottom/top operations.
 *
 * ops has one entry per supported a->esz value (0..2); anything else is
 * rejected by returning false.  The immediate handed to the expander is
 * (shift << 1) | sel, which is the packing decoded by gen_sshll_vec and
 * the gen_ushll_* functions.  Returns true once the insn is consumed,
 * including when sve_access_check() fails.
 */
static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    unsigned vsz;

    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                    vec_full_reg_offset(s, a->rn),
                    vsz, vsz, (a->imm << 1) | sel,
                    &ops[a->esz]);
    return true;
}
6176  
6177  static const TCGOpcode sshll_list[] = {
6178      INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6179  };
6180  static const GVecGen2i sshll_ops[3] = {
6181      { .fniv = gen_sshll_vec,
6182        .opt_opc = sshll_list,
6183        .fno = gen_helper_sve2_sshll_h,
6184        .vece = MO_16 },
6185      { .fniv = gen_sshll_vec,
6186        .opt_opc = sshll_list,
6187        .fno = gen_helper_sve2_sshll_s,
6188        .vece = MO_32 },
6189      { .fniv = gen_sshll_vec,
6190        .opt_opc = sshll_list,
6191        .fno = gen_helper_sve2_sshll_d,
6192        .vece = MO_64 }
6193  };
6194  TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
6195  TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
6196  
6197  static const TCGOpcode ushll_list[] = {
6198      INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6199  };
6200  static const GVecGen2i ushll_ops[3] = {
6201      { .fni8 = gen_ushll16_i64,
6202        .fniv = gen_ushll_vec,
6203        .opt_opc = ushll_list,
6204        .fno = gen_helper_sve2_ushll_h,
6205        .vece = MO_16 },
6206      { .fni8 = gen_ushll32_i64,
6207        .fniv = gen_ushll_vec,
6208        .opt_opc = ushll_list,
6209        .fno = gen_helper_sve2_ushll_s,
6210        .vece = MO_32 },
6211      { .fni8 = gen_ushll64_i64,
6212        .fniv = gen_ushll_vec,
6213        .opt_opc = ushll_list,
6214        .fno = gen_helper_sve2_ushll_d,
6215        .vece = MO_64 },
6216  };
6217  TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
6218  TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
6219  
6220  static gen_helper_gvec_3 * const bext_fns[4] = {
6221      gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6222      gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6223  };
6224  TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6225                          bext_fns[a->esz], a, 0)
6226  
6227  static gen_helper_gvec_3 * const bdep_fns[4] = {
6228      gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6229      gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6230  };
6231  TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6232                          bdep_fns[a->esz], a, 0)
6233  
6234  static gen_helper_gvec_3 * const bgrp_fns[4] = {
6235      gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6236      gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6237  };
6238  TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6239                          bgrp_fns[a->esz], a, 0)
6240  
6241  static gen_helper_gvec_3 * const cadd_fns[4] = {
6242      gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6243      gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6244  };
6245  TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6246             cadd_fns[a->esz], a, 0)
6247  TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6248             cadd_fns[a->esz], a, 1)
6249  
6250  static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6251      gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6252      gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6253  };
6254  TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6255             sqcadd_fns[a->esz], a, 0)
6256  TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6257             sqcadd_fns[a->esz], a, 1)
6258  
6259  static gen_helper_gvec_4 * const sabal_fns[4] = {
6260      NULL,                    gen_helper_sve2_sabal_h,
6261      gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6262  };
6263  TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6264  TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
6265  
6266  static gen_helper_gvec_4 * const uabal_fns[4] = {
6267      NULL,                    gen_helper_sve2_uabal_h,
6268      gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6269  };
6270  TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6271  TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
6272  
do_adcl(DisasContext * s,arg_rrrr_esz * a,bool sel)6273  static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6274  {
6275      static gen_helper_gvec_4 * const fns[2] = {
6276          gen_helper_sve2_adcl_s,
6277          gen_helper_sve2_adcl_d,
6278      };
6279      /*
6280       * Note that in this case the ESZ field encodes both size and sign.
6281       * Split out 'subtract' into bit 1 of the data field for the helper.
6282       */
6283      return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
6284  }
6285  
TRANS_FEAT(ADCLB,aa64_sve2,do_adcl,a,false)6286  TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6287  TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
6288  
/*
 * Shift-and-accumulate and shift-and-insert by immediate.  Each is
 * expanded inline through gen_gvec_fn_arg_zzi with the named gen_gvec_*
 * expander.
 */
TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

/* Absolute difference and accumulate, via gen_gvec_fn_arg_zzz. */
TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
6298  
6299  static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
6300                                const GVecGen2 ops[3])
6301  {
6302      if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
6303          return false;
6304      }
6305      if (sve_access_check(s)) {
6306          unsigned vsz = vec_full_reg_size(s);
6307          tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6308                          vec_full_reg_offset(s, a->rn),
6309                          vsz, vsz, &ops[a->esz]);
6310      }
6311      return true;
6312  }
6313  
/*
 * Zero-terminated vector opcode list used as .opt_opc by both the
 * SQXTNB and SQXTNT expansion tables.
 */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};
6317  
/*
 * Vector expansion for SQXTNB.
 *
 * Each element of N is clamped to the signed range of a (4 << vece)-bit
 * value, and the result is then masked so that only the low
 * (4 << vece) bits of each element of D remain.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int half = 4 << vece;
    int64_t lo_mask = (1ull << half) - 1;
    int64_t sat_min = -1ull << (half - 1);
    int64_t sat_max = -sat_min - 1;
    TCGv_vec minv = tcg_constant_vec_matching(d, vece, sat_min);
    TCGv_vec maxv = tcg_constant_vec_matching(d, vece, sat_max);
    TCGv_vec maskv = tcg_constant_vec_matching(d, vece, lo_mask);

    tcg_gen_smax_vec(vece, d, n, minv);
    tcg_gen_smin_vec(vece, d, d, maxv);
    tcg_gen_and_vec(vece, d, d, maskv);
}
6329  
6330  static const GVecGen2 sqxtnb_ops[3] = {
6331      { .fniv = gen_sqxtnb_vec,
6332        .opt_opc = sqxtn_list,
6333        .fno = gen_helper_sve2_sqxtnb_h,
6334        .vece = MO_16 },
6335      { .fniv = gen_sqxtnb_vec,
6336        .opt_opc = sqxtn_list,
6337        .fno = gen_helper_sve2_sqxtnb_s,
6338        .vece = MO_32 },
6339      { .fniv = gen_sqxtnb_vec,
6340        .opt_opc = sqxtn_list,
6341        .fno = gen_helper_sve2_sqxtnb_d,
6342        .vece = MO_64 },
6343  };
TRANS_FEAT(SQXTNB,aa64_sve2,do_narrow_extract,a,sqxtnb_ops)6344  TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
6345  
6346  static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6347  {
6348      int halfbits = 4 << vece;
6349      int64_t mask = (1ull << halfbits) - 1;
6350      int64_t min = -1ull << (halfbits - 1);
6351      int64_t max = -min - 1;
6352  
6353      tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
6354      tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
6355      tcg_gen_shli_vec(vece, n, n, halfbits);
6356      tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
6357  }
6358  
6359  static const GVecGen2 sqxtnt_ops[3] = {
6360      { .fniv = gen_sqxtnt_vec,
6361        .opt_opc = sqxtn_list,
6362        .load_dest = true,
6363        .fno = gen_helper_sve2_sqxtnt_h,
6364        .vece = MO_16 },
6365      { .fniv = gen_sqxtnt_vec,
6366        .opt_opc = sqxtn_list,
6367        .load_dest = true,
6368        .fno = gen_helper_sve2_sqxtnt_s,
6369        .vece = MO_32 },
6370      { .fniv = gen_sqxtnt_vec,
6371        .opt_opc = sqxtn_list,
6372        .load_dest = true,
6373        .fno = gen_helper_sve2_sqxtnt_d,
6374        .vece = MO_64 },
6375  };
6376  TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
6377  
/*
 * Zero-terminated vector opcode list used as .opt_opc by both the
 * UQXTNB and UQXTNT expansion tables.
 */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};
6381  
/*
 * Vector expansion for UQXTNB: D = unsigned-min(N, 2^(4 << vece) - 1)
 * per element.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int half = 4 << vece;
    int64_t sat_max = (1ull << half) - 1;
    TCGv_vec limit = tcg_constant_vec_matching(d, vece, sat_max);

    tcg_gen_umin_vec(vece, d, n, limit);
}
6389  
6390  static const GVecGen2 uqxtnb_ops[3] = {
6391      { .fniv = gen_uqxtnb_vec,
6392        .opt_opc = uqxtn_list,
6393        .fno = gen_helper_sve2_uqxtnb_h,
6394        .vece = MO_16 },
6395      { .fniv = gen_uqxtnb_vec,
6396        .opt_opc = uqxtn_list,
6397        .fno = gen_helper_sve2_uqxtnb_s,
6398        .vece = MO_32 },
6399      { .fniv = gen_uqxtnb_vec,
6400        .opt_opc = uqxtn_list,
6401        .fno = gen_helper_sve2_uqxtnb_d,
6402        .vece = MO_64 },
6403  };
TRANS_FEAT(UQXTNB,aa64_sve2,do_narrow_extract,a,uqxtnb_ops)6404  TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
6405  
6406  static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6407  {
6408      int halfbits = 4 << vece;
6409      int64_t max = (1ull << halfbits) - 1;
6410      TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
6411  
6412      tcg_gen_umin_vec(vece, n, n, maxv);
6413      tcg_gen_shli_vec(vece, n, n, halfbits);
6414      tcg_gen_bitsel_vec(vece, d, maxv, d, n);
6415  }
6416  
6417  static const GVecGen2 uqxtnt_ops[3] = {
6418      { .fniv = gen_uqxtnt_vec,
6419        .opt_opc = uqxtn_list,
6420        .load_dest = true,
6421        .fno = gen_helper_sve2_uqxtnt_h,
6422        .vece = MO_16 },
6423      { .fniv = gen_uqxtnt_vec,
6424        .opt_opc = uqxtn_list,
6425        .load_dest = true,
6426        .fno = gen_helper_sve2_uqxtnt_s,
6427        .vece = MO_32 },
6428      { .fniv = gen_uqxtnt_vec,
6429        .opt_opc = uqxtn_list,
6430        .load_dest = true,
6431        .fno = gen_helper_sve2_uqxtnt_d,
6432        .vece = MO_64 },
6433  };
6434  TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)
6435  
/*
 * Zero-terminated vector opcode list used as .opt_opc by both the
 * SQXTUNB and SQXTUNT expansion tables.
 */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};
6439  
/*
 * Vector expansion for SQXTUNB: a signed max against 0 removes negative
 * values, then an unsigned min against 2^(4 << vece) - 1 caps the
 * result.  Output goes to D; N is left untouched.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int half = 4 << vece;
    int64_t sat_max = (1ull << half) - 1;
    TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);
    TCGv_vec limit = tcg_constant_vec_matching(d, vece, sat_max);

    tcg_gen_smax_vec(vece, d, n, zero);
    tcg_gen_umin_vec(vece, d, d, limit);
}
6448  
6449  static const GVecGen2 sqxtunb_ops[3] = {
6450      { .fniv = gen_sqxtunb_vec,
6451        .opt_opc = sqxtun_list,
6452        .fno = gen_helper_sve2_sqxtunb_h,
6453        .vece = MO_16 },
6454      { .fniv = gen_sqxtunb_vec,
6455        .opt_opc = sqxtun_list,
6456        .fno = gen_helper_sve2_sqxtunb_s,
6457        .vece = MO_32 },
6458      { .fniv = gen_sqxtunb_vec,
6459        .opt_opc = sqxtun_list,
6460        .fno = gen_helper_sve2_sqxtunb_d,
6461        .vece = MO_64 },
6462  };
TRANS_FEAT(SQXTUNB,aa64_sve2,do_narrow_extract,a,sqxtunb_ops)6463  TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
6464  
6465  static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6466  {
6467      int halfbits = 4 << vece;
6468      int64_t max = (1ull << halfbits) - 1;
6469      TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
6470  
6471      tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
6472      tcg_gen_umin_vec(vece, n, n, maxv);
6473      tcg_gen_shli_vec(vece, n, n, halfbits);
6474      tcg_gen_bitsel_vec(vece, d, maxv, d, n);
6475  }
6476  
6477  static const GVecGen2 sqxtunt_ops[3] = {
6478      { .fniv = gen_sqxtunt_vec,
6479        .opt_opc = sqxtun_list,
6480        .load_dest = true,
6481        .fno = gen_helper_sve2_sqxtunt_h,
6482        .vece = MO_16 },
6483      { .fniv = gen_sqxtunt_vec,
6484        .opt_opc = sqxtun_list,
6485        .load_dest = true,
6486        .fno = gen_helper_sve2_sqxtunt_s,
6487        .vece = MO_32 },
6488      { .fniv = gen_sqxtunt_vec,
6489        .opt_opc = sqxtun_list,
6490        .load_dest = true,
6491        .fno = gen_helper_sve2_sqxtunt_d,
6492        .vece = MO_64 },
6493  };
TRANS_FEAT(SQXTUNT,aa64_sve2,do_narrow_extract,a,sqxtunt_ops)6494  TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
6495  
6496  static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
6497                            const GVecGen2i ops[3])
6498  {
6499      if (a->esz < 0 || a->esz > MO_32) {
6500          return false;
6501      }
6502      assert(a->imm > 0 && a->imm <= (8 << a->esz));
6503      if (sve_access_check(s)) {
6504          unsigned vsz = vec_full_reg_size(s);
6505          tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6506                          vec_full_reg_offset(s, a->rn),
6507                          vsz, vsz, a->imm, &ops[a->esz]);
6508      }
6509      return true;
6510  }
6511  
/*
 * 64-bit integer expansion for SHRNB on elements of size VECE.
 *
 * The whole 64-bit word is shifted right by SHR, then ANDed with a mask
 * that keeps the low (4 << vece) bits of every element.
 * NOTE(review): this relies on shr <= (4 << vece) so that bits crossing
 * an element boundary land only in the masked-off part; that appears to
 * follow from the assert in do_shr_narrow -- confirm.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int half = 4 << vece;
    uint64_t keep_lo = dup_const(vece, MAKE_64BIT_MASK(0, half));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, keep_lo);
}
6520  
/*
 * Fixed-element-size wrappers around gen_shrnb_i64; these are the
 * functions installed as .fni8 in shrnb_ops.
 */

/* SHRNB on MO_16 elements. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

/* SHRNB on MO_32 elements. */
static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

/* SHRNB on MO_64 elements. */
static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}
6535  
/*
 * Vector expansion for SHRNB: logical shift right of each element of N
 * by SHR (done in place, so N is clobbered), then D receives the low
 * (4 << vece) bits of each shifted element.
 */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    int half = 4 << vece;
    uint64_t lo_mask = MAKE_64BIT_MASK(0, half);
    TCGv_vec maskv = tcg_constant_vec_matching(d, vece, lo_mask);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_and_vec(vece, d, n, maskv);
}
6544  
6545  static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
6546  static const GVecGen2i shrnb_ops[3] = {
6547      { .fni8 = gen_shrnb16_i64,
6548        .fniv = gen_shrnb_vec,
6549        .opt_opc = shrnb_vec_list,
6550        .fno = gen_helper_sve2_shrnb_h,
6551        .vece = MO_16 },
6552      { .fni8 = gen_shrnb32_i64,
6553        .fniv = gen_shrnb_vec,
6554        .opt_opc = shrnb_vec_list,
6555        .fno = gen_helper_sve2_shrnb_s,
6556        .vece = MO_32 },
6557      { .fni8 = gen_shrnb64_i64,
6558        .fniv = gen_shrnb_vec,
6559        .opt_opc = shrnb_vec_list,
6560        .fno = gen_helper_sve2_shrnb_d,
6561        .vece = MO_64 },
6562  };
TRANS_FEAT(SHRNB,aa64_sve2,do_shr_narrow,a,shrnb_ops)6563  TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
6564  
6565  static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6566  {
6567      int halfbits = 4 << vece;
6568      uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6569  
6570      tcg_gen_shli_i64(n, n, halfbits - shr);
6571      tcg_gen_andi_i64(n, n, ~mask);
6572      tcg_gen_andi_i64(d, d, mask);
6573      tcg_gen_or_i64(d, d, n);
6574  }
6575  
/*
 * Fixed-element-size wrappers around gen_shrnt_i64; these are the
 * functions installed as .fni8 in the MO_16 and MO_32 entries of
 * shrnt_ops.
 */

/* SHRNT on MO_16 elements. */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

/* SHRNT on MO_32 elements. */
static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}
6585  
/*
 * SHRNT on MO_64 elements.  Unlike the 16- and 32-bit variants this does
 * not go through gen_shrnt_i64: N is shifted right by SHR in place, and
 * the deposit then inserts a 32-bit field from N at bit offset 32 of D.
 * N is clobbered.
 */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}
6591  
/*
 * Vector expansion for SHRNT.
 *
 * Each element of N is shifted left by (half - shr), half = 4 << vece,
 * in place; bitsel then merges D and N under a mask of the low HALF
 * bits, so the low part of each element of D is preserved and the rest
 * comes from the shifted N.  N is clobbered.
 */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    int half = 4 << vece;
    uint64_t lo_mask = MAKE_64BIT_MASK(0, half);
    TCGv_vec maskv = tcg_constant_vec_matching(d, vece, lo_mask);

    tcg_gen_shli_vec(vece, n, n, half - shr);
    tcg_gen_bitsel_vec(vece, d, maskv, d, n);
}
6600  
6601  static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
6602  static const GVecGen2i shrnt_ops[3] = {
6603      { .fni8 = gen_shrnt16_i64,
6604        .fniv = gen_shrnt_vec,
6605        .opt_opc = shrnt_vec_list,
6606        .load_dest = true,
6607        .fno = gen_helper_sve2_shrnt_h,
6608        .vece = MO_16 },
6609      { .fni8 = gen_shrnt32_i64,
6610        .fniv = gen_shrnt_vec,
6611        .opt_opc = shrnt_vec_list,
6612        .load_dest = true,
6613        .fno = gen_helper_sve2_shrnt_s,
6614        .vece = MO_32 },
6615      { .fni8 = gen_shrnt64_i64,
6616        .fniv = gen_shrnt_vec,
6617        .opt_opc = shrnt_vec_list,
6618        .load_dest = true,
6619        .fno = gen_helper_sve2_shrnt_d,
6620        .vece = MO_64 },
6621  };
6622  TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
6623  
/*
 * RSHRNB: only an out-of-line helper is provided per entry (no inline
 * expander).  Indexed by a->esz in do_shr_narrow.
 */
static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

/* RSHRNT: likewise helper-only. */
static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
6637  
6638  static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
6639                               TCGv_vec n, int64_t shr)
6640  {
6641      int halfbits = 4 << vece;
6642      uint64_t max = MAKE_64BIT_MASK(0, halfbits);
6643  
6644      tcg_gen_sari_vec(vece, n, n, shr);
6645      tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
6646      tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
6647  }
6648  
6649  static const TCGOpcode sqshrunb_vec_list[] = {
6650      INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6651  };
6652  static const GVecGen2i sqshrunb_ops[3] = {
6653      { .fniv = gen_sqshrunb_vec,
6654        .opt_opc = sqshrunb_vec_list,
6655        .fno = gen_helper_sve2_sqshrunb_h,
6656        .vece = MO_16 },
6657      { .fniv = gen_sqshrunb_vec,
6658        .opt_opc = sqshrunb_vec_list,
6659        .fno = gen_helper_sve2_sqshrunb_s,
6660        .vece = MO_32 },
6661      { .fniv = gen_sqshrunb_vec,
6662        .opt_opc = sqshrunb_vec_list,
6663        .fno = gen_helper_sve2_sqshrunb_d,
6664        .vece = MO_64 },
6665  };
TRANS_FEAT(SQSHRUNB,aa64_sve2,do_shr_narrow,a,sqshrunb_ops)6666  TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
6667  
6668  static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
6669                               TCGv_vec n, int64_t shr)
6670  {
6671      int halfbits = 4 << vece;
6672      uint64_t max = MAKE_64BIT_MASK(0, halfbits);
6673      TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
6674  
6675      tcg_gen_sari_vec(vece, n, n, shr);
6676      tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
6677      tcg_gen_umin_vec(vece, n, n, maxv);
6678      tcg_gen_shli_vec(vece, n, n, halfbits);
6679      tcg_gen_bitsel_vec(vece, d, maxv, d, n);
6680  }
6681  
6682  static const TCGOpcode sqshrunt_vec_list[] = {
6683      INDEX_op_shli_vec, INDEX_op_sari_vec,
6684      INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6685  };
6686  static const GVecGen2i sqshrunt_ops[3] = {
6687      { .fniv = gen_sqshrunt_vec,
6688        .opt_opc = sqshrunt_vec_list,
6689        .load_dest = true,
6690        .fno = gen_helper_sve2_sqshrunt_h,
6691        .vece = MO_16 },
6692      { .fniv = gen_sqshrunt_vec,
6693        .opt_opc = sqshrunt_vec_list,
6694        .load_dest = true,
6695        .fno = gen_helper_sve2_sqshrunt_s,
6696        .vece = MO_32 },
6697      { .fniv = gen_sqshrunt_vec,
6698        .opt_opc = sqshrunt_vec_list,
6699        .load_dest = true,
6700        .fno = gen_helper_sve2_sqshrunt_d,
6701        .vece = MO_64 },
6702  };
6703  TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
6704  
/*
 * SQRSHRUNB: only an out-of-line helper is provided per entry (no inline
 * expander).  Indexed by a->esz in do_shr_narrow.
 */
static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

/* SQRSHRUNT: likewise helper-only. */
static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
6718  
6719  static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
6720                              TCGv_vec n, int64_t shr)
6721  {
6722      int halfbits = 4 << vece;
6723      int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6724      int64_t min = -max - 1;
6725      int64_t mask = MAKE_64BIT_MASK(0, halfbits);
6726  
6727      tcg_gen_sari_vec(vece, n, n, shr);
6728      tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
6729      tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
6730      tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
6731  }
6732  
6733  static const TCGOpcode sqshrnb_vec_list[] = {
6734      INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6735  };
6736  static const GVecGen2i sqshrnb_ops[3] = {
6737      { .fniv = gen_sqshrnb_vec,
6738        .opt_opc = sqshrnb_vec_list,
6739        .fno = gen_helper_sve2_sqshrnb_h,
6740        .vece = MO_16 },
6741      { .fniv = gen_sqshrnb_vec,
6742        .opt_opc = sqshrnb_vec_list,
6743        .fno = gen_helper_sve2_sqshrnb_s,
6744        .vece = MO_32 },
6745      { .fniv = gen_sqshrnb_vec,
6746        .opt_opc = sqshrnb_vec_list,
6747        .fno = gen_helper_sve2_sqshrnb_d,
6748        .vece = MO_64 },
6749  };
TRANS_FEAT(SQSHRNB,aa64_sve2,do_shr_narrow,a,sqshrnb_ops)6750  TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
6751  
6752  static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
6753                               TCGv_vec n, int64_t shr)
6754  {
6755      int halfbits = 4 << vece;
6756      int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6757      int64_t min = -max - 1;
6758      int64_t mask = MAKE_64BIT_MASK(0, halfbits);
6759  
6760      tcg_gen_sari_vec(vece, n, n, shr);
6761      tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
6762      tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
6763      tcg_gen_shli_vec(vece, n, n, halfbits);
6764      tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
6765  }
6766  
6767  static const TCGOpcode sqshrnt_vec_list[] = {
6768      INDEX_op_shli_vec, INDEX_op_sari_vec,
6769      INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6770  };
6771  static const GVecGen2i sqshrnt_ops[3] = {
6772      { .fniv = gen_sqshrnt_vec,
6773        .opt_opc = sqshrnt_vec_list,
6774        .load_dest = true,
6775        .fno = gen_helper_sve2_sqshrnt_h,
6776        .vece = MO_16 },
6777      { .fniv = gen_sqshrnt_vec,
6778        .opt_opc = sqshrnt_vec_list,
6779        .load_dest = true,
6780        .fno = gen_helper_sve2_sqshrnt_s,
6781        .vece = MO_32 },
6782      { .fniv = gen_sqshrnt_vec,
6783        .opt_opc = sqshrnt_vec_list,
6784        .load_dest = true,
6785        .fno = gen_helper_sve2_sqshrnt_d,
6786        .vece = MO_64 },
6787  };
6788  TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
6789  
/*
 * SQRSHRNB: only an out-of-line helper is provided per entry (no inline
 * expander).  Indexed by a->esz in do_shr_narrow.
 */
static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

/* SQRSHRNT: likewise helper-only. */
static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
6803  
6804  static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
6805                              TCGv_vec n, int64_t shr)
6806  {
6807      int halfbits = 4 << vece;
6808      int64_t max = MAKE_64BIT_MASK(0, halfbits);
6809  
6810      tcg_gen_shri_vec(vece, n, n, shr);
6811      tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
6812  }
6813  
6814  static const TCGOpcode uqshrnb_vec_list[] = {
6815      INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6816  };
6817  static const GVecGen2i uqshrnb_ops[3] = {
6818      { .fniv = gen_uqshrnb_vec,
6819        .opt_opc = uqshrnb_vec_list,
6820        .fno = gen_helper_sve2_uqshrnb_h,
6821        .vece = MO_16 },
6822      { .fniv = gen_uqshrnb_vec,
6823        .opt_opc = uqshrnb_vec_list,
6824        .fno = gen_helper_sve2_uqshrnb_s,
6825        .vece = MO_32 },
6826      { .fniv = gen_uqshrnb_vec,
6827        .opt_opc = uqshrnb_vec_list,
6828        .fno = gen_helper_sve2_uqshrnb_d,
6829        .vece = MO_64 },
6830  };
TRANS_FEAT(UQSHRNB,aa64_sve2,do_shr_narrow,a,uqshrnb_ops)6831  TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
6832  
6833  static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
6834                              TCGv_vec n, int64_t shr)
6835  {
6836      int halfbits = 4 << vece;
6837      int64_t max = MAKE_64BIT_MASK(0, halfbits);
6838      TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
6839  
6840      tcg_gen_shri_vec(vece, n, n, shr);
6841      tcg_gen_umin_vec(vece, n, n, maxv);
6842      tcg_gen_shli_vec(vece, n, n, halfbits);
6843      tcg_gen_bitsel_vec(vece, d, maxv, d, n);
6844  }
6845  
6846  static const TCGOpcode uqshrnt_vec_list[] = {
6847      INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6848  };
6849  static const GVecGen2i uqshrnt_ops[3] = {
6850      { .fniv = gen_uqshrnt_vec,
6851        .opt_opc = uqshrnt_vec_list,
6852        .load_dest = true,
6853        .fno = gen_helper_sve2_uqshrnt_h,
6854        .vece = MO_16 },
6855      { .fniv = gen_uqshrnt_vec,
6856        .opt_opc = uqshrnt_vec_list,
6857        .load_dest = true,
6858        .fno = gen_helper_sve2_uqshrnt_s,
6859        .vece = MO_32 },
6860      { .fniv = gen_uqshrnt_vec,
6861        .opt_opc = uqshrnt_vec_list,
6862        .load_dest = true,
6863        .fno = gen_helper_sve2_uqshrnt_d,
6864        .vece = MO_64 },
6865  };
6866  TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)
6867  
/*
 * UQRSHRNB: only an out-of-line helper is provided per entry (no inline
 * expander).  Indexed by a->esz in do_shr_narrow.
 */
static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

/* UQRSHRNT: likewise helper-only. */
static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
6881  
/*
 * Define the helper table name##_fns (indexed by a->esz, slot 0 NULL,
 * slots 1-3 the _h/_s/_d helpers) together with the TRANS_FEAT expansion
 * for NAME, which passes the selected helper to gen_gvec_ool_arg_zzz
 * with a data argument of 0.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
    static gen_helper_gvec_3 * const name##_fns[4] = {                    \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
               name##_fns[a->esz], a, 0)
6889  
/* Add-narrow forms instantiated through DO_SVE2_ZZZ_NARROW. */
DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

/* Subtract-narrow forms instantiated through DO_SVE2_ZZZ_NARROW. */
DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
6899  
/*
 * MATCH helpers indexed by a->esz: only the _b and _h slots are
 * populated; slots 2 and 3 are NULL.
 */
static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

/* NMATCH helpers, with the same table layout as match_fns. */
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
6909  
/*
 * HISTCNT helpers indexed by a->esz: only the _s and _d slots are
 * populated; slots 0 and 1 are NULL.
 */
static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

/* HISTSEG has a single helper, passed only when a->esz == 0 (else NULL). */
TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
6918  
/*
 * Floating-point FADDP/FMAXNMP/FMINNMP/FMAXP/FMINP, instantiated through
 * DO_ZPZZ_FP (defined outside this part of the file) with the
 * sve2_*_zpzz helper name stems.
 */
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
6924  
6925  /*
6926   * SVE Integer Multiply-Add (unpredicated)
6927   */
6928  
/*
 * FMMLA, single and double precision.  Each is gated on its own feature
 * (aa64_sve_f32mm / aa64_sve_f64mm) and expanded through
 * gen_gvec_fpst_zzzz with data 0 and FPST_FPCR.
 */
TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
6935  
/*
 * SQDMLAL{B,T,BT} helpers indexed by a->esz; slot 0 is NULL.
 * The three forms differ only in the data argument: 0 for SQDMLALB,
 * 3 for SQDMLALT and 2 for SQDMLALBT.
 * NOTE(review): presumably bits 0 and 1 of the data word select the
 * top/bottom half of the two multiplicands -- confirm in the helper.
 */
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

/*
 * SQDMLSL{B,T,BT}: same table layout and the same data values
 * (0, 3, 2) as the SQDMLAL group.
 */
static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)
6957  
/* SQRDMLAH helpers indexed by a->esz; all four slots are populated. */
static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

/* SQRDMLSH helpers indexed by a->esz; all four slots are populated. */
static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)
6971  
/*
 * Widening multiply-accumulate / multiply-subtract, signed and unsigned.
 * Every table is indexed by a->esz with slot 0 NULL, and each B/T pair
 * differs only in the data argument (0 for the B form, 1 for the T form).
 */

/* SMLALB / SMLALT */
static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

/* UMLALB / UMLALT */
static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

/* SMLSLB / SMLSLT */
static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

/* UMLSLB / UMLSLT */
static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)
7007  
/*
 * Complex integer operations.  Each passes a->rot as the data argument
 * and names the four registers explicitly via gen_gvec_ool_zzzz.
 */

/* CMLA helpers indexed by a->esz; all four slots are populated. */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* CDOT helpers indexed by a->esz; only the _s and _d slots are populated. */
static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* SQRDCMLAH helpers indexed by a->esz; all four slots are populated. */
static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7027  
/*
 * USDOT is gated on aa64_sve_i8mm; the helper is passed only when
 * a->esz == 2, otherwise NULL.
 */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
7030  
/*
 * Crypto extensions, all instantiated with TRANS_FEAT_NONSTREAMING and
 * each gated on its own feature test.
 */

/* AESMC/AESIMC pass a->rd as both register arguments. */
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, 0)
TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesimc, a->rd, a->rd, 0)

/* AESE/AESD: three-register form via the crypto helpers. */
TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, 0)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aesd, a, 0)

/* SM4E/SM4EKEY, gated on aa64_sve2_sm4. */
TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

/* RAX1, gated on aa64_sve2_sha3, expanded with gen_gvec_rax1. */
TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)
7048  
/*
 * Floating-point conversions.  The FCVTNT/BFCVTNT/FCVTLT forms go
 * through gen_gvec_fpst_arg_zpz with data 0 and FPST_FPCR.
 */
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

/* BFCVTNT is gated on aa64_sve_bf16 rather than aa64_sve2. */
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

/*
 * FCVTX/FCVTXNT reuse the ordinary fcvt_ds / fcvtnt_ds helpers, but are
 * routed through do_frint_mode with FPROUNDING_ODD.
 */
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)
7066  
/*
 * FLOGB helpers indexed by a->esz; slot 0 is NULL.  The float-status
 * selector is FPST_FPCR_F16 when a->esz == MO_16 and FPST_FPCR otherwise.
 */
static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL,               gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
7073  
do_FMLAL_zzzw(DisasContext * s,arg_rrrr_esz * a,bool sub,bool sel)7074  static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7075  {
7076      return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7077                               a->rd, a->rn, a->rm, a->ra,
7078                               (sel << 1) | sub, tcg_env);
7079  }
7080  
TRANS_FEAT(FMLALB_zzzw,aa64_sve2,do_FMLAL_zzzw,a,false,false)7081  TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
7082  TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
7083  TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
7084  TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
7085  
7086  static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7087  {
7088      return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7089                               a->rd, a->rn, a->rm, a->ra,
7090                               (a->index << 2) | (sel << 1) | sub, tcg_env);
7091  }
7092  
TRANS_FEAT(FMLALB_zzxw,aa64_sve2,do_FMLAL_zzxw,a,false,false)7093  TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
7094  TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
7095  TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
7096  TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
7097  
7098  TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7099                          gen_helper_gvec_smmla_b, a, 0)
7100  TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7101                          gen_helper_gvec_usmmla_b, a, 0)
7102  TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7103                          gen_helper_gvec_ummla_b, a, 0)
7104  
7105  TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
7106             gen_helper_gvec_bfdot, a, 0)
7107  TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz,
7108             gen_helper_gvec_bfdot_idx, a)
7109  
7110  TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
7111                          gen_helper_gvec_bfmmla, a, 0)
7112  
7113  static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7114  {
7115      return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
7116                                a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
7117  }
7118  
TRANS_FEAT(BFMLALB_zzzw,aa64_sve_bf16,do_BFMLAL_zzzw,a,false)7119  TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
7120  TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
7121  
7122  static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7123  {
7124      return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
7125                                a->rd, a->rn, a->rm, a->ra,
7126                                (a->index << 1) | sel, FPST_FPCR);
7127  }
7128  
TRANS_FEAT(BFMLALB_zzxw,aa64_sve_bf16,do_BFMLAL_zzxw,a,false)7129  TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
7130  TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
7131  
7132  static bool trans_PSEL(DisasContext *s, arg_psel *a)
7133  {
7134      int vl = vec_full_reg_size(s);
7135      int pl = pred_gvec_reg_size(s);
7136      int elements = vl >> a->esz;
7137      TCGv_i64 tmp, didx, dbit;
7138      TCGv_ptr ptr;
7139  
7140      if (!dc_isar_feature(aa64_sme, s)) {
7141          return false;
7142      }
7143      if (!sve_access_check(s)) {
7144          return true;
7145      }
7146  
7147      tmp = tcg_temp_new_i64();
7148      dbit = tcg_temp_new_i64();
7149      didx = tcg_temp_new_i64();
7150      ptr = tcg_temp_new_ptr();
7151  
7152      /* Compute the predicate element. */
7153      tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
7154      if (is_power_of_2(elements)) {
7155          tcg_gen_andi_i64(tmp, tmp, elements - 1);
7156      } else {
7157          tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
7158      }
7159  
7160      /* Extract the predicate byte and bit indices. */
7161      tcg_gen_shli_i64(tmp, tmp, a->esz);
7162      tcg_gen_andi_i64(dbit, tmp, 7);
7163      tcg_gen_shri_i64(didx, tmp, 3);
7164      if (HOST_BIG_ENDIAN) {
7165          tcg_gen_xori_i64(didx, didx, 7);
7166      }
7167  
7168      /* Load the predicate word. */
7169      tcg_gen_trunc_i64_ptr(ptr, didx);
7170      tcg_gen_add_ptr(ptr, ptr, tcg_env);
7171      tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
7172  
7173      /* Extract the predicate bit and replicate to MO_64. */
7174      tcg_gen_shr_i64(tmp, tmp, dbit);
7175      tcg_gen_andi_i64(tmp, tmp, 1);
7176      tcg_gen_neg_i64(tmp, tmp);
7177  
7178      /* Apply to either copy the source, or write zeros. */
7179      tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
7180                        pred_full_reg_offset(s, a->pn), tmp, pl, pl);
7181      return true;
7182  }
7183  
/*
 * Signed clamp, 32-bit scalar expansion: d = smin(smax(a, n), m).
 * The intermediate maximum is held in d before the minimum is taken.
 */
static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    /* Raise a to at least n ... */
    tcg_gen_smax_i32(d, a, n);
    /* ... then cap the result at m. */
    tcg_gen_smin_i32(d, d, m);
}
7189  
/*
 * Signed clamp, 64-bit scalar expansion: d = smin(smax(a, n), m).
 * The intermediate maximum is held in d before the minimum is taken.
 */
static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    /* Raise a to at least n ... */
    tcg_gen_smax_i64(d, a, n);
    /* ... then cap the result at m. */
    tcg_gen_smin_i64(d, d, m);
}
7195  
/*
 * Signed clamp, host-vector expansion for element size @vece:
 * d = smin(smax(a, n), m), element by element.
 */
static void gen_sclamp_vec(unsigned vece, TCGv_vec d,
                           TCGv_vec n, TCGv_vec m, TCGv_vec a)
{
    /* Raise a to at least n ... */
    tcg_gen_smax_vec(vece, d, a, n);
    /* ... then cap the result at m. */
    tcg_gen_smin_vec(vece, d, d, m);
}
7202  
gen_sclamp(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)7203  static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
7204                         uint32_t a, uint32_t oprsz, uint32_t maxsz)
7205  {
7206      static const TCGOpcode vecop[] = {
7207          INDEX_op_smin_vec, INDEX_op_smax_vec, 0
7208      };
7209      static const GVecGen4 ops[4] = {
7210          { .fniv = gen_sclamp_vec,
7211            .fno  = gen_helper_gvec_sclamp_b,
7212            .opt_opc = vecop,
7213            .vece = MO_8 },
7214          { .fniv = gen_sclamp_vec,
7215            .fno  = gen_helper_gvec_sclamp_h,
7216            .opt_opc = vecop,
7217            .vece = MO_16 },
7218          { .fni4 = gen_sclamp_i32,
7219            .fniv = gen_sclamp_vec,
7220            .fno  = gen_helper_gvec_sclamp_s,
7221            .opt_opc = vecop,
7222            .vece = MO_32 },
7223          { .fni8 = gen_sclamp_i64,
7224            .fniv = gen_sclamp_vec,
7225            .fno  = gen_helper_gvec_sclamp_d,
7226            .opt_opc = vecop,
7227            .vece = MO_64,
7228            .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
7229      };
7230      tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
7231  }
7232  
TRANS_FEAT(SCLAMP,aa64_sme,gen_gvec_fn_arg_zzzz,gen_sclamp,a)7233  TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
7234  
7235  static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
7236  {
7237      tcg_gen_umax_i32(d, a, n);
7238      tcg_gen_umin_i32(d, d, m);
7239  }
7240  
/*
 * Unsigned clamp, 64-bit scalar expansion: d = umin(umax(a, n), m).
 * The intermediate maximum is held in d before the minimum is taken.
 */
static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    /* Raise a to at least n ... */
    tcg_gen_umax_i64(d, a, n);
    /* ... then cap the result at m. */
    tcg_gen_umin_i64(d, d, m);
}
7246  
/*
 * Unsigned clamp, host-vector expansion for element size @vece:
 * d = umin(umax(a, n), m), element by element.
 */
static void gen_uclamp_vec(unsigned vece, TCGv_vec d,
                           TCGv_vec n, TCGv_vec m, TCGv_vec a)
{
    /* Raise a to at least n ... */
    tcg_gen_umax_vec(vece, d, a, n);
    /* ... then cap the result at m. */
    tcg_gen_umin_vec(vece, d, d, m);
}
7253  
gen_uclamp(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)7254  static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
7255                         uint32_t a, uint32_t oprsz, uint32_t maxsz)
7256  {
7257      static const TCGOpcode vecop[] = {
7258          INDEX_op_umin_vec, INDEX_op_umax_vec, 0
7259      };
7260      static const GVecGen4 ops[4] = {
7261          { .fniv = gen_uclamp_vec,
7262            .fno  = gen_helper_gvec_uclamp_b,
7263            .opt_opc = vecop,
7264            .vece = MO_8 },
7265          { .fniv = gen_uclamp_vec,
7266            .fno  = gen_helper_gvec_uclamp_h,
7267            .opt_opc = vecop,
7268            .vece = MO_16 },
7269          { .fni4 = gen_uclamp_i32,
7270            .fniv = gen_uclamp_vec,
7271            .fno  = gen_helper_gvec_uclamp_s,
7272            .opt_opc = vecop,
7273            .vece = MO_32 },
7274          { .fni8 = gen_uclamp_i64,
7275            .fniv = gen_uclamp_vec,
7276            .fno  = gen_helper_gvec_uclamp_d,
7277            .opt_opc = vecop,
7278            .vece = MO_64,
7279            .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
7280      };
7281      tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
7282  }
7283  
7284  TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
7285