xref: /openbmc/qemu/target/arm/tcg/translate-sve.c (revision 76916dfa)
1 /*
2  * AArch64 SVE translation
3  *
4  * Copyright (c) 2018 Linaro, Ltd
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "translate.h"
22 #include "translate-a64.h"
23 #include "fpu/softfloat.h"
24 
25 
/*
 * Signature of a gvec-style expander that takes one TCGv_i64 operand.
 * NOTE(review): presumably (vece, dofs, aofs, scalar, oprsz, maxsz) -- the
 * prototype carries no parameter names, confirm against the users.
 */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/*
 * Helper signatures with a leading TCGv_i32, three or four pointers and a
 * trailing TCGv_i32.
 * NOTE(review): presumably the leading argument receives a flags result and
 * the trailing one is the simd descriptor -- confirm.
 */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/*
 * Helper signatures that take the CPU env, one or three pointers, a
 * TCGv_i64 and a TCGv_i32.
 * NOTE(review): presumably (env, register pointer(s), address, descriptor)
 * for contiguous and gather/scatter memory helpers -- confirm.
 */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
37 
38 /*
39  * Helpers for extracting complex instruction fields.
40  */
41 
42 /* See e.g. ASR (immediate, predicated).
43  * Returns -1 for unallocated encoding; diagnose later.
44  */
45 static int tszimm_esz(DisasContext *s, int x)
46 {
47     x >>= 3;  /* discard imm3 */
48     return 31 - clz32(x);
49 }
50 
51 static int tszimm_shr(DisasContext *s, int x)
52 {
53     /*
54      * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the
55      * trans function will check for esz < 0), so we can return any
56      * value we like from here in that case as long as we avoid UB.
57      */
58     int esz = tszimm_esz(s, x);
59     if (esz < 0) {
60         return esz;
61     }
62     return (16 << esz) - x;
63 }
64 
65 /* See e.g. LSL (immediate, predicated).  */
66 static int tszimm_shl(DisasContext *s, int x)
67 {
68     /* As with tszimm_shr(), value will be unused if esz < 0 */
69     int esz = tszimm_esz(s, x);
70     if (esz < 0) {
71         return esz;
72     }
73     return x - (8 << esz);
74 }
75 
76 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
77 static inline int expand_imm_sh8s(DisasContext *s, int x)
78 {
79     return (int8_t)x << (x & 0x100 ? 8 : 0);
80 }
81 
82 static inline int expand_imm_sh8u(DisasContext *s, int x)
83 {
84     return (uint8_t)x << (x & 0x100 ? 8 : 0);
85 }
86 
87 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
88  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
89  */
90 static inline int msz_dtype(DisasContext *s, int msz)
91 {
92     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
93     return dtype[msz];
94 }
95 
96 /*
97  * Include the generated decoder.
98  */
99 
100 #include "decode-sve.c.inc"
101 
102 /*
103  * Implement all of the translator functions referenced by the decoder.
104  */
105 
106 /* Invoke an out-of-line helper on 2 Zregs. */
107 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
108                             int rd, int rn, int data)
109 {
110     if (fn == NULL) {
111         return false;
112     }
113     if (sve_access_check(s)) {
114         unsigned vsz = vec_full_reg_size(s);
115         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
116                            vec_full_reg_offset(s, rn),
117                            vsz, vsz, data, fn);
118     }
119     return true;
120 }
121 
122 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
123                              int rd, int rn, int data,
124                              ARMFPStatusFlavour flavour)
125 {
126     if (fn == NULL) {
127         return false;
128     }
129     if (sve_access_check(s)) {
130         unsigned vsz = vec_full_reg_size(s);
131         TCGv_ptr status = fpstatus_ptr(flavour);
132 
133         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
134                            vec_full_reg_offset(s, rn),
135                            status, vsz, vsz, data, fn);
136     }
137     return true;
138 }
139 
140 static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
141                                  arg_rr_esz *a, int data)
142 {
143     return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
144                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
145 }
146 
147 /* Invoke an out-of-line helper on 3 Zregs. */
148 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
149                              int rd, int rn, int rm, int data)
150 {
151     if (fn == NULL) {
152         return false;
153     }
154     if (sve_access_check(s)) {
155         unsigned vsz = vec_full_reg_size(s);
156         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
157                            vec_full_reg_offset(s, rn),
158                            vec_full_reg_offset(s, rm),
159                            vsz, vsz, data, fn);
160     }
161     return true;
162 }
163 
/*
 * As gen_gvec_ool_zzz, taking rd/rn/rm from a decoded arg_rrr_esz.
 * The element size in @a is not consulted here; the caller selects @fn.
 */
static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}
169 
170 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
171 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
172                               int rd, int rn, int rm,
173                               int data, ARMFPStatusFlavour flavour)
174 {
175     if (fn == NULL) {
176         return false;
177     }
178     if (sve_access_check(s)) {
179         unsigned vsz = vec_full_reg_size(s);
180         TCGv_ptr status = fpstatus_ptr(flavour);
181 
182         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
183                            vec_full_reg_offset(s, rn),
184                            vec_full_reg_offset(s, rm),
185                            status, vsz, vsz, data, fn);
186     }
187     return true;
188 }
189 
190 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
191                                   arg_rrr_esz *a, int data)
192 {
193     return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
194                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
195 }
196 
197 /* Invoke an out-of-line helper on 4 Zregs. */
198 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
199                               int rd, int rn, int rm, int ra, int data)
200 {
201     if (fn == NULL) {
202         return false;
203     }
204     if (sve_access_check(s)) {
205         unsigned vsz = vec_full_reg_size(s);
206         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
207                            vec_full_reg_offset(s, rn),
208                            vec_full_reg_offset(s, rm),
209                            vec_full_reg_offset(s, ra),
210                            vsz, vsz, data, fn);
211     }
212     return true;
213 }
214 
/*
 * As gen_gvec_ool_zzzz, taking rd/rn/rm/ra from a decoded arg_rrrr_esz.
 * The element size in @a is not consulted here; the caller selects @fn.
 */
static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}
220 
/*
 * As gen_gvec_ool_zzzz for an indexed form: the registers come from a
 * decoded arg_rrxr_esz and its index field is passed as the helper's
 * data argument.
 */
static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
226 
227 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
228 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
229                               int rd, int rn, int rm, int ra,
230                               int data, TCGv_ptr ptr)
231 {
232     if (fn == NULL) {
233         return false;
234     }
235     if (sve_access_check(s)) {
236         unsigned vsz = vec_full_reg_size(s);
237         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
238                            vec_full_reg_offset(s, rn),
239                            vec_full_reg_offset(s, rm),
240                            vec_full_reg_offset(s, ra),
241                            ptr, vsz, vsz, data, fn);
242     }
243     return true;
244 }
245 
246 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
247                                int rd, int rn, int rm, int ra,
248                                int data, ARMFPStatusFlavour flavour)
249 {
250     TCGv_ptr status = fpstatus_ptr(flavour);
251     bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
252     return ret;
253 }
254 
255 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
256 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
257                                 int rd, int rn, int rm, int ra, int pg,
258                                 int data, ARMFPStatusFlavour flavour)
259 {
260     if (fn == NULL) {
261         return false;
262     }
263     if (sve_access_check(s)) {
264         unsigned vsz = vec_full_reg_size(s);
265         TCGv_ptr status = fpstatus_ptr(flavour);
266 
267         tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
268                            vec_full_reg_offset(s, rn),
269                            vec_full_reg_offset(s, rm),
270                            vec_full_reg_offset(s, ra),
271                            pred_full_reg_offset(s, pg),
272                            status, vsz, vsz, data, fn);
273     }
274     return true;
275 }
276 
277 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */
278 static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
279                              int rd, int rn, int pg, int data)
280 {
281     if (fn == NULL) {
282         return false;
283     }
284     if (sve_access_check(s)) {
285         unsigned vsz = vec_full_reg_size(s);
286         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
287                            vec_full_reg_offset(s, rn),
288                            pred_full_reg_offset(s, pg),
289                            vsz, vsz, data, fn);
290     }
291     return true;
292 }
293 
/*
 * As gen_gvec_ool_zzp, taking rd/rn/pg from a decoded arg_rpr_esz.
 * The element size in @a is not consulted here; the caller selects @fn.
 */
static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}
299 
/*
 * As gen_gvec_ool_zzp for an immediate form: the registers come from a
 * decoded arg_rpri_esz and its imm field is passed as the helper's data
 * argument.
 */
static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}
305 
306 static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
307                               int rd, int rn, int pg, int data,
308                               ARMFPStatusFlavour flavour)
309 {
310     if (fn == NULL) {
311         return false;
312     }
313     if (sve_access_check(s)) {
314         unsigned vsz = vec_full_reg_size(s);
315         TCGv_ptr status = fpstatus_ptr(flavour);
316 
317         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
318                            vec_full_reg_offset(s, rn),
319                            pred_full_reg_offset(s, pg),
320                            status, vsz, vsz, data, fn);
321     }
322     return true;
323 }
324 
/*
 * As gen_gvec_fpst_zzp, taking rd/rn/pg from a decoded arg_rpr_esz.
 * Unlike the other fpst "arg" wrappers in this file, the float_status
 * flavour is supplied by the caller rather than derived from a->esz.
 */
static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}
331 
332 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
333 static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
334                               int rd, int rn, int rm, int pg, int data)
335 {
336     if (fn == NULL) {
337         return false;
338     }
339     if (sve_access_check(s)) {
340         unsigned vsz = vec_full_reg_size(s);
341         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
342                            vec_full_reg_offset(s, rn),
343                            vec_full_reg_offset(s, rm),
344                            pred_full_reg_offset(s, pg),
345                            vsz, vsz, data, fn);
346     }
347     return true;
348 }
349 
/*
 * As gen_gvec_ool_zzzp, taking rd/rn/rm/pg from a decoded arg_rprr_esz.
 * The element size in @a is not consulted here; the caller selects @fn.
 */
static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}
355 
356 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
357 static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
358                                int rd, int rn, int rm, int pg, int data,
359                                ARMFPStatusFlavour flavour)
360 {
361     if (fn == NULL) {
362         return false;
363     }
364     if (sve_access_check(s)) {
365         unsigned vsz = vec_full_reg_size(s);
366         TCGv_ptr status = fpstatus_ptr(flavour);
367 
368         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
369                            vec_full_reg_offset(s, rn),
370                            vec_full_reg_offset(s, rm),
371                            pred_full_reg_offset(s, pg),
372                            status, vsz, vsz, data, fn);
373     }
374     return true;
375 }
376 
377 static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
378                                    arg_rprr_esz *a)
379 {
380     return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
381                               a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
382 }
383 
384 /* Invoke a vector expander on two Zregs and an immediate.  */
385 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
386                             int esz, int rd, int rn, uint64_t imm)
387 {
388     if (gvec_fn == NULL) {
389         return false;
390     }
391     if (sve_access_check(s)) {
392         unsigned vsz = vec_full_reg_size(s);
393         gvec_fn(esz, vec_full_reg_offset(s, rd),
394                 vec_full_reg_offset(s, rn), imm, vsz, vsz);
395     }
396     return true;
397 }
398 
399 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
400                                 arg_rri_esz *a)
401 {
402     if (a->esz < 0) {
403         /* Invalid tsz encoding -- see tszimm_esz. */
404         return false;
405     }
406     return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
407 }
408 
409 /* Invoke a vector expander on three Zregs.  */
410 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
411                             int esz, int rd, int rn, int rm)
412 {
413     if (gvec_fn == NULL) {
414         return false;
415     }
416     if (sve_access_check(s)) {
417         unsigned vsz = vec_full_reg_size(s);
418         gvec_fn(esz, vec_full_reg_offset(s, rd),
419                 vec_full_reg_offset(s, rn),
420                 vec_full_reg_offset(s, rm), vsz, vsz);
421     }
422     return true;
423 }
424 
/* As gen_gvec_fn_zzz, taking esz/rd/rn/rm from a decoded arg_rrr_esz. */
static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}
430 
431 /* Invoke a vector expander on four Zregs.  */
432 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
433                                  arg_rrrr_esz *a)
434 {
435     if (gvec_fn == NULL) {
436         return false;
437     }
438     if (sve_access_check(s)) {
439         unsigned vsz = vec_full_reg_size(s);
440         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
441                 vec_full_reg_offset(s, a->rn),
442                 vec_full_reg_offset(s, a->rm),
443                 vec_full_reg_offset(s, a->ra), vsz, vsz);
444     }
445     return true;
446 }
447 
448 /* Invoke a vector move on two Zregs.  */
449 static bool do_mov_z(DisasContext *s, int rd, int rn)
450 {
451     if (sve_access_check(s)) {
452         unsigned vsz = vec_full_reg_size(s);
453         tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
454                          vec_full_reg_offset(s, rn), vsz, vsz);
455     }
456     return true;
457 }
458 
459 /* Initialize a Zreg with replications of a 64-bit immediate.  */
460 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
461 {
462     unsigned vsz = vec_full_reg_size(s);
463     tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
464 }
465 
466 /* Invoke a vector expander on three Pregs.  */
467 static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
468                             int rd, int rn, int rm)
469 {
470     if (sve_access_check(s)) {
471         unsigned psz = pred_gvec_reg_size(s);
472         gvec_fn(MO_64, pred_full_reg_offset(s, rd),
473                 pred_full_reg_offset(s, rn),
474                 pred_full_reg_offset(s, rm), psz, psz);
475     }
476     return true;
477 }
478 
479 /* Invoke a vector move on two Pregs.  */
480 static bool do_mov_p(DisasContext *s, int rd, int rn)
481 {
482     if (sve_access_check(s)) {
483         unsigned psz = pred_gvec_reg_size(s);
484         tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
485                          pred_full_reg_offset(s, rn), psz, psz);
486     }
487     return true;
488 }
489 
/*
 * Set the cpu flags as per a return from an SVE helper.
 * The helper result @t is unpacked as follows:
 *   cpu_NF = t
 *   cpu_ZF = t & 2
 *   cpu_CF = t & 1
 *   cpu_VF = 0
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
498 
499 /* Subroutines computing the ARM PredTest psuedofunction.  */
500 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
501 {
502     TCGv_i32 t = tcg_temp_new_i32();
503 
504     gen_helper_sve_predtest1(t, d, g);
505     do_pred_flags(t);
506 }
507 
508 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
509 {
510     TCGv_ptr dptr = tcg_temp_new_ptr();
511     TCGv_ptr gptr = tcg_temp_new_ptr();
512     TCGv_i32 t = tcg_temp_new_i32();
513 
514     tcg_gen_addi_ptr(dptr, tcg_env, dofs);
515     tcg_gen_addi_ptr(gptr, tcg_env, gofs);
516 
517     gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
518 
519     do_pred_flags(t);
520 }
521 
/*
 * For each element size, the bits within a predicate word that are active.
 * Indexed by log2 of the element size in bytes: the mask keeps one bit in
 * every 1, 2, 4, 8 or 16.
 */
const uint64_t pred_esz_masks[5] = {
    [0] = 0xffffffffffffffffull,
    [1] = 0x5555555555555555ull,
    [2] = 0x1111111111111111ull,
    [3] = 0x0101010101010101ull,
    [4] = 0x0001000100010001ull,
};
528 
/*
 * Decoder callback for the INVALID pattern: raise the unallocated-encoding
 * exception and report the instruction as handled (returns true).
 */
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}
534 
535 /*
536  *** SVE Logical - Unpredicated Group
537  */
538 
/*
 * Unpredicated bitwise AND, ORR, EOR and BIC on Zregs.  Each goes through
 * gen_gvec_fn_arg_zzz with the matching generic gvec expander (andc for
 * BIC) and names the aa64_sve feature.
 * NOTE(review): TRANS_FEAT is defined outside this chunk; presumably it
 * emits the trans_<NAME> decoder callback gated on the feature -- confirm.
 */
TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
543 
544 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
545 {
546     if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
547         return false;
548     }
549     if (sve_access_check(s)) {
550         unsigned vsz = vec_full_reg_size(s);
551         gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
552                      vec_full_reg_offset(s, a->rn),
553                      vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
554     }
555     return true;
556 }
557 
/*
 * EOR3 and BCAX: four-Zreg operations expanded through
 * gen_gvec_fn_arg_zzzz with gen_gvec_eor3 / gen_gvec_bcax, naming the
 * aa64_sve2 feature.
 */
TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a)
TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a)
560 
/*
 * GVecGen4Fn-shaped adapter for BSL: forwards to tcg_gen_gvec_bitsel with
 * the fourth operand @a moved to the front of the source operands, i.e.
 * (d, a, n, m) instead of (d, n, m, a).
 */
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
569 
/*
 * 64-bit integer expansion of BSL1N: d = (k & ~n) | (m & ~k).
 * Both @n and @m are overwritten as scratch, so the statement order
 * matters.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);   /* n = k & ~n */
    tcg_gen_andc_i64(m, m, k);   /* m = m & ~k */
    tcg_gen_or_i64(d, n, m);
}
576 
577 static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
578                           TCGv_vec m, TCGv_vec k)
579 {
580     if (TCG_TARGET_HAS_bitsel_vec) {
581         tcg_gen_not_vec(vece, n, n);
582         tcg_gen_bitsel_vec(vece, d, k, n, m);
583     } else {
584         tcg_gen_andc_vec(vece, n, k, n);
585         tcg_gen_andc_vec(vece, m, m, k);
586         tcg_gen_or_vec(vece, d, n, m);
587     }
588 }
589 
590 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
591                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
592 {
593     static const GVecGen4 op = {
594         .fni8 = gen_bsl1n_i64,
595         .fniv = gen_bsl1n_vec,
596         .fno = gen_helper_sve2_bsl1n,
597         .vece = MO_64,
598         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
599     };
600     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
601 }
602 
603 TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
604 
605 static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
606 {
607     /*
608      * Z[dn] = (n & k) | (~m & ~k)
609      *       =         | ~(m | k)
610      */
611     tcg_gen_and_i64(n, n, k);
612     if (TCG_TARGET_HAS_orc_i64) {
613         tcg_gen_or_i64(m, m, k);
614         tcg_gen_orc_i64(d, n, m);
615     } else {
616         tcg_gen_nor_i64(m, m, k);
617         tcg_gen_or_i64(d, n, m);
618     }
619 }
620 
621 static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
622                           TCGv_vec m, TCGv_vec k)
623 {
624     if (TCG_TARGET_HAS_bitsel_vec) {
625         tcg_gen_not_vec(vece, m, m);
626         tcg_gen_bitsel_vec(vece, d, k, n, m);
627     } else {
628         tcg_gen_and_vec(vece, n, n, k);
629         tcg_gen_or_vec(vece, m, m, k);
630         tcg_gen_orc_vec(vece, d, n, m);
631     }
632 }
633 
634 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
635                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
636 {
637     static const GVecGen4 op = {
638         .fni8 = gen_bsl2n_i64,
639         .fniv = gen_bsl2n_vec,
640         .fno = gen_helper_sve2_bsl2n,
641         .vece = MO_64,
642         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
643     };
644     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
645 }
646 
647 TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
648 
/*
 * 64-bit integer expansion of NBSL: d = ~((n & k) | (m & ~k)).
 * Both @n and @m are overwritten as scratch.
 */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);    /* n = n & k */
    tcg_gen_andc_i64(m, m, k);   /* m = m & ~k */
    tcg_gen_nor_i64(d, n, m);    /* d = ~(n | m) */
}
655 
/*
 * Host-vector expansion of NBSL: bit-select between n and m under k,
 * then invert the result.  Unlike the BSL1N/BSL2N vector expansions,
 * this one calls tcg_gen_bitsel_vec without testing
 * TCG_TARGET_HAS_bitsel_vec.
 */
static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}
662 
663 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
664                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
665 {
666     static const GVecGen4 op = {
667         .fni8 = gen_nbsl_i64,
668         .fniv = gen_nbsl_vec,
669         .fno = gen_helper_sve2_nbsl,
670         .vece = MO_64,
671         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
672     };
673     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
674 }
675 
676 TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
677 
678 /*
679  *** SVE Integer Arithmetic - Unpredicated Group
680  */
681 
/*
 * Unpredicated add/subtract on Zregs: plain (add, sub), signed saturating
 * (ssadd, sssub) and unsigned saturating (usadd, ussub).  Each goes
 * through gen_gvec_fn_arg_zzz with the matching generic gvec expander.
 */
TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
688 
689 /*
690  *** SVE Integer Arithmetic - Binary Predicated Group
691  */
692 
693 /* Select active elememnts from Zn and inactive elements from Zm,
694  * storing the result in Zd.
695  */
696 static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
697 {
698     static gen_helper_gvec_4 * const fns[4] = {
699         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
700         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
701     };
702     return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
703 }
704 
/*
 * DO_ZPZZ(NAME, FEAT, name): define a 4-entry helper table
 * name##_zpzz_fns[] holding the _b/_h/_s/_d variants of
 * gen_helper_<name>_zpzz, and a TRANS_FEAT entry for NAME that indexes
 * the table with a->esz and forwards to gen_gvec_ool_arg_zpzz with
 * data == 0.
 */
#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {               \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,           \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                         \
               name##_zpzz_fns[a->esz], a, 0)

/* Predicated bitwise operations. */
DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

/* Predicated add and subtract. */
DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

/* Predicated min/max and absolute difference, signed and unsigned. */
DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

/* Predicated multiply and multiply-high. */
DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

/* Predicated shifts by vector. */
DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
735 
/*
 * Predicated divide helpers exist only for the s and d element sizes.
 * The NULL entries for b and h make gen_gvec_ool_zzzp return false for
 * those encodings.
 */
static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

/* SEL (vectors): forwarded field-by-field to do_sel_z. */
TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
747 
748 /*
749  *** SVE Integer Arithmetic - Unary Predicated Group
750  */
751 
/*
 * DO_ZPZ(NAME, FEAT, name): define a 4-entry helper table name##_fns[]
 * holding the _b/_h/_s/_d variants of gen_helper_<name>, and a TRANS_FEAT
 * entry for NAME that indexes the table with a->esz and forwards to
 * gen_gvec_ool_arg_zpz with data == 0.
 */
#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {              \
        gen_helper_##name##_b, gen_helper_##name##_h,               \
        gen_helper_##name##_s, gen_helper_##name##_d,               \
    };                                                              \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

/* Unary predicated operations available at every element size. */
DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)
767 
/*
 * Unary predicated operations that are not available at every element
 * size.  A NULL table entry (or NULL selected inline) makes
 * gen_gvec_ool_zzp return false for that element size.
 */

/* FABS / FNEG: no byte-sized variant. */
static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

/* SXTB / UXTB: extend from byte, so helpers exist for h, s and d only. */
static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

/* SXTH / UXTH: extend from halfword, so helpers exist for s and d only. */
static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

/* SXTW / UXTW: extend from word, so only esz == 3 (d) has a helper. */
TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
806 
807 /*
808  *** SVE Integer Reduction Group
809  */
810 
811 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
812 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
813                        gen_helper_gvec_reduc *fn)
814 {
815     unsigned vsz = vec_full_reg_size(s);
816     TCGv_ptr t_zn, t_pg;
817     TCGv_i32 desc;
818     TCGv_i64 temp;
819 
820     if (fn == NULL) {
821         return false;
822     }
823     if (!sve_access_check(s)) {
824         return true;
825     }
826 
827     desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
828     temp = tcg_temp_new_i64();
829     t_zn = tcg_temp_new_ptr();
830     t_pg = tcg_temp_new_ptr();
831 
832     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
833     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
834     fn(temp, t_zn, t_pg, desc);
835 
836     write_fp_dreg(s, a->rd, temp);
837     return true;
838 }
839 
/*
 * DO_VPZ(NAME, name): define a 4-entry reduction helper table
 * name##_fns[] holding gen_helper_sve_<name>_{b,h,s,d}, and a TRANS_FEAT
 * entry for NAME (feature aa64_sve) that indexes the table with a->esz
 * and forwards to do_vpz_ool.
 */
#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

/* Bitwise reductions. */
DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

/* Unsigned add and min/max reductions. */
DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/*
 * SADDV has no d-sized helper; the NULL entry makes do_vpz_ool return
 * false for esz == 3, which is why it cannot use DO_VPZ.
 */
static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ
864 
865 /*
866  *** SVE Shift by Immediate - Predicated Group
867  */
868 
869 /*
870  * Copy Zn into Zd, storing zeros into inactive elements.
871  * If invert, store zeros into the active elements.
872  */
873 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
874                         int esz, bool invert)
875 {
876     static gen_helper_gvec_3 * const fns[4] = {
877         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
878         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
879     };
880     return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
881 }
882 
883 static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
884                           gen_helper_gvec_3 * const fns[4])
885 {
886     int max;
887 
888     if (a->esz < 0) {
889         /* Invalid tsz encoding -- see tszimm_esz. */
890         return false;
891     }
892 
893     /*
894      * Shift by element size is architecturally valid.
895      * For arithmetic right-shift, it's the same as by one less.
896      * For logical shifts and ASRD, it is a zeroing operation.
897      */
898     max = 8 << a->esz;
899     if (a->imm >= max) {
900         if (asr) {
901             a->imm = max - 1;
902         } else {
903             return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
904         }
905     }
906     return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
907 }
908 
/*
 * Predicated shifts by immediate that go through do_shift_zpzi.
 * Each table is indexed by element size (byte, halfword, word,
 * doubleword).  The boolean passed after 'a' is the asr flag of
 * do_shift_zpzi: only ASR sets it.
 */

/* ASR: a shift by the element width is clamped to width - 1. */
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

/* LSR: a shift by the element width zeroes the active elements. */
static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

/* LSL: a shift by the element width zeroes the active elements. */
static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

/*
 * ASRD is passed asr == false, so a shift by the element width takes
 * the zeroing path of do_shift_zpzi rather than being clamped.
 */
static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)
932 
/*
 * SVE2 predicated shifts by immediate.  These bypass do_shift_zpzi and
 * call gen_gvec_ool_arg_zpzi directly.  Each table is indexed by element
 * size; a negative a->esz (invalid tsz encoding, see tszimm_esz) selects
 * a NULL helper instead of indexing the table.
 * NOTE(review): presumably gen_gvec_ool_arg_zpzi rejects a NULL helper
 * as an unallocated encoding -- confirm against its definition.
 */

/* SQSHL (immediate, predicated). */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

/* UQSHL (immediate, predicated). */
static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

/* SRSHR. */
static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

/* URSHR. */
static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

/* SQSHLU. */
static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
967 
968 /*
969  *** SVE Bitwise Shift - Predicated Group
970  */
971 
/*
 * DO_ZPZW(NAME, name): build name##_zpzw_fns[] for the byte, halfword
 * and word element sizes and pass the selected helper to
 * gen_gvec_ool_arg_zpzz with a data value of 0.  There is no doubleword
 * form (the last slot is NULL), and a negative a->esz also selects NULL
 * rather than indexing the table.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s, NULL                              \
    };                                                                    \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
985 
986 /*
987  *** SVE Bitwise Shift - Unpredicated Group
988  */
989 
990 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
991                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
992                                          int64_t, uint32_t, uint32_t))
993 {
994     if (a->esz < 0) {
995         /* Invalid tsz encoding -- see tszimm_esz. */
996         return false;
997     }
998     if (sve_access_check(s)) {
999         unsigned vsz = vec_full_reg_size(s);
1000         /* Shift by element size is architecturally valid.  For
1001            arithmetic right-shift, it's the same as by one less.
1002            Otherwise it is a zeroing operation.  */
1003         if (a->imm >= 8 << a->esz) {
1004             if (asr) {
1005                 a->imm = (8 << a->esz) - 1;
1006             } else {
1007                 do_dupi_z(s, a->rd, 0);
1008                 return true;
1009             }
1010         }
1011         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1012                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1013     }
1014     return true;
1015 }
1016 
/*
 * Unpredicated shifts by immediate.  Each line pairs do_shift_imm with
 * the matching generic-vector expander; only ASR sets the asr flag, so
 * an LSR or LSL by the full element width zeroes the destination.
 */
TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
1020 
/*
 * DO_ZZW(NAME, name): build name##_zzw_fns[] for the byte, halfword and
 * word element sizes and pass the entry selected by a->esz to
 * gen_gvec_ool_arg_zzz with a data value of 0.  The doubleword slot is
 * NULL.
 * NOTE(review): presumably gen_gvec_ool_arg_zzz treats a NULL helper as
 * an unallocated encoding -- confirm against its definition.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1034 
1035 /*
1036  *** SVE Integer Multiply-Add Group
1037  */
1038 
1039 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1040                          gen_helper_gvec_5 *fn)
1041 {
1042     if (sve_access_check(s)) {
1043         unsigned vsz = vec_full_reg_size(s);
1044         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1045                            vec_full_reg_offset(s, a->ra),
1046                            vec_full_reg_offset(s, a->rn),
1047                            vec_full_reg_offset(s, a->rm),
1048                            pred_full_reg_offset(s, a->pg),
1049                            vsz, vsz, 0, fn);
1050     }
1051     return true;
1052 }
1053 
/*
 * MLA and MLS: one helper per element size (byte, halfword, word,
 * doubleword), selected by a->esz and expanded through do_zpzzz_ool.
 */
static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
1065 
1066 /*
1067  *** SVE Index Generation Group
1068  */
1069 
1070 static bool do_index(DisasContext *s, int esz, int rd,
1071                      TCGv_i64 start, TCGv_i64 incr)
1072 {
1073     unsigned vsz;
1074     TCGv_i32 desc;
1075     TCGv_ptr t_zd;
1076 
1077     if (!sve_access_check(s)) {
1078         return true;
1079     }
1080 
1081     vsz = vec_full_reg_size(s);
1082     desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1083     t_zd = tcg_temp_new_ptr();
1084 
1085     tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
1086     if (esz == 3) {
1087         gen_helper_sve_index_d(t_zd, start, incr, desc);
1088     } else {
1089         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
1090         static index_fn * const fns[3] = {
1091             gen_helper_sve_index_b,
1092             gen_helper_sve_index_h,
1093             gen_helper_sve_index_s,
1094         };
1095         TCGv_i32 s32 = tcg_temp_new_i32();
1096         TCGv_i32 i32 = tcg_temp_new_i32();
1097 
1098         tcg_gen_extrl_i64_i32(s32, start);
1099         tcg_gen_extrl_i64_i32(i32, incr);
1100         fns[esz](t_zd, s32, i32, desc);
1101     }
1102     return true;
1103 }
1104 
/*
 * The four INDEX forms differ only in where the start value and the
 * increment come from: 'i' is an immediate turned into a TCG constant,
 * 'r' is a general-purpose register read with cpu_reg.  The first
 * letter is the start value, the second the increment.
 */
TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
1113 
1114 /*
1115  *** SVE Stack Allocation Group
1116  */
1117 
1118 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
1119 {
1120     if (!dc_isar_feature(aa64_sve, s)) {
1121         return false;
1122     }
1123     if (sve_access_check(s)) {
1124         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1125         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1126         tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1127     }
1128     return true;
1129 }
1130 
1131 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
1132 {
1133     if (!dc_isar_feature(aa64_sme, s)) {
1134         return false;
1135     }
1136     if (sme_enabled_check(s)) {
1137         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1138         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1139         tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
1140     }
1141     return true;
1142 }
1143 
1144 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
1145 {
1146     if (!dc_isar_feature(aa64_sve, s)) {
1147         return false;
1148     }
1149     if (sve_access_check(s)) {
1150         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1151         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1152         tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1153     }
1154     return true;
1155 }
1156 
1157 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
1158 {
1159     if (!dc_isar_feature(aa64_sme, s)) {
1160         return false;
1161     }
1162     if (sme_enabled_check(s)) {
1163         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1164         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1165         tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
1166     }
1167     return true;
1168 }
1169 
1170 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
1171 {
1172     if (!dc_isar_feature(aa64_sve, s)) {
1173         return false;
1174     }
1175     if (sve_access_check(s)) {
1176         TCGv_i64 reg = cpu_reg(s, a->rd);
1177         tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1178     }
1179     return true;
1180 }
1181 
1182 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
1183 {
1184     if (!dc_isar_feature(aa64_sme, s)) {
1185         return false;
1186     }
1187     if (sme_enabled_check(s)) {
1188         TCGv_i64 reg = cpu_reg(s, a->rd);
1189         tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
1190     }
1191     return true;
1192 }
1193 
1194 /*
1195  *** SVE Compute Vector Address Group
1196  */
1197 
1198 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
1199 {
1200     return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
1201 }
1202 
/*
 * The four ADR variants share do_adr and differ only in the helper.
 * NOTE(review): TRANS_FEAT_NONSTREAMING presumably also marks the
 * instruction as non-streaming (compare s->is_nonstreaming in
 * trans_RDFFR_p) -- confirm against the macro definition.
 */
TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
1207 
1208 /*
1209  *** SVE Integer Misc - Unpredicated Group
1210  */
1211 
/*
 * FEXPA: helpers exist for halfword, word and doubleword elements only;
 * the byte slot is NULL.  Expanded through gen_gvec_ool_zz with a data
 * value of 0.
 */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL,                   gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

/*
 * FTSSEL: likewise no byte form.  Expanded through gen_gvec_ool_arg_zzz
 * with a data value of 0.
 */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL,                    gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)
1225 
1226 /*
1227  *** SVE Predicate Logical Operations Group
1228  */
1229 
1230 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1231                           const GVecGen4 *gvec_op)
1232 {
1233     if (!sve_access_check(s)) {
1234         return true;
1235     }
1236 
1237     unsigned psz = pred_gvec_reg_size(s);
1238     int dofs = pred_full_reg_offset(s, a->rd);
1239     int nofs = pred_full_reg_offset(s, a->rn);
1240     int mofs = pred_full_reg_offset(s, a->rm);
1241     int gofs = pred_full_reg_offset(s, a->pg);
1242 
1243     if (!a->s) {
1244         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1245         return true;
1246     }
1247 
1248     if (psz == 8) {
1249         /* Do the operation and the flags generation in temps.  */
1250         TCGv_i64 pd = tcg_temp_new_i64();
1251         TCGv_i64 pn = tcg_temp_new_i64();
1252         TCGv_i64 pm = tcg_temp_new_i64();
1253         TCGv_i64 pg = tcg_temp_new_i64();
1254 
1255         tcg_gen_ld_i64(pn, tcg_env, nofs);
1256         tcg_gen_ld_i64(pm, tcg_env, mofs);
1257         tcg_gen_ld_i64(pg, tcg_env, gofs);
1258 
1259         gvec_op->fni8(pd, pn, pm, pg);
1260         tcg_gen_st_i64(pd, tcg_env, dofs);
1261 
1262         do_predtest1(pd, pg);
1263     } else {
1264         /* The operation and flags generation is large.  The computation
1265          * of the flags depends on the original contents of the guarding
1266          * predicate.  If the destination overwrites the guarding predicate,
1267          * then the easiest way to get this right is to save a copy.
1268           */
1269         int tofs = gofs;
1270         if (a->rd == a->pg) {
1271             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1272             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1273         }
1274 
1275         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1276         do_predtest(s, dofs, tofs, psz / 8);
1277     }
1278     return true;
1279 }
1280 
/* Emit pd = (pn & pm) & pg on 64-bit values. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1286 
/* Emit pd = (pn & pm) & pg on host vectors of element size VECE. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1293 
1294 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1295 {
1296     static const GVecGen4 op = {
1297         .fni8 = gen_and_pg_i64,
1298         .fniv = gen_and_pg_vec,
1299         .fno = gen_helper_sve_and_pppp,
1300         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1301     };
1302 
1303     if (!dc_isar_feature(aa64_sve, s)) {
1304         return false;
1305     }
1306     if (!a->s) {
1307         if (a->rn == a->rm) {
1308             if (a->pg == a->rn) {
1309                 return do_mov_p(s, a->rd, a->rn);
1310             }
1311             return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1312         } else if (a->pg == a->rn || a->pg == a->rm) {
1313             return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1314         }
1315     }
1316     return do_pppp_flags(s, a, &op);
1317 }
1318 
/* Emit pd = (pn & ~pm) & pg on 64-bit values. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1324 
/* Emit pd = (pn & ~pm) & pg on host vectors of element size VECE. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1331 
1332 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1333 {
1334     static const GVecGen4 op = {
1335         .fni8 = gen_bic_pg_i64,
1336         .fniv = gen_bic_pg_vec,
1337         .fno = gen_helper_sve_bic_pppp,
1338         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1339     };
1340 
1341     if (!dc_isar_feature(aa64_sve, s)) {
1342         return false;
1343     }
1344     if (!a->s && a->pg == a->rn) {
1345         return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1346     }
1347     return do_pppp_flags(s, a, &op);
1348 }
1349 
/* Emit pd = (pn ^ pm) & pg on 64-bit values. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1355 
/* Emit pd = (pn ^ pm) & pg on host vectors of element size VECE. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1362 
1363 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1364 {
1365     static const GVecGen4 op = {
1366         .fni8 = gen_eor_pg_i64,
1367         .fniv = gen_eor_pg_vec,
1368         .fno = gen_helper_sve_eor_pppp,
1369         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1370     };
1371 
1372     if (!dc_isar_feature(aa64_sve, s)) {
1373         return false;
1374     }
1375     /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
1376     if (!a->s && a->pg == a->rm) {
1377         return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
1378     }
1379     return do_pppp_flags(s, a, &op);
1380 }
1381 
1382 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1383 {
1384     if (a->s || !dc_isar_feature(aa64_sve, s)) {
1385         return false;
1386     }
1387     if (sve_access_check(s)) {
1388         unsigned psz = pred_gvec_reg_size(s);
1389         tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1390                             pred_full_reg_offset(s, a->pg),
1391                             pred_full_reg_offset(s, a->rn),
1392                             pred_full_reg_offset(s, a->rm), psz, psz);
1393     }
1394     return true;
1395 }
1396 
/* Emit pd = (pn | pm) & pg on 64-bit values. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1402 
/* Emit pd = (pn | pm) & pg on host vectors of element size VECE. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1409 
1410 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1411 {
1412     static const GVecGen4 op = {
1413         .fni8 = gen_orr_pg_i64,
1414         .fniv = gen_orr_pg_vec,
1415         .fno = gen_helper_sve_orr_pppp,
1416         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1417     };
1418 
1419     if (!dc_isar_feature(aa64_sve, s)) {
1420         return false;
1421     }
1422     if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1423         return do_mov_p(s, a->rd, a->rn);
1424     }
1425     return do_pppp_flags(s, a, &op);
1426 }
1427 
/* Emit pd = (pn | ~pm) & pg on 64-bit values. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1433 
/* Emit pd = (pn | ~pm) & pg on host vectors of element size VECE. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1440 
1441 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1442 {
1443     static const GVecGen4 op = {
1444         .fni8 = gen_orn_pg_i64,
1445         .fniv = gen_orn_pg_vec,
1446         .fno = gen_helper_sve_orn_pppp,
1447         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1448     };
1449 
1450     if (!dc_isar_feature(aa64_sve, s)) {
1451         return false;
1452     }
1453     return do_pppp_flags(s, a, &op);
1454 }
1455 
/* Emit pd = pg & ~(pn | pm) on 64-bit values. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1461 
/* Emit pd = pg & ~(pn | pm) on host vectors of element size VECE. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1468 
1469 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1470 {
1471     static const GVecGen4 op = {
1472         .fni8 = gen_nor_pg_i64,
1473         .fniv = gen_nor_pg_vec,
1474         .fno = gen_helper_sve_nor_pppp,
1475         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1476     };
1477 
1478     if (!dc_isar_feature(aa64_sve, s)) {
1479         return false;
1480     }
1481     return do_pppp_flags(s, a, &op);
1482 }
1483 
/* Emit pd = pg & ~(pn & pm) on 64-bit values. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1489 
/* Emit pd = pg & ~(pn & pm) on host vectors of element size VECE. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1496 
1497 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1498 {
1499     static const GVecGen4 op = {
1500         .fni8 = gen_nand_pg_i64,
1501         .fniv = gen_nand_pg_vec,
1502         .fno = gen_helper_sve_nand_pppp,
1503         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1504     };
1505 
1506     if (!dc_isar_feature(aa64_sve, s)) {
1507         return false;
1508     }
1509     return do_pppp_flags(s, a, &op);
1510 }
1511 
1512 /*
1513  *** SVE Predicate Misc Group
1514  */
1515 
1516 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1517 {
1518     if (!dc_isar_feature(aa64_sve, s)) {
1519         return false;
1520     }
1521     if (sve_access_check(s)) {
1522         int nofs = pred_full_reg_offset(s, a->rn);
1523         int gofs = pred_full_reg_offset(s, a->pg);
1524         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1525 
1526         if (words == 1) {
1527             TCGv_i64 pn = tcg_temp_new_i64();
1528             TCGv_i64 pg = tcg_temp_new_i64();
1529 
1530             tcg_gen_ld_i64(pn, tcg_env, nofs);
1531             tcg_gen_ld_i64(pg, tcg_env, gofs);
1532             do_predtest1(pn, pg);
1533         } else {
1534             do_predtest(s, nofs, gofs, words);
1535         }
1536     }
1537     return true;
1538 }
1539 
/*
 * Number of elements selected by a predicate-constraint PATTERN, for a
 * vector of FULLSZ bytes holding elements of (1 << ESZ) bytes.
 * See the ARM pseudocode DecodePredCount.
 *
 *   0x00        POW2   largest power of two not above the element count
 *   0x01..0x08  VL1..VL8      exactly that many, if they fit
 *   0x09..0x0d  VL16..VL256   exactly that many, if they fit
 *   0x1d        MUL4   largest multiple of four
 *   0x1e        MUL3   largest multiple of three
 *   0x1f        ALL    every element
 *   otherwise   #uimm5 none
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        return pow2floor(elements);
    }
    if (pattern == 0x1d) {
        return elements - elements % 4;
    }
    if (pattern == 0x1e) {
        return elements - elements % 3;
    }
    if (pattern == 0x1f) {
        return elements;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        bound = 16 << (pattern - 9);
    } else {
        return 0;
    }

    /* A fixed count applies only when the vector holds that many. */
    if (elements < bound) {
        return 0;
    }
    return bound;
}
1577 
/*
 * This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 *
 * ESZ is the element size, RD the destination predicate number, PAT the
 * pattern passed to decode_pred_count.  When SETFLAG is true the
 * condition flags are also written (the PTRUES form).
 *
 * Always returns true; nothing is emitted when the SVE access check
 * fails.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /*
     * Determine what we must store into each bit, and how many.
     * 'word' is the value for a fully populated 64-bit word and
     * 'lastword' the value for the final, possibly partial, word.
     * From here until the division by 8 below, setsz and fullsz are
     * used as counts of predicate bits.
     */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Trim the final word to the bits actually selected. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one 64-bit store. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, tcg_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* No partial word: try a single gvec dup over the set region. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        /* Only usable if size_for_gvec did not change the set size. */
        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Convert both sizes from bits to bytes for the store loops. */
    setsz /= 8;
    fullsz /= 8;

    /* Full words first ... */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, tcg_env, ofs + i);
    }
    /* ... then the partial word, if there is one ... */
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, tcg_env, ofs + i);
        i += 8;
    }
    /* ... and zeros for whatever remains of the register. */
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, tcg_env, ofs + i);
        }
    }

 done:
    /*
     * PTRUES: the flags are translation-time constants that depend
     * only on whether any element was set (word != 0).
     */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1655 
/* PTRUE and PTRUES: a->s requests the flag-setting form. */
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements.  */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements.  */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
1664 
1665 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1666 {
1667     /* The path through do_pppp_flags is complicated enough to want to avoid
1668      * duplication.  Frob the arguments into the form of a predicated AND.
1669      */
1670     arg_rprr_s alt_a = {
1671         .rd = a->rd, .pg = a->pg, .s = a->s,
1672         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1673     };
1674 
1675     s->is_nonstreaming = true;
1676     return trans_AND_pppp(s, &alt_a);
1677 }
1678 
/*
 * Unpredicated FFR access is a plain predicate move: RDFFR copies FFR
 * into Pd, WRFFR copies Pn into FFR.
 */
TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
1681 
1682 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1683                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1684                                            TCGv_ptr, TCGv_i32))
1685 {
1686     if (!sve_access_check(s)) {
1687         return true;
1688     }
1689 
1690     TCGv_ptr t_pd = tcg_temp_new_ptr();
1691     TCGv_ptr t_pg = tcg_temp_new_ptr();
1692     TCGv_i32 t;
1693     unsigned desc = 0;
1694 
1695     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1696     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
1697 
1698     tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd));
1699     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn));
1700     t = tcg_temp_new_i32();
1701 
1702     gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
1703 
1704     do_pred_flags(t);
1705     return true;
1706 }
1707 
/* PFIRST and PNEXT share do_pfirst_pnext and differ only in the helper. */
TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
1710 
1711 /*
1712  *** SVE Element Count Group
1713  */
1714 
1715 /* Perform an inline saturating addition of a 32-bit value within
1716  * a 64-bit register.  The second operand is known to be positive,
1717  * which halves the comparisons we must perform to bound the result.
1718  */
1719 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1720 {
1721     int64_t ibound;
1722 
1723     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1724     if (u) {
1725         tcg_gen_ext32u_i64(reg, reg);
1726     } else {
1727         tcg_gen_ext32s_i64(reg, reg);
1728     }
1729     if (d) {
1730         tcg_gen_sub_i64(reg, reg, val);
1731         ibound = (u ? 0 : INT32_MIN);
1732         tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
1733     } else {
1734         tcg_gen_add_i64(reg, reg, val);
1735         ibound = (u ? UINT32_MAX : INT32_MAX);
1736         tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
1737     }
1738 }
1739 
1740 /* Similarly with 64-bit values.  */
1741 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1742 {
1743     TCGv_i64 t0 = tcg_temp_new_i64();
1744     TCGv_i64 t2;
1745 
1746     if (u) {
1747         if (d) {
1748             tcg_gen_sub_i64(t0, reg, val);
1749             t2 = tcg_constant_i64(0);
1750             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
1751         } else {
1752             tcg_gen_add_i64(t0, reg, val);
1753             t2 = tcg_constant_i64(-1);
1754             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
1755         }
1756     } else {
1757         TCGv_i64 t1 = tcg_temp_new_i64();
1758         if (d) {
1759             /* Detect signed overflow for subtraction.  */
1760             tcg_gen_xor_i64(t0, reg, val);
1761             tcg_gen_sub_i64(t1, reg, val);
1762             tcg_gen_xor_i64(reg, reg, t1);
1763             tcg_gen_and_i64(t0, t0, reg);
1764 
1765             /* Bound the result.  */
1766             tcg_gen_movi_i64(reg, INT64_MIN);
1767             t2 = tcg_constant_i64(0);
1768             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1769         } else {
1770             /* Detect signed overflow for addition.  */
1771             tcg_gen_xor_i64(t0, reg, val);
1772             tcg_gen_add_i64(reg, reg, val);
1773             tcg_gen_xor_i64(t1, reg, val);
1774             tcg_gen_andc_i64(t0, t1, t0);
1775 
1776             /* Bound the result.  */
1777             tcg_gen_movi_i64(t1, INT64_MAX);
1778             t2 = tcg_constant_i64(0);
1779             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1780         }
1781     }
1782 }
1783 
1784 /* Similarly with a vector and a scalar operand.  */
1785 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1786                               TCGv_i64 val, bool u, bool d)
1787 {
1788     unsigned vsz = vec_full_reg_size(s);
1789     TCGv_ptr dptr, nptr;
1790     TCGv_i32 t32, desc;
1791     TCGv_i64 t64;
1792 
1793     dptr = tcg_temp_new_ptr();
1794     nptr = tcg_temp_new_ptr();
1795     tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd));
1796     tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn));
1797     desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1798 
1799     switch (esz) {
1800     case MO_8:
1801         t32 = tcg_temp_new_i32();
1802         tcg_gen_extrl_i64_i32(t32, val);
1803         if (d) {
1804             tcg_gen_neg_i32(t32, t32);
1805         }
1806         if (u) {
1807             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1808         } else {
1809             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1810         }
1811         break;
1812 
1813     case MO_16:
1814         t32 = tcg_temp_new_i32();
1815         tcg_gen_extrl_i64_i32(t32, val);
1816         if (d) {
1817             tcg_gen_neg_i32(t32, t32);
1818         }
1819         if (u) {
1820             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1821         } else {
1822             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1823         }
1824         break;
1825 
1826     case MO_32:
1827         t64 = tcg_temp_new_i64();
1828         if (d) {
1829             tcg_gen_neg_i64(t64, val);
1830         } else {
1831             tcg_gen_mov_i64(t64, val);
1832         }
1833         if (u) {
1834             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1835         } else {
1836             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1837         }
1838         break;
1839 
1840     case MO_64:
1841         if (u) {
1842             if (d) {
1843                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1844             } else {
1845                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1846             }
1847         } else if (d) {
1848             t64 = tcg_temp_new_i64();
1849             tcg_gen_neg_i64(t64, val);
1850             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1851         } else {
1852             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1853         }
1854         break;
1855 
1856     default:
1857         g_assert_not_reached();
1858     }
1859 }
1860 
1861 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1862 {
1863     if (!dc_isar_feature(aa64_sve, s)) {
1864         return false;
1865     }
1866     if (sve_access_check(s)) {
1867         unsigned fullsz = vec_full_reg_size(s);
1868         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1869         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1870     }
1871     return true;
1872 }
1873 
1874 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1875 {
1876     if (!dc_isar_feature(aa64_sve, s)) {
1877         return false;
1878     }
1879     if (sve_access_check(s)) {
1880         unsigned fullsz = vec_full_reg_size(s);
1881         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1882         int inc = numelem * a->imm * (a->d ? -1 : 1);
1883         TCGv_i64 reg = cpu_reg(s, a->rd);
1884 
1885         tcg_gen_addi_i64(reg, reg, inc);
1886     }
1887     return true;
1888 }
1889 
1890 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1891 {
1892     if (!dc_isar_feature(aa64_sve, s)) {
1893         return false;
1894     }
1895     if (!sve_access_check(s)) {
1896         return true;
1897     }
1898 
1899     unsigned fullsz = vec_full_reg_size(s);
1900     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1901     int inc = numelem * a->imm;
1902     TCGv_i64 reg = cpu_reg(s, a->rd);
1903 
1904     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1905     if (inc == 0) {
1906         if (a->u) {
1907             tcg_gen_ext32u_i64(reg, reg);
1908         } else {
1909             tcg_gen_ext32s_i64(reg, reg);
1910         }
1911     } else {
1912         do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
1913     }
1914     return true;
1915 }
1916 
1917 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1918 {
1919     if (!dc_isar_feature(aa64_sve, s)) {
1920         return false;
1921     }
1922     if (!sve_access_check(s)) {
1923         return true;
1924     }
1925 
1926     unsigned fullsz = vec_full_reg_size(s);
1927     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1928     int inc = numelem * a->imm;
1929     TCGv_i64 reg = cpu_reg(s, a->rd);
1930 
1931     if (inc != 0) {
1932         do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
1933     }
1934     return true;
1935 }
1936 
1937 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1938 {
1939     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
1940         return false;
1941     }
1942 
1943     unsigned fullsz = vec_full_reg_size(s);
1944     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1945     int inc = numelem * a->imm;
1946 
1947     if (inc != 0) {
1948         if (sve_access_check(s)) {
1949             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1950                               vec_full_reg_offset(s, a->rn),
1951                               tcg_constant_i64(a->d ? -inc : inc),
1952                               fullsz, fullsz);
1953         }
1954     } else {
1955         do_mov_z(s, a->rd, a->rn);
1956     }
1957     return true;
1958 }
1959 
1960 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1961 {
1962     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
1963         return false;
1964     }
1965 
1966     unsigned fullsz = vec_full_reg_size(s);
1967     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1968     int inc = numelem * a->imm;
1969 
1970     if (inc != 0) {
1971         if (sve_access_check(s)) {
1972             do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
1973                               tcg_constant_i64(inc), a->u, a->d);
1974         }
1975     } else {
1976         do_mov_z(s, a->rd, a->rn);
1977     }
1978     return true;
1979 }
1980 
1981 /*
1982  *** SVE Bitwise Immediate Group
1983  */
1984 
1985 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1986 {
1987     uint64_t imm;
1988     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1989                                 extract32(a->dbm, 0, 6),
1990                                 extract32(a->dbm, 6, 6))) {
1991         return false;
1992     }
1993     return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
1994 }
1995 
1996 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
1997 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
1998 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
1999 
2000 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
2001 {
2002     uint64_t imm;
2003 
2004     if (!dc_isar_feature(aa64_sve, s)) {
2005         return false;
2006     }
2007     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2008                                 extract32(a->dbm, 0, 6),
2009                                 extract32(a->dbm, 6, 6))) {
2010         return false;
2011     }
2012     if (sve_access_check(s)) {
2013         do_dupi_z(s, a->rd, imm);
2014     }
2015     return true;
2016 }
2017 
2018 /*
2019  *** SVE Integer Wide Immediate - Predicated Group
2020  */
2021 
2022 /* Implement all merging copies.  This is used for CPY (immediate),
2023  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2024  */
2025 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2026                      TCGv_i64 val)
2027 {
2028     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2029     static gen_cpy * const fns[4] = {
2030         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2031         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2032     };
2033     unsigned vsz = vec_full_reg_size(s);
2034     TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2035     TCGv_ptr t_zd = tcg_temp_new_ptr();
2036     TCGv_ptr t_zn = tcg_temp_new_ptr();
2037     TCGv_ptr t_pg = tcg_temp_new_ptr();
2038 
2039     tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
2040     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn));
2041     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
2042 
2043     fns[esz](t_zd, t_zn, t_pg, val, desc);
2044 }
2045 
2046 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
2047 {
2048     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2049         return false;
2050     }
2051     if (sve_access_check(s)) {
2052         /* Decode the VFP immediate.  */
2053         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2054         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
2055     }
2056     return true;
2057 }
2058 
2059 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
2060 {
2061     if (!dc_isar_feature(aa64_sve, s)) {
2062         return false;
2063     }
2064     if (sve_access_check(s)) {
2065         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
2066     }
2067     return true;
2068 }
2069 
2070 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
2071 {
2072     static gen_helper_gvec_2i * const fns[4] = {
2073         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2074         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2075     };
2076 
2077     if (!dc_isar_feature(aa64_sve, s)) {
2078         return false;
2079     }
2080     if (sve_access_check(s)) {
2081         unsigned vsz = vec_full_reg_size(s);
2082         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2083                             pred_full_reg_offset(s, a->pg),
2084                             tcg_constant_i64(a->imm),
2085                             vsz, vsz, 0, fns[a->esz]);
2086     }
2087     return true;
2088 }
2089 
2090 /*
2091  *** SVE Permute Extract Group
2092  */
2093 
2094 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
2095 {
2096     if (!sve_access_check(s)) {
2097         return true;
2098     }
2099 
2100     unsigned vsz = vec_full_reg_size(s);
2101     unsigned n_ofs = imm >= vsz ? 0 : imm;
2102     unsigned n_siz = vsz - n_ofs;
2103     unsigned d = vec_full_reg_offset(s, rd);
2104     unsigned n = vec_full_reg_offset(s, rn);
2105     unsigned m = vec_full_reg_offset(s, rm);
2106 
2107     /* Use host vector move insns if we have appropriate sizes
2108      * and no unfortunate overlap.
2109      */
2110     if (m != d
2111         && n_ofs == size_for_gvec(n_ofs)
2112         && n_siz == size_for_gvec(n_siz)
2113         && (d != n || n_siz <= n_ofs)) {
2114         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2115         if (n_ofs != 0) {
2116             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2117         }
2118     } else {
2119         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2120     }
2121     return true;
2122 }
2123 
2124 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2125 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
2126 
2127 /*
2128  *** SVE Permute - Unpredicated Group
2129  */
2130 
2131 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2132 {
2133     if (!dc_isar_feature(aa64_sve, s)) {
2134         return false;
2135     }
2136     if (sve_access_check(s)) {
2137         unsigned vsz = vec_full_reg_size(s);
2138         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2139                              vsz, vsz, cpu_reg_sp(s, a->rn));
2140     }
2141     return true;
2142 }
2143 
2144 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2145 {
2146     if (!dc_isar_feature(aa64_sve, s)) {
2147         return false;
2148     }
2149     if ((a->imm & 0x1f) == 0) {
2150         return false;
2151     }
2152     if (sve_access_check(s)) {
2153         unsigned vsz = vec_full_reg_size(s);
2154         unsigned dofs = vec_full_reg_offset(s, a->rd);
2155         unsigned esz, index;
2156 
2157         esz = ctz32(a->imm);
2158         index = a->imm >> (esz + 1);
2159 
2160         if ((index << esz) < vsz) {
2161             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2162             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2163         } else {
2164             /*
2165              * While dup_mem handles 128-bit elements, dup_imm does not.
2166              * Thankfully element size doesn't matter for splatting zero.
2167              */
2168             tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
2169         }
2170     }
2171     return true;
2172 }
2173 
2174 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2175 {
2176     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2177     static gen_insr * const fns[4] = {
2178         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2179         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2180     };
2181     unsigned vsz = vec_full_reg_size(s);
2182     TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2183     TCGv_ptr t_zd = tcg_temp_new_ptr();
2184     TCGv_ptr t_zn = tcg_temp_new_ptr();
2185 
2186     tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd));
2187     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
2188 
2189     fns[a->esz](t_zd, t_zn, val, desc);
2190 }
2191 
2192 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2193 {
2194     if (!dc_isar_feature(aa64_sve, s)) {
2195         return false;
2196     }
2197     if (sve_access_check(s)) {
2198         TCGv_i64 t = tcg_temp_new_i64();
2199         tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64));
2200         do_insr_i64(s, a, t);
2201     }
2202     return true;
2203 }
2204 
2205 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2206 {
2207     if (!dc_isar_feature(aa64_sve, s)) {
2208         return false;
2209     }
2210     if (sve_access_check(s)) {
2211         do_insr_i64(s, a, cpu_reg(s, a->rm));
2212     }
2213     return true;
2214 }
2215 
2216 static gen_helper_gvec_2 * const rev_fns[4] = {
2217     gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2218     gen_helper_sve_rev_s, gen_helper_sve_rev_d
2219 };
2220 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
2221 
2222 static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2223     gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2224     gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2225 };
2226 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
2227 
2228 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2229     gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2230     gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2231 };
2232 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2233            a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
2234 
2235 static gen_helper_gvec_3 * const tbx_fns[4] = {
2236     gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2237     gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2238 };
2239 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
2240 
2241 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2242 {
2243     static gen_helper_gvec_2 * const fns[4][2] = {
2244         { NULL, NULL },
2245         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2246         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2247         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2248     };
2249 
2250     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2251         return false;
2252     }
2253     if (sve_access_check(s)) {
2254         unsigned vsz = vec_full_reg_size(s);
2255         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2256                            vec_full_reg_offset(s, a->rn)
2257                            + (a->h ? vsz / 2 : 0),
2258                            vsz, vsz, 0, fns[a->esz][a->u]);
2259     }
2260     return true;
2261 }
2262 
2263 /*
2264  *** SVE Permute - Predicates Group
2265  */
2266 
2267 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2268                           gen_helper_gvec_3 *fn)
2269 {
2270     if (!sve_access_check(s)) {
2271         return true;
2272     }
2273 
2274     unsigned vsz = pred_full_reg_size(s);
2275 
2276     TCGv_ptr t_d = tcg_temp_new_ptr();
2277     TCGv_ptr t_n = tcg_temp_new_ptr();
2278     TCGv_ptr t_m = tcg_temp_new_ptr();
2279     uint32_t desc = 0;
2280 
2281     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2282     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2283     desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2284 
2285     tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd));
2286     tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn));
2287     tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm));
2288 
2289     fn(t_d, t_n, t_m, tcg_constant_i32(desc));
2290     return true;
2291 }
2292 
2293 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2294                           gen_helper_gvec_2 *fn)
2295 {
2296     if (!sve_access_check(s)) {
2297         return true;
2298     }
2299 
2300     unsigned vsz = pred_full_reg_size(s);
2301     TCGv_ptr t_d = tcg_temp_new_ptr();
2302     TCGv_ptr t_n = tcg_temp_new_ptr();
2303     uint32_t desc = 0;
2304 
2305     tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd));
2306     tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn));
2307 
2308     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2309     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2310     desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2311 
2312     fn(t_d, t_n, tcg_constant_i32(desc));
2313     return true;
2314 }
2315 
2316 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2317 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2318 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2319 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2320 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2321 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
2322 
2323 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2324 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2325 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
2326 
2327 /*
2328  *** SVE Permute - Interleaving Group
2329  */
2330 
2331 static gen_helper_gvec_3 * const zip_fns[4] = {
2332     gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2333     gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2334 };
2335 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2336            zip_fns[a->esz], a, 0)
2337 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2338            zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2339 
2340 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2341            gen_helper_sve2_zip_q, a, 0)
2342 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2343            gen_helper_sve2_zip_q, a,
2344            QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
2345 
2346 static gen_helper_gvec_3 * const uzp_fns[4] = {
2347     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2348     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2349 };
2350 
2351 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2352            uzp_fns[a->esz], a, 0)
2353 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2354            uzp_fns[a->esz], a, 1 << a->esz)
2355 
2356 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2357            gen_helper_sve2_uzp_q, a, 0)
2358 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2359            gen_helper_sve2_uzp_q, a, 16)
2360 
2361 static gen_helper_gvec_3 * const trn_fns[4] = {
2362     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2363     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2364 };
2365 
2366 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2367            trn_fns[a->esz], a, 0)
2368 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2369            trn_fns[a->esz], a, 1 << a->esz)
2370 
2371 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2372            gen_helper_sve2_trn_q, a, 0)
2373 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2374            gen_helper_sve2_trn_q, a, 16)
2375 
2376 /*
2377  *** SVE Permute Vector - Predicated Group
2378  */
2379 
2380 static gen_helper_gvec_3 * const compact_fns[4] = {
2381     NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2382 };
2383 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
2384                         compact_fns[a->esz], a, 0)
2385 
2386 /* Call the helper that computes the ARM LastActiveElement pseudocode
2387  * function, scaled by the element size.  This includes the not found
2388  * indication; e.g. not found for esz=3 is -8.
2389  */
2390 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2391 {
2392     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2393      * round up, as we do elsewhere, because we need the exact size.
2394      */
2395     TCGv_ptr t_p = tcg_temp_new_ptr();
2396     unsigned desc = 0;
2397 
2398     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2399     desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2400 
2401     tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg));
2402 
2403     gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
2404 }
2405 
2406 /* Increment LAST to the offset of the next element in the vector,
2407  * wrapping around to 0.
2408  */
2409 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2410 {
2411     unsigned vsz = vec_full_reg_size(s);
2412 
2413     tcg_gen_addi_i32(last, last, 1 << esz);
2414     if (is_power_of_2(vsz)) {
2415         tcg_gen_andi_i32(last, last, vsz - 1);
2416     } else {
2417         TCGv_i32 max = tcg_constant_i32(vsz);
2418         TCGv_i32 zero = tcg_constant_i32(0);
2419         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2420     }
2421 }
2422 
2423 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2424 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2425 {
2426     unsigned vsz = vec_full_reg_size(s);
2427 
2428     if (is_power_of_2(vsz)) {
2429         tcg_gen_andi_i32(last, last, vsz - 1);
2430     } else {
2431         TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2432         TCGv_i32 zero = tcg_constant_i32(0);
2433         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2434     }
2435 }
2436 
2437 /* Load an unsigned element of ESZ from BASE+OFS.  */
2438 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2439 {
2440     TCGv_i64 r = tcg_temp_new_i64();
2441 
2442     switch (esz) {
2443     case 0:
2444         tcg_gen_ld8u_i64(r, base, ofs);
2445         break;
2446     case 1:
2447         tcg_gen_ld16u_i64(r, base, ofs);
2448         break;
2449     case 2:
2450         tcg_gen_ld32u_i64(r, base, ofs);
2451         break;
2452     case 3:
2453         tcg_gen_ld_i64(r, base, ofs);
2454         break;
2455     default:
2456         g_assert_not_reached();
2457     }
2458     return r;
2459 }
2460 
2461 /* Load an unsigned element of ESZ from RM[LAST].  */
2462 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2463                                  int rm, int esz)
2464 {
2465     TCGv_ptr p = tcg_temp_new_ptr();
2466 
2467     /* Convert offset into vector into offset into ENV.
2468      * The final adjustment for the vector register base
2469      * is added via constant offset to the load.
2470      */
2471 #if HOST_BIG_ENDIAN
2472     /* Adjust for element ordering.  See vec_reg_offset.  */
2473     if (esz < 3) {
2474         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2475     }
2476 #endif
2477     tcg_gen_ext_i32_ptr(p, last);
2478     tcg_gen_add_ptr(p, p, tcg_env);
2479 
2480     return load_esz(p, vec_full_reg_offset(s, rm), esz);
2481 }
2482 
2483 /* Compute CLAST for a Zreg.  */
2484 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2485 {
2486     TCGv_i32 last;
2487     TCGLabel *over;
2488     TCGv_i64 ele;
2489     unsigned vsz, esz = a->esz;
2490 
2491     if (!sve_access_check(s)) {
2492         return true;
2493     }
2494 
2495     last = tcg_temp_new_i32();
2496     over = gen_new_label();
2497 
2498     find_last_active(s, last, esz, a->pg);
2499 
2500     /* There is of course no movcond for a 2048-bit vector,
2501      * so we must branch over the actual store.
2502      */
2503     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2504 
2505     if (!before) {
2506         incr_last_active(s, last, esz);
2507     }
2508 
2509     ele = load_last_active(s, last, a->rm, esz);
2510 
2511     vsz = vec_full_reg_size(s);
2512     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2513 
2514     /* If this insn used MOVPRFX, we may need a second move.  */
2515     if (a->rd != a->rn) {
2516         TCGLabel *done = gen_new_label();
2517         tcg_gen_br(done);
2518 
2519         gen_set_label(over);
2520         do_mov_z(s, a->rd, a->rn);
2521 
2522         gen_set_label(done);
2523     } else {
2524         gen_set_label(over);
2525     }
2526     return true;
2527 }
2528 
2529 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2530 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
2531 
2532 /* Compute CLAST for a scalar.  */
2533 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2534                             bool before, TCGv_i64 reg_val)
2535 {
2536     TCGv_i32 last = tcg_temp_new_i32();
2537     TCGv_i64 ele, cmp;
2538 
2539     find_last_active(s, last, esz, pg);
2540 
2541     /* Extend the original value of last prior to incrementing.  */
2542     cmp = tcg_temp_new_i64();
2543     tcg_gen_ext_i32_i64(cmp, last);
2544 
2545     if (!before) {
2546         incr_last_active(s, last, esz);
2547     }
2548 
2549     /* The conceit here is that while last < 0 indicates not found, after
2550      * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address
2551      * from which we can load garbage.  We then discard the garbage with
2552      * a conditional move.
2553      */
2554     ele = load_last_active(s, last, rm, esz);
2555 
2556     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2557                         ele, reg_val);
2558 }
2559 
2560 /* Compute CLAST for a Vreg.  */
2561 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2562 {
2563     if (sve_access_check(s)) {
2564         int esz = a->esz;
2565         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2566         TCGv_i64 reg = load_esz(tcg_env, ofs, esz);
2567 
2568         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2569         write_fp_dreg(s, a->rd, reg);
2570     }
2571     return true;
2572 }
2573 
2574 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2575 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
2576 
2577 /* Compute CLAST for a Xreg.  */
2578 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2579 {
2580     TCGv_i64 reg;
2581 
2582     if (!sve_access_check(s)) {
2583         return true;
2584     }
2585 
2586     reg = cpu_reg(s, a->rd);
2587     switch (a->esz) {
2588     case 0:
2589         tcg_gen_ext8u_i64(reg, reg);
2590         break;
2591     case 1:
2592         tcg_gen_ext16u_i64(reg, reg);
2593         break;
2594     case 2:
2595         tcg_gen_ext32u_i64(reg, reg);
2596         break;
2597     case 3:
2598         break;
2599     default:
2600         g_assert_not_reached();
2601     }
2602 
2603     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2604     return true;
2605 }
2606 
2607 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2608 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
2609 
2610 /* Compute LAST for a scalar.  */
2611 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2612                                int pg, int rm, bool before)
2613 {
2614     TCGv_i32 last = tcg_temp_new_i32();
2615 
2616     find_last_active(s, last, esz, pg);
2617     if (before) {
2618         wrap_last_active(s, last, esz);
2619     } else {
2620         incr_last_active(s, last, esz);
2621     }
2622 
2623     return load_last_active(s, last, rm, esz);
2624 }
2625 
2626 /* Compute LAST for a Vreg.  */
2627 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2628 {
2629     if (sve_access_check(s)) {
2630         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2631         write_fp_dreg(s, a->rd, val);
2632     }
2633     return true;
2634 }
2635 
2636 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2637 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
2638 
2639 /* Compute LAST for a Xreg.  */
2640 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2641 {
2642     if (sve_access_check(s)) {
2643         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2644         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2645     }
2646     return true;
2647 }
2648 
2649 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2650 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
2651 
2652 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2653 {
2654     if (!dc_isar_feature(aa64_sve, s)) {
2655         return false;
2656     }
2657     if (sve_access_check(s)) {
2658         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2659     }
2660     return true;
2661 }
2662 
2663 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2664 {
2665     if (!dc_isar_feature(aa64_sve, s)) {
2666         return false;
2667     }
2668     if (sve_access_check(s)) {
2669         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2670         TCGv_i64 t = load_esz(tcg_env, ofs, a->esz);
2671         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2672     }
2673     return true;
2674 }
2675 
2676 static gen_helper_gvec_3 * const revb_fns[4] = {
2677     NULL,                  gen_helper_sve_revb_h,
2678     gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2679 };
2680 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
2681 
2682 static gen_helper_gvec_3 * const revh_fns[4] = {
2683     NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2684 };
2685 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
2686 
2687 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2688            a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
2689 
2690 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
2691 
2692 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2693            gen_helper_sve_splice, a, a->esz)
2694 
2695 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2696            a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
2697 
2698 /*
2699  *** SVE Integer Compare - Vectors Group
2700  */
2701 
2702 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2703                           gen_helper_gvec_flags_4 *gen_fn)
2704 {
2705     TCGv_ptr pd, zn, zm, pg;
2706     unsigned vsz;
2707     TCGv_i32 t;
2708 
2709     if (gen_fn == NULL) {
2710         return false;
2711     }
2712     if (!sve_access_check(s)) {
2713         return true;
2714     }
2715 
2716     vsz = vec_full_reg_size(s);
2717     t = tcg_temp_new_i32();
2718     pd = tcg_temp_new_ptr();
2719     zn = tcg_temp_new_ptr();
2720     zm = tcg_temp_new_ptr();
2721     pg = tcg_temp_new_ptr();
2722 
2723     tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
2724     tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
2725     tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm));
2726     tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
2727 
2728     gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
2729 
2730     do_pred_flags(t);
2731     return true;
2732 }
2733 
/*
 * Instantiate trans_<NAME>_ppzz for a vector/vector compare: build a table
 * of the four per-element-size helpers (b, h, s, d) and dispatch through
 * do_ppzz_flags using a->esz as the table index.
 */
#define DO_PPZZ(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {       \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzz_fns[a->esz])

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
2750 
/*
 * Instantiate trans_<NAME>_ppzw: same shape as the _ppzz compares, but
 * using the _ppzw helpers.  Only b, h and s helpers exist; the table entry
 * for esz == 3 is NULL, which do_ppzz_flags turns into a "return false".
 */
#define DO_PPZW(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {       \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzw_fns[a->esz])

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2771 
2772 /*
2773  *** SVE Integer Compare - Immediate Groups
2774  */
2775 
2776 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2777                           gen_helper_gvec_flags_3 *gen_fn)
2778 {
2779     TCGv_ptr pd, zn, pg;
2780     unsigned vsz;
2781     TCGv_i32 t;
2782 
2783     if (gen_fn == NULL) {
2784         return false;
2785     }
2786     if (!sve_access_check(s)) {
2787         return true;
2788     }
2789 
2790     vsz = vec_full_reg_size(s);
2791     t = tcg_temp_new_i32();
2792     pd = tcg_temp_new_ptr();
2793     zn = tcg_temp_new_ptr();
2794     pg = tcg_temp_new_ptr();
2795 
2796     tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
2797     tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
2798     tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
2799 
2800     gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
2801 
2802     do_pred_flags(t);
2803     return true;
2804 }
2805 
/*
 * Instantiate trans_<NAME>_ppzi for a vector/immediate compare: build a
 * table of the four per-element-size helpers (b, h, s, d) and dispatch
 * through do_ppzi_flags using a->esz as the table index.
 */
#define DO_PPZI(NAME, name) \
    static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {         \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                   \
               name##_ppzi_fns[a->esz])

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
2826 
2827 /*
2828  *** SVE Partition Break Group
2829  */
2830 
2831 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2832                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2833 {
2834     if (!sve_access_check(s)) {
2835         return true;
2836     }
2837 
2838     unsigned vsz = pred_full_reg_size(s);
2839 
2840     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2841     TCGv_ptr d = tcg_temp_new_ptr();
2842     TCGv_ptr n = tcg_temp_new_ptr();
2843     TCGv_ptr m = tcg_temp_new_ptr();
2844     TCGv_ptr g = tcg_temp_new_ptr();
2845     TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
2846 
2847     tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
2848     tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
2849     tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm));
2850     tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
2851 
2852     if (a->s) {
2853         TCGv_i32 t = tcg_temp_new_i32();
2854         fn_s(t, d, n, m, g, desc);
2855         do_pred_flags(t);
2856     } else {
2857         fn(d, n, m, g, desc);
2858     }
2859     return true;
2860 }
2861 
2862 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2863                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2864 {
2865     if (!sve_access_check(s)) {
2866         return true;
2867     }
2868 
2869     unsigned vsz = pred_full_reg_size(s);
2870 
2871     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2872     TCGv_ptr d = tcg_temp_new_ptr();
2873     TCGv_ptr n = tcg_temp_new_ptr();
2874     TCGv_ptr g = tcg_temp_new_ptr();
2875     TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
2876 
2877     tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
2878     tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
2879     tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
2880 
2881     if (a->s) {
2882         TCGv_i32 t = tcg_temp_new_i32();
2883         fn_s(t, d, n, g, desc);
2884         do_pred_flags(t);
2885     } else {
2886         fn(d, n, g, desc);
2887     }
2888     return true;
2889 }
2890 
/*
 * Each instantiation below pairs a plain helper with its flag-setting
 * counterpart; do_brk3/do_brk2 pick between them based on a->s.
 */

/* Three-predicate forms. */
TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpa, gen_helper_sve_brkpas)
TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpb, gen_helper_sve_brkpbs)

/* Two-predicate forms using the _m helpers. */
TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)

/* Two-predicate forms using the _z helpers. */
TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)

TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
           gen_helper_sve_brkn, gen_helper_sve_brkns)
2908 
2909 /*
2910  *** SVE Predicate Count Group
2911  */
2912 
2913 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2914 {
2915     unsigned psz = pred_full_reg_size(s);
2916 
2917     if (psz <= 8) {
2918         uint64_t psz_mask;
2919 
2920         tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn));
2921         if (pn != pg) {
2922             TCGv_i64 g = tcg_temp_new_i64();
2923             tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg));
2924             tcg_gen_and_i64(val, val, g);
2925         }
2926 
2927         /* Reduce the pred_esz_masks value simply to reduce the
2928          * size of the code generated here.
2929          */
2930         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2931         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2932 
2933         tcg_gen_ctpop_i64(val, val);
2934     } else {
2935         TCGv_ptr t_pn = tcg_temp_new_ptr();
2936         TCGv_ptr t_pg = tcg_temp_new_ptr();
2937         unsigned desc = 0;
2938 
2939         desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
2940         desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2941 
2942         tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn));
2943         tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
2944 
2945         gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
2946     }
2947 }
2948 
2949 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
2950 {
2951     if (!dc_isar_feature(aa64_sve, s)) {
2952         return false;
2953     }
2954     if (sve_access_check(s)) {
2955         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
2956     }
2957     return true;
2958 }
2959 
2960 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
2961 {
2962     if (!dc_isar_feature(aa64_sve, s)) {
2963         return false;
2964     }
2965     if (sve_access_check(s)) {
2966         TCGv_i64 reg = cpu_reg(s, a->rd);
2967         TCGv_i64 val = tcg_temp_new_i64();
2968 
2969         do_cntp(s, val, a->esz, a->pg, a->pg);
2970         if (a->d) {
2971             tcg_gen_sub_i64(reg, reg, val);
2972         } else {
2973             tcg_gen_add_i64(reg, reg, val);
2974         }
2975     }
2976     return true;
2977 }
2978 
2979 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
2980 {
2981     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2982         return false;
2983     }
2984     if (sve_access_check(s)) {
2985         unsigned vsz = vec_full_reg_size(s);
2986         TCGv_i64 val = tcg_temp_new_i64();
2987         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
2988 
2989         do_cntp(s, val, a->esz, a->pg, a->pg);
2990         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
2991                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
2992     }
2993     return true;
2994 }
2995 
2996 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
2997 {
2998     if (!dc_isar_feature(aa64_sve, s)) {
2999         return false;
3000     }
3001     if (sve_access_check(s)) {
3002         TCGv_i64 reg = cpu_reg(s, a->rd);
3003         TCGv_i64 val = tcg_temp_new_i64();
3004 
3005         do_cntp(s, val, a->esz, a->pg, a->pg);
3006         do_sat_addsub_32(reg, val, a->u, a->d);
3007     }
3008     return true;
3009 }
3010 
3011 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3012 {
3013     if (!dc_isar_feature(aa64_sve, s)) {
3014         return false;
3015     }
3016     if (sve_access_check(s)) {
3017         TCGv_i64 reg = cpu_reg(s, a->rd);
3018         TCGv_i64 val = tcg_temp_new_i64();
3019 
3020         do_cntp(s, val, a->esz, a->pg, a->pg);
3021         do_sat_addsub_64(reg, val, a->u, a->d);
3022     }
3023     return true;
3024 }
3025 
3026 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3027 {
3028     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3029         return false;
3030     }
3031     if (sve_access_check(s)) {
3032         TCGv_i64 val = tcg_temp_new_i64();
3033         do_cntp(s, val, a->esz, a->pg, a->pg);
3034         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3035     }
3036     return true;
3037 }
3038 
3039 /*
3040  *** SVE Integer Compare Scalars Group
3041  */
3042 
3043 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3044 {
3045     if (!dc_isar_feature(aa64_sve, s)) {
3046         return false;
3047     }
3048     if (!sve_access_check(s)) {
3049         return true;
3050     }
3051 
3052     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3053     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3054     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3055     TCGv_i64 cmp = tcg_temp_new_i64();
3056 
3057     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3058     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3059 
3060     /* VF = !NF & !CF.  */
3061     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3062     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3063 
3064     /* Both NF and VF actually look at bit 31.  */
3065     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3066     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3067     return true;
3068 }
3069 
/*
 * Translate the scalar-compare WHILE instructions.
 *
 * The two general registers rn and rm are compared and the number of
 * iterations for which the condition holds is computed inline, clamped to
 * the number of elements in a vector.  That count (scaled to predicate
 * bits) is passed to a helper together with a pointer to predicate
 * register rd, and the helper's return value is used to set the flags.
 *
 * Decode fields used here:
 *   a->lt  selects the less-than family (gen_helper_sve_whilel) rather
 *          than the greater-than family (gen_helper_sve_whileg);
 *   a->u   selects unsigned rather than signed comparison;
 *   a->sf  selects 64-bit rather than 32-bit operands;
 *   a->eq  combined with a->lt, decides whether equality satisfies the
 *          condition (see the note on 'eq' below).
 *
 * Returns false if the required feature is missing (SVE for the less-than
 * forms, SVE2 for the greater-than forms); true otherwise.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (a->lt
        ? !dc_isar_feature(aa64_sve, s)
        : !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* 32-bit operands are widened to 64 bits according to signedness. */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    /*
     * t0 receives the distance between the operands; maxval is the extreme
     * value of op1 that needs the special case handled further below.
     */
    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    /* Number of elements of this size in a full vector. */
    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false.  */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    /* Predicate descriptor: size in bytes (vsz / 8) and element size. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));

    /* t2 is reused: count on input, value for the flags on output. */
    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);
    return true;
}
3177 
3178 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3179 {
3180     TCGv_i64 op0, op1, diff, t1, tmax;
3181     TCGv_i32 t2;
3182     TCGv_ptr ptr;
3183     unsigned vsz = vec_full_reg_size(s);
3184     unsigned desc = 0;
3185 
3186     if (!dc_isar_feature(aa64_sve2, s)) {
3187         return false;
3188     }
3189     if (!sve_access_check(s)) {
3190         return true;
3191     }
3192 
3193     op0 = read_cpu_reg(s, a->rn, 1);
3194     op1 = read_cpu_reg(s, a->rm, 1);
3195 
3196     tmax = tcg_constant_i64(vsz);
3197     diff = tcg_temp_new_i64();
3198 
3199     if (a->rw) {
3200         /* WHILERW */
3201         /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3202         t1 = tcg_temp_new_i64();
3203         tcg_gen_sub_i64(diff, op0, op1);
3204         tcg_gen_sub_i64(t1, op1, op0);
3205         tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3206         /* Round down to a multiple of ESIZE.  */
3207         tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3208         /* If op1 == op0, diff == 0, and the condition is always true. */
3209         tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3210     } else {
3211         /* WHILEWR */
3212         tcg_gen_sub_i64(diff, op1, op0);
3213         /* Round down to a multiple of ESIZE.  */
3214         tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3215         /* If op0 >= op1, diff <= 0, the condition is always true. */
3216         tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3217     }
3218 
3219     /* Bound to the maximum.  */
3220     tcg_gen_umin_i64(diff, diff, tmax);
3221 
3222     /* Since we're bounded, pass as a 32-bit type.  */
3223     t2 = tcg_temp_new_i32();
3224     tcg_gen_extrl_i64_i32(t2, diff);
3225 
3226     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3227     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3228 
3229     ptr = tcg_temp_new_ptr();
3230     tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
3231 
3232     gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
3233     do_pred_flags(t2);
3234     return true;
3235 }
3236 
3237 /*
3238  *** SVE Integer Wide Immediate - Unpredicated Group
3239  */
3240 
3241 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3242 {
3243     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3244         return false;
3245     }
3246     if (sve_access_check(s)) {
3247         unsigned vsz = vec_full_reg_size(s);
3248         int dofs = vec_full_reg_offset(s, a->rd);
3249         uint64_t imm;
3250 
3251         /* Decode the VFP immediate.  */
3252         imm = vfp_expand_imm(a->esz, a->imm);
3253         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3254     }
3255     return true;
3256 }
3257 
3258 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3259 {
3260     if (!dc_isar_feature(aa64_sve, s)) {
3261         return false;
3262     }
3263     if (sve_access_check(s)) {
3264         unsigned vsz = vec_full_reg_size(s);
3265         int dofs = vec_full_reg_offset(s, a->rd);
3266         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3267     }
3268     return true;
3269 }
3270 
/* ADD (vector, immediate): expanded inline through tcg_gen_gvec_addi. */
TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)

/*
 * SUB (vector, immediate): implemented as an add of the negated immediate.
 * Note that this overwrites a->imm in the caller's argument structure
 * before delegating to trans_ADD_zzi.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3278 
3279 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3280 {
3281     static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3282     static const GVecGen2s op[4] = {
3283         { .fni8 = tcg_gen_vec_sub8_i64,
3284           .fniv = tcg_gen_sub_vec,
3285           .fno = gen_helper_sve_subri_b,
3286           .opt_opc = vecop_list,
3287           .vece = MO_8,
3288           .scalar_first = true },
3289         { .fni8 = tcg_gen_vec_sub16_i64,
3290           .fniv = tcg_gen_sub_vec,
3291           .fno = gen_helper_sve_subri_h,
3292           .opt_opc = vecop_list,
3293           .vece = MO_16,
3294           .scalar_first = true },
3295         { .fni4 = tcg_gen_sub_i32,
3296           .fniv = tcg_gen_sub_vec,
3297           .fno = gen_helper_sve_subri_s,
3298           .opt_opc = vecop_list,
3299           .vece = MO_32,
3300           .scalar_first = true },
3301         { .fni8 = tcg_gen_sub_i64,
3302           .fniv = tcg_gen_sub_vec,
3303           .fno = gen_helper_sve_subri_d,
3304           .opt_opc = vecop_list,
3305           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3306           .vece = MO_64,
3307           .scalar_first = true }
3308     };
3309 
3310     if (!dc_isar_feature(aa64_sve, s)) {
3311         return false;
3312     }
3313     if (sve_access_check(s)) {
3314         unsigned vsz = vec_full_reg_size(s);
3315         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3316                         vec_full_reg_offset(s, a->rn),
3317                         vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
3318     }
3319     return true;
3320 }
3321 
/* MUL (vector, immediate): expanded inline through tcg_gen_gvec_muli. */
TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
3323 
3324 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3325 {
3326     if (sve_access_check(s)) {
3327         do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3328                           tcg_constant_i64(a->imm), u, d);
3329     }
3330     return true;
3331 }
3332 
3333 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
3334 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
3335 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
3336 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
3337 
3338 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3339 {
3340     if (sve_access_check(s)) {
3341         unsigned vsz = vec_full_reg_size(s);
3342         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3343                             vec_full_reg_offset(s, a->rn),
3344                             tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
3345     }
3346     return true;
3347 }
3348 
/*
 * Instantiate trans_<NAME>_zzi for a vector/immediate min or max: build a
 * table of the four per-element-size helpers (b, h, s, d) and dispatch
 * through do_zzi_ool using a->esz as the table index.
 */
#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = {                \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3362 
/*
 * Dot-product helpers, indexed as dot_fns[a->u][a->sz]: the first index
 * selects the sdot (0) or udot (1) helpers, the second selects the _b (0)
 * or _h (1) variant.
 */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
3369 
3370 /*
3371  * SVE Multiply - Indexed
3372  */
3373 
/*
 * Indexed dot products.  The _s encodings use the _idx_b helpers and the
 * _d encodings use the _idx_h helpers.
 */
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

/* The mixed-sign indexed forms are gated on aa64_sve_i8mm. */
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
3387 
3388 #define DO_SVE2_RRX(NAME, FUNC) \
3389     TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC,          \
3390                a->rd, a->rn, a->rm, a->index)
3391 
3392 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3393 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3394 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
3395 
3396 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3397 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3398 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
3399 
3400 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3401 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3402 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
3403 
3404 #undef DO_SVE2_RRX
3405 
3406 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
3407     TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC,          \
3408                a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3409 
3410 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3411 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3412 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3413 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3414 
3415 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3416 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3417 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3418 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3419 
3420 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3421 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3422 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3423 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
3424 
3425 #undef DO_SVE2_RRX_TB
3426 
/*
 * Instantiate an SVE2 translator for an indexed multiply-accumulate that
 * forwards the whole decoded argument structure to gen_gvec_ool_arg_zzxz.
 */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
3447 
/*
 * Instantiate an SVE2 translator for an indexed widening
 * multiply-accumulate with bottom/top selection.  The helper data packs
 * TOP into bit 0 with the element index in the bits above it; the same
 * helper therefore serves both the B (TOP = false) and T (TOP = true)
 * encodings.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,        \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
3483 
/*
 * Instantiate an SVE2 translator for an indexed operation that also takes
 * a rotation.  The helper data packs a->rot into the low two bits with the
 * element index in the bits above them.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,           \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3498 
3499 /*
3500  *** SVE Floating Point Multiply-Add Indexed Group
3501  */
3502 
3503 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
3504 {
3505     static gen_helper_gvec_4_ptr * const fns[4] = {
3506         NULL,
3507         gen_helper_gvec_fmla_idx_h,
3508         gen_helper_gvec_fmla_idx_s,
3509         gen_helper_gvec_fmla_idx_d,
3510     };
3511     return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
3512                               (a->index << 1) | sub,
3513                               a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3514 }
3515 
3516 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
3517 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
3518 
3519 /*
3520  *** SVE Floating Point Multiply Indexed Group
3521  */
3522 
/*
 * Indexed floating-point multiply helpers, indexed by a->esz; entry 0 is
 * NULL.  The element index is passed as the helper data and the
 * half-precision float status is selected for MO_16.
 */
static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
    NULL,                       gen_helper_gvec_fmul_idx_h,
    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3530 
3531 /*
3532  *** SVE Floating Point Fast Reduction Group
3533  */
3534 
3535 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3536                                   TCGv_ptr, TCGv_i32);
3537 
3538 static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
3539                       gen_helper_fp_reduce *fn)
3540 {
3541     unsigned vsz, p2vsz;
3542     TCGv_i32 t_desc;
3543     TCGv_ptr t_zn, t_pg, status;
3544     TCGv_i64 temp;
3545 
3546     if (fn == NULL) {
3547         return false;
3548     }
3549     if (!sve_access_check(s)) {
3550         return true;
3551     }
3552 
3553     vsz = vec_full_reg_size(s);
3554     p2vsz = pow2ceil(vsz);
3555     t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
3556     temp = tcg_temp_new_i64();
3557     t_zn = tcg_temp_new_ptr();
3558     t_pg = tcg_temp_new_ptr();
3559 
3560     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
3561     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
3562     status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3563 
3564     fn(temp, t_zn, t_pg, status, t_desc);
3565 
3566     write_fp_dreg(s, a->rd, temp);
3567     return true;
3568 }
3569 
/*
 * Instantiate trans_<NAME> for a floating-point reduction: build a table
 * of the h, s and d helpers (entry 0 is NULL, which do_reduce turns into a
 * "return false") and dispatch through do_reduce using a->esz as the
 * table index.
 */
#define DO_VPZ(NAME, name) \
    static gen_helper_fp_reduce * const name##_fns[4] = {                \
        NULL,                      gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

#undef DO_VPZ
3584 
3585 /*
3586  *** SVE Floating Point Unary Operations - Unpredicated Group
3587  */
3588 
/* FRECPE helpers, indexed by a->esz; entry 0 is NULL. */
static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
    NULL,                     gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)

/* FRSQRTE helpers, indexed by a->esz; entry 0 is NULL. */
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
    NULL,                      gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
};
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3600 
3601 /*
3602  *** SVE Floating Point Compare with Zero Group
3603  */
3604 
3605 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3606                       gen_helper_gvec_3_ptr *fn)
3607 {
3608     if (fn == NULL) {
3609         return false;
3610     }
3611     if (sve_access_check(s)) {
3612         unsigned vsz = vec_full_reg_size(s);
3613         TCGv_ptr status =
3614             fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3615 
3616         tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3617                            vec_full_reg_offset(s, a->rn),
3618                            pred_full_reg_offset(s, a->pg),
3619                            status, vsz, vsz, 0, fn);
3620     }
3621     return true;
3622 }
3623 
3624 #define DO_PPZ(NAME, name) \
3625     static gen_helper_gvec_3_ptr * const name##_fns[] = {         \
3626         NULL,                      gen_helper_sve_##name##_h,     \
3627         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
3628     };                                                            \
3629     TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
3630 
3631 DO_PPZ(FCMGE_ppz0, fcmge0)
3632 DO_PPZ(FCMGT_ppz0, fcmgt0)
3633 DO_PPZ(FCMLE_ppz0, fcmle0)
3634 DO_PPZ(FCMLT_ppz0, fcmlt0)
3635 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3636 DO_PPZ(FCMNE_ppz0, fcmne0)
3637 
3638 #undef DO_PPZ
3639 
3640 /*
3641  *** SVE floating-point trig multiply-add coefficient
3642  */
3643 
3644 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
3645     NULL,                   gen_helper_sve_ftmad_h,
3646     gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
3647 };
3648 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
3649                         ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
3650                         a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3651 
3652 /*
3653  *** SVE Floating Point Accumulating Reduction Group
3654  */
3655 
3656 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3657 {
3658     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3659                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3660     static fadda_fn * const fns[3] = {
3661         gen_helper_sve_fadda_h,
3662         gen_helper_sve_fadda_s,
3663         gen_helper_sve_fadda_d,
3664     };
3665     unsigned vsz = vec_full_reg_size(s);
3666     TCGv_ptr t_rm, t_pg, t_fpst;
3667     TCGv_i64 t_val;
3668     TCGv_i32 t_desc;
3669 
3670     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3671         return false;
3672     }
3673     s->is_nonstreaming = true;
3674     if (!sve_access_check(s)) {
3675         return true;
3676     }
3677 
3678     t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3679     t_rm = tcg_temp_new_ptr();
3680     t_pg = tcg_temp_new_ptr();
3681     tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
3682     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
3683     t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3684     t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3685 
3686     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3687 
3688     write_fp_dreg(s, a->rd, t_val);
3689     return true;
3690 }
3691 
3692 /*
3693  *** SVE Floating Point Arithmetic - Unpredicated Group
3694  */
3695 
3696 #define DO_FP3(NAME, name) \
3697     static gen_helper_gvec_3_ptr * const name##_fns[4] = {          \
3698         NULL, gen_helper_gvec_##name##_h,                           \
3699         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3700     };                                                              \
3701     TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
3702 
3703 DO_FP3(FADD_zzz, fadd)
3704 DO_FP3(FSUB_zzz, fsub)
3705 DO_FP3(FMUL_zzz, fmul)
3706 DO_FP3(FRECPS, recps)
3707 DO_FP3(FRSQRTS, rsqrts)
3708 
3709 #undef DO_FP3
3710 
3711 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
3712     NULL,                     gen_helper_gvec_ftsmul_h,
3713     gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
3714 };
3715 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
3716                         ftsmul_fns[a->esz], a, 0)
3717 
3718 /*
3719  *** SVE Floating Point Arithmetic - Predicated Group
3720  */
3721 
3722 #define DO_ZPZZ_FP(NAME, FEAT, name) \
3723     static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
3724         NULL,                  gen_helper_##name##_h,           \
3725         gen_helper_##name##_s, gen_helper_##name##_d            \
3726     };                                                          \
3727     TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
3728 
3729 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
3730 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
3731 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
3732 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
3733 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
3734 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
3735 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
3736 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
3737 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
3738 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
3739 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
3740 
3741 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3742                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3743 
3744 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3745                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3746 {
3747     unsigned vsz = vec_full_reg_size(s);
3748     TCGv_ptr t_zd, t_zn, t_pg, status;
3749     TCGv_i32 desc;
3750 
3751     t_zd = tcg_temp_new_ptr();
3752     t_zn = tcg_temp_new_ptr();
3753     t_pg = tcg_temp_new_ptr();
3754     tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd));
3755     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
3756     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
3757 
3758     status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
3759     desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3760     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3761 }
3762 
3763 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3764                       gen_helper_sve_fp2scalar *fn)
3765 {
3766     if (fn == NULL) {
3767         return false;
3768     }
3769     if (sve_access_check(s)) {
3770         do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
3771                      tcg_constant_i64(imm), fn);
3772     }
3773     return true;
3774 }
3775 
3776 #define DO_FP_IMM(NAME, name, const0, const1)                           \
3777     static gen_helper_sve_fp2scalar * const name##_fns[4] = {           \
3778         NULL, gen_helper_sve_##name##_h,                                \
3779         gen_helper_sve_##name##_s,                                      \
3780         gen_helper_sve_##name##_d                                       \
3781     };                                                                  \
3782     static uint64_t const name##_const[4][2] = {                        \
3783         { -1, -1 },                                                     \
3784         { float16_##const0, float16_##const1 },                         \
3785         { float32_##const0, float32_##const1 },                         \
3786         { float64_##const0, float64_##const1 },                         \
3787     };                                                                  \
3788     TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a,                     \
3789                name##_const[a->esz][a->imm], name##_fns[a->esz])
3790 
3791 DO_FP_IMM(FADD, fadds, half, one)
3792 DO_FP_IMM(FSUB, fsubs, half, one)
3793 DO_FP_IMM(FMUL, fmuls, half, two)
3794 DO_FP_IMM(FSUBR, fsubrs, half, one)
3795 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3796 DO_FP_IMM(FMINNM, fminnms, zero, one)
3797 DO_FP_IMM(FMAX, fmaxs, zero, one)
3798 DO_FP_IMM(FMIN, fmins, zero, one)
3799 
3800 #undef DO_FP_IMM
3801 
3802 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3803                       gen_helper_gvec_4_ptr *fn)
3804 {
3805     if (fn == NULL) {
3806         return false;
3807     }
3808     if (sve_access_check(s)) {
3809         unsigned vsz = vec_full_reg_size(s);
3810         TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3811         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3812                            vec_full_reg_offset(s, a->rn),
3813                            vec_full_reg_offset(s, a->rm),
3814                            pred_full_reg_offset(s, a->pg),
3815                            status, vsz, vsz, 0, fn);
3816     }
3817     return true;
3818 }
3819 
3820 #define DO_FPCMP(NAME, name) \
3821     static gen_helper_gvec_4_ptr * const name##_fns[4] = {            \
3822         NULL, gen_helper_sve_##name##_h,                              \
3823         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3824     };                                                                \
3825     TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])
3826 
3827 DO_FPCMP(FCMGE, fcmge)
3828 DO_FPCMP(FCMGT, fcmgt)
3829 DO_FPCMP(FCMEQ, fcmeq)
3830 DO_FPCMP(FCMNE, fcmne)
3831 DO_FPCMP(FCMUO, fcmuo)
3832 DO_FPCMP(FACGE, facge)
3833 DO_FPCMP(FACGT, facgt)
3834 
3835 #undef DO_FPCMP
3836 
3837 static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
3838     NULL,                   gen_helper_sve_fcadd_h,
3839     gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
3840 };
3841 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
3842            a->rd, a->rn, a->rm, a->pg, a->rot,
3843            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3844 
3845 #define DO_FMLA(NAME, name) \
3846     static gen_helper_gvec_5_ptr * const name##_fns[4] = {              \
3847         NULL, gen_helper_sve_##name##_h,                                \
3848         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d            \
3849     };                                                                  \
3850     TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
3851                a->rd, a->rn, a->rm, a->ra, a->pg, 0,                    \
3852                a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3853 
3854 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3855 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3856 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3857 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3858 
3859 #undef DO_FMLA
3860 
3861 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
3862     NULL,                         gen_helper_sve_fcmla_zpzzz_h,
3863     gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
3864 };
3865 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
3866            a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
3867            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3868 
3869 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
3870     NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
3871 };
3872 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
3873            a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
3874            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3875 
3876 /*
3877  *** SVE Floating Point Unary Operations Predicated Group
3878  */
3879 
3880 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
3881            gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
3882 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
3883            gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
3884 
3885 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
3886            gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
3887 
3888 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
3889            gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
3890 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
3891            gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
3892 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
3893            gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
3894 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
3895            gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
3896 
3897 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
3898            gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
3899 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
3900            gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
3901 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
3902            gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
3903 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
3904            gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
3905 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
3906            gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
3907 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
3908            gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
3909 
3910 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
3911            gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
3912 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
3913            gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
3914 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
3915            gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
3916 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
3917            gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
3918 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
3919            gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
3920 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
3921            gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
3922 
3923 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
3924            gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
3925 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
3926            gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
3927 
3928 static gen_helper_gvec_3_ptr * const frint_fns[] = {
3929     NULL,
3930     gen_helper_sve_frint_h,
3931     gen_helper_sve_frint_s,
3932     gen_helper_sve_frint_d
3933 };
3934 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
3935            a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3936 
3937 static gen_helper_gvec_3_ptr * const frintx_fns[] = {
3938     NULL,
3939     gen_helper_sve_frintx_h,
3940     gen_helper_sve_frintx_s,
3941     gen_helper_sve_frintx_d
3942 };
3943 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
3944            a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3945 
3946 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
3947                           ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
3948 {
3949     unsigned vsz;
3950     TCGv_i32 tmode;
3951     TCGv_ptr status;
3952 
3953     if (fn == NULL) {
3954         return false;
3955     }
3956     if (!sve_access_check(s)) {
3957         return true;
3958     }
3959 
3960     vsz = vec_full_reg_size(s);
3961     status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3962     tmode = gen_set_rmode(mode, status);
3963 
3964     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3965                        vec_full_reg_offset(s, a->rn),
3966                        pred_full_reg_offset(s, a->pg),
3967                        status, vsz, vsz, 0, fn);
3968 
3969     gen_restore_rmode(tmode, status);
3970     return true;
3971 }
3972 
3973 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
3974            FPROUNDING_TIEEVEN, frint_fns[a->esz])
3975 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
3976            FPROUNDING_POSINF, frint_fns[a->esz])
3977 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
3978            FPROUNDING_NEGINF, frint_fns[a->esz])
3979 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
3980            FPROUNDING_ZERO, frint_fns[a->esz])
3981 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
3982            FPROUNDING_TIEAWAY, frint_fns[a->esz])
3983 
3984 static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
3985     NULL,                    gen_helper_sve_frecpx_h,
3986     gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
3987 };
3988 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
3989            a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3990 
3991 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
3992     NULL,                   gen_helper_sve_fsqrt_h,
3993     gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
3994 };
3995 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
3996            a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3997 
3998 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
3999            gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
4000 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4001            gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
4002 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4003            gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
4004 
4005 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4006            gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
4007 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4008            gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
4009 
4010 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4011            gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
4012 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4013            gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
4014 
4015 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4016            gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
4017 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4018            gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
4019 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4020            gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
4021 
4022 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4023            gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
4024 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4025            gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
4026 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4027            gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
4028 
4029 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4030            gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
4031 
4032 /*
4033  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4034  */
4035 
4036 /* Subroutine loading a vector register at VOFS of LEN bytes.
4037  * The load should begin at the address Rn + IMM.
4038  */
4039 
4040 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
4041                  int len, int rn, int imm)
4042 {
4043     int len_align = QEMU_ALIGN_DOWN(len, 16);
4044     int len_remain = len % 16;
4045     int nparts = len / 16 + ctpop8(len_remain);
4046     int midx = get_mem_index(s);
4047     TCGv_i64 dirty_addr, clean_addr, t0, t1;
4048     TCGv_i128 t16;
4049 
4050     dirty_addr = tcg_temp_new_i64();
4051     tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4052     clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4053 
4054     /*
4055      * Note that unpredicated load/store of vector/predicate registers
4056      * are defined as a stream of bytes, which equates to little-endian
4057      * operations on larger quantities.
4058      * Attempt to keep code expansion to a minimum by limiting the
4059      * amount of unrolling done.
4060      */
4061     if (nparts <= 4) {
4062         int i;
4063 
4064         t0 = tcg_temp_new_i64();
4065         t1 = tcg_temp_new_i64();
4066         t16 = tcg_temp_new_i128();
4067 
4068         for (i = 0; i < len_align; i += 16) {
4069             tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
4070                                  MO_LE | MO_128 | MO_ATOM_NONE);
4071             tcg_gen_extr_i128_i64(t0, t1, t16);
4072             tcg_gen_st_i64(t0, base, vofs + i);
4073             tcg_gen_st_i64(t1, base, vofs + i + 8);
4074             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4075         }
4076     } else {
4077         TCGLabel *loop = gen_new_label();
4078         TCGv_ptr tp, i = tcg_temp_new_ptr();
4079 
4080         tcg_gen_movi_ptr(i, 0);
4081         gen_set_label(loop);
4082 
4083         t16 = tcg_temp_new_i128();
4084         tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
4085                              MO_LE | MO_128 | MO_ATOM_NONE);
4086         tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4087 
4088         tp = tcg_temp_new_ptr();
4089         tcg_gen_add_ptr(tp, base, i);
4090         tcg_gen_addi_ptr(i, i, 16);
4091 
4092         t0 = tcg_temp_new_i64();
4093         t1 = tcg_temp_new_i64();
4094         tcg_gen_extr_i128_i64(t0, t1, t16);
4095 
4096         tcg_gen_st_i64(t0, tp, vofs);
4097         tcg_gen_st_i64(t1, tp, vofs + 8);
4098 
4099         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4100     }
4101 
4102     /*
4103      * Predicate register loads can be any multiple of 2.
4104      * Note that we still store the entire 64-bit unit into tcg_env.
4105      */
4106     if (len_remain >= 8) {
4107         t0 = tcg_temp_new_i64();
4108         tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
4109         tcg_gen_st_i64(t0, base, vofs + len_align);
4110         len_remain -= 8;
4111         len_align += 8;
4112         if (len_remain) {
4113             tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4114         }
4115     }
4116     if (len_remain) {
4117         t0 = tcg_temp_new_i64();
4118         switch (len_remain) {
4119         case 2:
4120         case 4:
4121         case 8:
4122             tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4123                                 MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
4124             break;
4125 
4126         case 6:
4127             t1 = tcg_temp_new_i64();
4128             tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
4129             tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4130             tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
4131             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4132             break;
4133 
4134         default:
4135             g_assert_not_reached();
4136         }
4137         tcg_gen_st_i64(t0, base, vofs + len_align);
4138     }
4139 }
4140 
4141 /* Similarly for stores.  */
4142 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
4143                  int len, int rn, int imm)
4144 {
4145     int len_align = QEMU_ALIGN_DOWN(len, 16);
4146     int len_remain = len % 16;
4147     int nparts = len / 16 + ctpop8(len_remain);
4148     int midx = get_mem_index(s);
4149     TCGv_i64 dirty_addr, clean_addr, t0, t1;
4150     TCGv_i128 t16;
4151 
4152     dirty_addr = tcg_temp_new_i64();
4153     tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4154     clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4155 
4156     /* Note that unpredicated load/store of vector/predicate registers
4157      * are defined as a stream of bytes, which equates to little-endian
4158      * operations on larger quantities.  There is no nice way to force
4159      * a little-endian store for aarch64_be-linux-user out of line.
4160      *
4161      * Attempt to keep code expansion to a minimum by limiting the
4162      * amount of unrolling done.
4163      */
4164     if (nparts <= 4) {
4165         int i;
4166 
4167         t0 = tcg_temp_new_i64();
4168         t1 = tcg_temp_new_i64();
4169         t16 = tcg_temp_new_i128();
4170         for (i = 0; i < len_align; i += 16) {
4171             tcg_gen_ld_i64(t0, base, vofs + i);
4172             tcg_gen_ld_i64(t1, base, vofs + i + 8);
4173             tcg_gen_concat_i64_i128(t16, t0, t1);
4174             tcg_gen_qemu_st_i128(t16, clean_addr, midx,
4175                                  MO_LE | MO_128 | MO_ATOM_NONE);
4176             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4177         }
4178     } else {
4179         TCGLabel *loop = gen_new_label();
4180         TCGv_ptr tp, i = tcg_temp_new_ptr();
4181 
4182         tcg_gen_movi_ptr(i, 0);
4183         gen_set_label(loop);
4184 
4185         t0 = tcg_temp_new_i64();
4186         t1 = tcg_temp_new_i64();
4187         tp = tcg_temp_new_ptr();
4188         tcg_gen_add_ptr(tp, base, i);
4189         tcg_gen_ld_i64(t0, tp, vofs);
4190         tcg_gen_ld_i64(t1, tp, vofs + 8);
4191         tcg_gen_addi_ptr(i, i, 16);
4192 
4193         t16 = tcg_temp_new_i128();
4194         tcg_gen_concat_i64_i128(t16, t0, t1);
4195 
4196         tcg_gen_qemu_st_i128(t16, clean_addr, midx,
4197                              MO_LE | MO_128 | MO_ATOM_NONE);
4198         tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4199 
4200         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4201     }
4202 
4203     /* Predicate register stores can be any multiple of 2.  */
4204     if (len_remain >= 8) {
4205         t0 = tcg_temp_new_i64();
4206         tcg_gen_ld_i64(t0, base, vofs + len_align);
4207         tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
4208         len_remain -= 8;
4209         len_align += 8;
4210         if (len_remain) {
4211             tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4212         }
4213     }
4214     if (len_remain) {
4215         t0 = tcg_temp_new_i64();
4216         tcg_gen_ld_i64(t0, base, vofs + len_align);
4217 
4218         switch (len_remain) {
4219         case 2:
4220         case 4:
4221         case 8:
4222             tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4223                                 MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
4224             break;
4225 
4226         case 6:
4227             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
4228             tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4229             tcg_gen_shri_i64(t0, t0, 32);
4230             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
4231             break;
4232 
4233         default:
4234             g_assert_not_reached();
4235         }
4236     }
4237 }
4238 
4239 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4240 {
4241     if (!dc_isar_feature(aa64_sve, s)) {
4242         return false;
4243     }
4244     if (sve_access_check(s)) {
4245         int size = vec_full_reg_size(s);
4246         int off = vec_full_reg_offset(s, a->rd);
4247         gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
4248     }
4249     return true;
4250 }
4251 
4252 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4253 {
4254     if (!dc_isar_feature(aa64_sve, s)) {
4255         return false;
4256     }
4257     if (sve_access_check(s)) {
4258         int size = pred_full_reg_size(s);
4259         int off = pred_full_reg_offset(s, a->rd);
4260         gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
4261     }
4262     return true;
4263 }
4264 
4265 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4266 {
4267     if (!dc_isar_feature(aa64_sve, s)) {
4268         return false;
4269     }
4270     if (sve_access_check(s)) {
4271         int size = vec_full_reg_size(s);
4272         int off = vec_full_reg_offset(s, a->rd);
4273         gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
4274     }
4275     return true;
4276 }
4277 
4278 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4279 {
4280     if (!dc_isar_feature(aa64_sve, s)) {
4281         return false;
4282     }
4283     if (sve_access_check(s)) {
4284         int size = pred_full_reg_size(s);
4285         int off = pred_full_reg_offset(s, a->rd);
4286         gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
4287     }
4288     return true;
4289 }
4290 
4291 /*
4292  *** SVE Memory - Contiguous Load Group
4293  */
4294 
4295 /* The memory mode of the dtype.  */
4296 static const MemOp dtype_mop[16] = {
4297     MO_UB, MO_UB, MO_UB, MO_UB,
4298     MO_SL, MO_UW, MO_UW, MO_UW,
4299     MO_SW, MO_SW, MO_UL, MO_UL,
4300     MO_SB, MO_SB, MO_SB, MO_UQ
4301 };
4302 
4303 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4304 
/*
 * Vector element size (log2 of the size in bytes) for each of the 16 dtype
 * values.
 */
static const uint8_t dtype_esz[16] = {
    [0]  = 0,
    [1]  = 1,
    [2]  = 2,
    [3]  = 3,

    [4]  = 3,
    [5]  = 1,
    [6]  = 2,
    [7]  = 3,

    [8]  = 3,
    [9]  = 2,
    [10] = 2,
    [11] = 3,

    [12] = 3,
    [13] = 2,
    [14] = 1,
    [15] = 3,
};
4312 
4313 uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
4314                           uint32_t msz, bool is_write, uint32_t data)
4315 {
4316     uint32_t sizem1;
4317     uint32_t desc = 0;
4318 
4319     /* Assert all of the data fits, with or without MTE enabled. */
4320     assert(nregs >= 1 && nregs <= 4);
4321     sizem1 = (nregs << msz) - 1;
4322     assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT);
4323     assert(data < 1u << SVE_MTEDESC_SHIFT);
4324 
4325     if (s->mte_active[0]) {
4326         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4327         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4328         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4329         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4330         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1);
4331         desc <<= SVE_MTEDESC_SHIFT;
4332     }
4333     return simd_desc(vsz, vsz, desc | data);
4334 }
4335 
4336 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4337                        int dtype, uint32_t nregs, bool is_write,
4338                        gen_helper_gvec_mem *fn)
4339 {
4340     TCGv_ptr t_pg;
4341     uint32_t desc;
4342 
4343     if (!s->mte_active[0]) {
4344         addr = clean_data_tbi(s, addr);
4345     }
4346 
4347     /*
4348      * For e.g. LD4, there are not enough arguments to pass all 4
4349      * registers as pointers, so encode the regno into the data field.
4350      * For consistency, do this even for LD1.
4351      */
4352     desc = make_svemte_desc(s, vec_full_reg_size(s), nregs,
4353                             dtype_msz(dtype), is_write, zt);
4354     t_pg = tcg_temp_new_ptr();
4355 
4356     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
4357     fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
4358 }
4359 
4360 /* Indexed by [mte][be][dtype][nreg] */
4361 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
4362     { /* mte inactive, little-endian */
4363       { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4364           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4365         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4366         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4367         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4368 
4369         { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4370         { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4371           gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4372         { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4373         { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4374 
4375         { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4376         { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4377         { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4378           gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4379         { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4380 
4381         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4382         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4383         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4384         { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4385           gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4386 
4387       /* mte inactive, big-endian */
4388       { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4389           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4390         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4391         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4392         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4393 
4394         { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4395         { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4396           gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4397         { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4398         { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4399 
4400         { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4401         { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4402         { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4403           gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4404         { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4405 
4406         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4407         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4408         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4409         { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4410           gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4411 
4412     { /* mte active, little-endian */
4413       { { gen_helper_sve_ld1bb_r_mte,
4414           gen_helper_sve_ld2bb_r_mte,
4415           gen_helper_sve_ld3bb_r_mte,
4416           gen_helper_sve_ld4bb_r_mte },
4417         { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4418         { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4419         { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4420 
4421         { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4422         { gen_helper_sve_ld1hh_le_r_mte,
4423           gen_helper_sve_ld2hh_le_r_mte,
4424           gen_helper_sve_ld3hh_le_r_mte,
4425           gen_helper_sve_ld4hh_le_r_mte },
4426         { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4427         { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4428 
4429         { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4430         { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4431         { gen_helper_sve_ld1ss_le_r_mte,
4432           gen_helper_sve_ld2ss_le_r_mte,
4433           gen_helper_sve_ld3ss_le_r_mte,
4434           gen_helper_sve_ld4ss_le_r_mte },
4435         { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4436 
4437         { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4438         { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4439         { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4440         { gen_helper_sve_ld1dd_le_r_mte,
4441           gen_helper_sve_ld2dd_le_r_mte,
4442           gen_helper_sve_ld3dd_le_r_mte,
4443           gen_helper_sve_ld4dd_le_r_mte } },
4444 
4445       /* mte active, big-endian */
4446       { { gen_helper_sve_ld1bb_r_mte,
4447           gen_helper_sve_ld2bb_r_mte,
4448           gen_helper_sve_ld3bb_r_mte,
4449           gen_helper_sve_ld4bb_r_mte },
4450         { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4451         { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4452         { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4453 
4454         { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4455         { gen_helper_sve_ld1hh_be_r_mte,
4456           gen_helper_sve_ld2hh_be_r_mte,
4457           gen_helper_sve_ld3hh_be_r_mte,
4458           gen_helper_sve_ld4hh_be_r_mte },
4459         { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4460         { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4461 
4462         { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4463         { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4464         { gen_helper_sve_ld1ss_be_r_mte,
4465           gen_helper_sve_ld2ss_be_r_mte,
4466           gen_helper_sve_ld3ss_be_r_mte,
4467           gen_helper_sve_ld4ss_be_r_mte },
4468         { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4469 
4470         { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4471         { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4472         { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4473         { gen_helper_sve_ld1dd_be_r_mte,
4474           gen_helper_sve_ld2dd_be_r_mte,
4475           gen_helper_sve_ld3dd_be_r_mte,
4476           gen_helper_sve_ld4dd_be_r_mte } } },
4477 };
4478 
/*
 * Emit a predicated SVE load through one of the ldr_fns[] helpers.
 *
 * The helper is looked up as
 *     ldr_fns[MTE active][big-endian data][dtype][nreg]
 * where nreg selects the ld1/ld2/ld3/ld4 variant (index 0..3) of the
 * given dtype.  zt, pg, addr and dtype are forwarded unchanged to
 * do_mem_zpa(), together with nreg + 1.
 */
4479 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4480                       TCGv_i64 addr, int dtype, int nreg)
4481 {
4482     gen_helper_gvec_mem *fn
4483         = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
4484 
4485     /*
4486      * While there are holes in the table, they are not
4487      * accessible via the instruction encoding.
4488      */
4489     assert(fn != NULL);
         /*
          * NOTE(review): the literal "false" is presumably do_mem_zpa()'s
          * is_write flag (do_st_zpa() passes true) - confirm against
          * do_mem_zpa().
          */
4490     do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn);
4491 }
4492 
/*
 * Translate a predicated SVE load addressed as
 *     cpu_reg_sp(rn) + (cpu_reg(rm) << dtype_msz(dtype)).
 *
 * Returns false when rm is 31 or the aa64_sve feature is absent
 * (NOTE(review): presumably the decoder then treats the insn as
 * unallocated - confirm).  Otherwise returns true; code is emitted
 * only when sve_access_check() succeeds.
 */
4493 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4494 {
4495     if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
4496         return false;
4497     }
4498     if (sve_access_check(s)) {
4499         TCGv_i64 addr = tcg_temp_new_i64();
             /* Scale the index register by the memory element size. */
4500         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4501         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4502         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4503     }
4504     return true;
4505 }
4506 
/*
 * Translate a predicated SVE load addressed as cpu_reg_sp(rn) plus an
 * immediate that is scaled by the vector length.
 *
 * The byte offset added to the base is
 *     (imm * elements * (nreg + 1)) << dtype_msz(dtype)
 * with elements = vec_full_reg_size(s) >> dtype_esz[dtype].
 *
 * Returns false when the aa64_sve feature is absent, true otherwise;
 * code is emitted only when sve_access_check() succeeds.
 */
4507 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4508 {
4509     if (!dc_isar_feature(aa64_sve, s)) {
4510         return false;
4511     }
4512     if (sve_access_check(s)) {
4513         int vsz = vec_full_reg_size(s);
             /* Number of dtype-sized elements held by one full vector. */
4514         int elements = vsz >> dtype_esz[a->dtype];
4515         TCGv_i64 addr = tcg_temp_new_i64();
4516 
4517         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4518                          (a->imm * elements * (a->nreg + 1))
4519                          << dtype_msz(a->dtype));
4520         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4521     }
4522     return true;
4523 }
4524 
/*
 * Translate LDFF1 addressed as
 *     cpu_reg_sp(rn) + (cpu_reg(rm) << dtype_msz(dtype)).
 *
 * fns[] is indexed by [MTE active][big-endian data][dtype] and holds one
 * single-register helper per dtype.  Helpers without an _le/_be suffix
 * appear unchanged in both the little- and big-endian rows.
 *
 * Returns false when the aa64_sve feature is absent, true otherwise;
 * code is emitted only when sve_access_check() succeeds.
 */
4525 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4526 {
4527     static gen_helper_gvec_mem * const fns[2][2][16] = {
4528         { /* mte inactive, little-endian */
4529           { gen_helper_sve_ldff1bb_r,
4530             gen_helper_sve_ldff1bhu_r,
4531             gen_helper_sve_ldff1bsu_r,
4532             gen_helper_sve_ldff1bdu_r,
4533 
4534             gen_helper_sve_ldff1sds_le_r,
4535             gen_helper_sve_ldff1hh_le_r,
4536             gen_helper_sve_ldff1hsu_le_r,
4537             gen_helper_sve_ldff1hdu_le_r,
4538 
4539             gen_helper_sve_ldff1hds_le_r,
4540             gen_helper_sve_ldff1hss_le_r,
4541             gen_helper_sve_ldff1ss_le_r,
4542             gen_helper_sve_ldff1sdu_le_r,
4543 
4544             gen_helper_sve_ldff1bds_r,
4545             gen_helper_sve_ldff1bss_r,
4546             gen_helper_sve_ldff1bhs_r,
4547             gen_helper_sve_ldff1dd_le_r },
4548 
4549           /* mte inactive, big-endian */
4550           { gen_helper_sve_ldff1bb_r,
4551             gen_helper_sve_ldff1bhu_r,
4552             gen_helper_sve_ldff1bsu_r,
4553             gen_helper_sve_ldff1bdu_r,
4554 
4555             gen_helper_sve_ldff1sds_be_r,
4556             gen_helper_sve_ldff1hh_be_r,
4557             gen_helper_sve_ldff1hsu_be_r,
4558             gen_helper_sve_ldff1hdu_be_r,
4559 
4560             gen_helper_sve_ldff1hds_be_r,
4561             gen_helper_sve_ldff1hss_be_r,
4562             gen_helper_sve_ldff1ss_be_r,
4563             gen_helper_sve_ldff1sdu_be_r,
4564 
4565             gen_helper_sve_ldff1bds_r,
4566             gen_helper_sve_ldff1bss_r,
4567             gen_helper_sve_ldff1bhs_r,
4568             gen_helper_sve_ldff1dd_be_r } },
4569 
4570         { /* mte active, little-endian */
4571           { gen_helper_sve_ldff1bb_r_mte,
4572             gen_helper_sve_ldff1bhu_r_mte,
4573             gen_helper_sve_ldff1bsu_r_mte,
4574             gen_helper_sve_ldff1bdu_r_mte,
4575 
4576             gen_helper_sve_ldff1sds_le_r_mte,
4577             gen_helper_sve_ldff1hh_le_r_mte,
4578             gen_helper_sve_ldff1hsu_le_r_mte,
4579             gen_helper_sve_ldff1hdu_le_r_mte,
4580 
4581             gen_helper_sve_ldff1hds_le_r_mte,
4582             gen_helper_sve_ldff1hss_le_r_mte,
4583             gen_helper_sve_ldff1ss_le_r_mte,
4584             gen_helper_sve_ldff1sdu_le_r_mte,
4585 
4586             gen_helper_sve_ldff1bds_r_mte,
4587             gen_helper_sve_ldff1bss_r_mte,
4588             gen_helper_sve_ldff1bhs_r_mte,
4589             gen_helper_sve_ldff1dd_le_r_mte },
4590 
4591           /* mte active, big-endian */
4592           { gen_helper_sve_ldff1bb_r_mte,
4593             gen_helper_sve_ldff1bhu_r_mte,
4594             gen_helper_sve_ldff1bsu_r_mte,
4595             gen_helper_sve_ldff1bdu_r_mte,
4596 
4597             gen_helper_sve_ldff1sds_be_r_mte,
4598             gen_helper_sve_ldff1hh_be_r_mte,
4599             gen_helper_sve_ldff1hsu_be_r_mte,
4600             gen_helper_sve_ldff1hdu_be_r_mte,
4601 
4602             gen_helper_sve_ldff1hds_be_r_mte,
4603             gen_helper_sve_ldff1hss_be_r_mte,
4604             gen_helper_sve_ldff1ss_be_r_mte,
4605             gen_helper_sve_ldff1sdu_be_r_mte,
4606 
4607             gen_helper_sve_ldff1bds_r_mte,
4608             gen_helper_sve_ldff1bss_r_mte,
4609             gen_helper_sve_ldff1bhs_r_mte,
4610             gen_helper_sve_ldff1dd_be_r_mte } },
4611     };
4612 
4613     if (!dc_isar_feature(aa64_sve, s)) {
4614         return false;
4615     }
         /*
          * The flag is set before the access check is made.
          * NOTE(review): presumably sve_access_check() consults it to
          * reject the insn in streaming mode - confirm.
          */
4616     s->is_nonstreaming = true;
4617     if (sve_access_check(s)) {
4618         TCGv_i64 addr = tcg_temp_new_i64();
4619         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4620         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
             /* Always a single register, hence the literal 1. */
4621         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4622                    fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4623     }
4624     return true;
4625 }
4626 
/*
 * Translate LDNF1 addressed as cpu_reg_sp(rn) plus an immediate scaled
 * by the vector length:
 *     off = (imm * elements) << dtype_msz(dtype)
 * with elements = vec_full_reg_size(s) >> dtype_esz[dtype].
 *
 * fns[] is indexed by [MTE active][big-endian data][dtype] and holds one
 * single-register helper per dtype.
 *
 * Returns false when the aa64_sve feature is absent, true otherwise;
 * code is emitted only when sve_access_check() succeeds.
 */
4627 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4628 {
4629     static gen_helper_gvec_mem * const fns[2][2][16] = {
4630         { /* mte inactive, little-endian */
4631           { gen_helper_sve_ldnf1bb_r,
4632             gen_helper_sve_ldnf1bhu_r,
4633             gen_helper_sve_ldnf1bsu_r,
4634             gen_helper_sve_ldnf1bdu_r,
4635 
4636             gen_helper_sve_ldnf1sds_le_r,
4637             gen_helper_sve_ldnf1hh_le_r,
4638             gen_helper_sve_ldnf1hsu_le_r,
4639             gen_helper_sve_ldnf1hdu_le_r,
4640 
4641             gen_helper_sve_ldnf1hds_le_r,
4642             gen_helper_sve_ldnf1hss_le_r,
4643             gen_helper_sve_ldnf1ss_le_r,
4644             gen_helper_sve_ldnf1sdu_le_r,
4645 
4646             gen_helper_sve_ldnf1bds_r,
4647             gen_helper_sve_ldnf1bss_r,
4648             gen_helper_sve_ldnf1bhs_r,
4649             gen_helper_sve_ldnf1dd_le_r },
4650 
4651           /* mte inactive, big-endian */
4652           { gen_helper_sve_ldnf1bb_r,
4653             gen_helper_sve_ldnf1bhu_r,
4654             gen_helper_sve_ldnf1bsu_r,
4655             gen_helper_sve_ldnf1bdu_r,
4656 
4657             gen_helper_sve_ldnf1sds_be_r,
4658             gen_helper_sve_ldnf1hh_be_r,
4659             gen_helper_sve_ldnf1hsu_be_r,
4660             gen_helper_sve_ldnf1hdu_be_r,
4661 
4662             gen_helper_sve_ldnf1hds_be_r,
4663             gen_helper_sve_ldnf1hss_be_r,
4664             gen_helper_sve_ldnf1ss_be_r,
4665             gen_helper_sve_ldnf1sdu_be_r,
4666 
4667             gen_helper_sve_ldnf1bds_r,
4668             gen_helper_sve_ldnf1bss_r,
4669             gen_helper_sve_ldnf1bhs_r,
4670             gen_helper_sve_ldnf1dd_be_r } },
4671 
4672         { /* mte active, little-endian */
4673           { gen_helper_sve_ldnf1bb_r_mte,
4674             gen_helper_sve_ldnf1bhu_r_mte,
4675             gen_helper_sve_ldnf1bsu_r_mte,
4676             gen_helper_sve_ldnf1bdu_r_mte,
4677 
4678             gen_helper_sve_ldnf1sds_le_r_mte,
4679             gen_helper_sve_ldnf1hh_le_r_mte,
4680             gen_helper_sve_ldnf1hsu_le_r_mte,
4681             gen_helper_sve_ldnf1hdu_le_r_mte,
4682 
4683             gen_helper_sve_ldnf1hds_le_r_mte,
4684             gen_helper_sve_ldnf1hss_le_r_mte,
4685             gen_helper_sve_ldnf1ss_le_r_mte,
4686             gen_helper_sve_ldnf1sdu_le_r_mte,
4687 
4688             gen_helper_sve_ldnf1bds_r_mte,
4689             gen_helper_sve_ldnf1bss_r_mte,
4690             gen_helper_sve_ldnf1bhs_r_mte,
4691             gen_helper_sve_ldnf1dd_le_r_mte },
4692 
4693           /* mte active, big-endian */
4694           { gen_helper_sve_ldnf1bb_r_mte,
4695             gen_helper_sve_ldnf1bhu_r_mte,
4696             gen_helper_sve_ldnf1bsu_r_mte,
4697             gen_helper_sve_ldnf1bdu_r_mte,
4698 
4699             gen_helper_sve_ldnf1sds_be_r_mte,
4700             gen_helper_sve_ldnf1hh_be_r_mte,
4701             gen_helper_sve_ldnf1hsu_be_r_mte,
4702             gen_helper_sve_ldnf1hdu_be_r_mte,
4703 
4704             gen_helper_sve_ldnf1hds_be_r_mte,
4705             gen_helper_sve_ldnf1hss_be_r_mte,
4706             gen_helper_sve_ldnf1ss_be_r_mte,
4707             gen_helper_sve_ldnf1sdu_be_r_mte,
4708 
4709             gen_helper_sve_ldnf1bds_r_mte,
4710             gen_helper_sve_ldnf1bss_r_mte,
4711             gen_helper_sve_ldnf1bhs_r_mte,
4712             gen_helper_sve_ldnf1dd_be_r_mte } },
4713     };
4714 
4715     if (!dc_isar_feature(aa64_sve, s)) {
4716         return false;
4717     }
         /*
          * The flag is set before the access check is made.
          * NOTE(review): presumably sve_access_check() consults it to
          * reject the insn in streaming mode - confirm.
          */
4718     s->is_nonstreaming = true;
4719     if (sve_access_check(s)) {
4720         int vsz = vec_full_reg_size(s);
             /* Number of dtype-sized elements held by one full vector. */
4721         int elements = vsz >> dtype_esz[a->dtype];
4722         int off = (a->imm * elements) << dtype_msz(a->dtype);
4723         TCGv_i64 addr = tcg_temp_new_i64();
4724 
4725         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
             /* Always a single register, hence the literal 1. */
4726         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4727                    fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4728     }
4729     return true;
4730 }
4731 
/*
 * Load one quadword (16 bytes) from ADDR into Zt under predicate Pg and
 * replicate it through the rest of the vector register.
 *
 * The load uses the single-register (index 0) entry of ldr_fns[] for
 * DTYPE, called with a descriptor built for a 16-byte vector.  For
 * vectors longer than 16 bytes, the low 16 bits of Pg are copied into
 * vfp.preg_tmp and that copy is passed to the helper instead of Pg.
 */
4732 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
4733 {
4734     unsigned vsz = vec_full_reg_size(s);
4735     TCGv_ptr t_pg;
4736     int poff;
4737     uint32_t desc;
4738 
4739     /* Load the first quadword using the normal predicated load helpers.  */
         /*
          * NOTE(review): the address is only cleaned when MTE is inactive;
          * presumably the _mte helpers need the unmodified address -
          * confirm.
          */
4740     if (!s->mte_active[0]) {
4741         addr = clean_data_tbi(s, addr);
4742     }
4743 
4744     poff = pred_full_reg_offset(s, pg);
4745     if (vsz > 16) {
4746         /*
4747          * Zero-extend the first 16 bits of the predicate into a temporary.
4748          * This avoids triggering an assert making sure we don't have bits
4749          * set within a predicate beyond VQ, but we have lowered VQ to 1
4750          * for this load operation.
4751          */
4752         TCGv_i64 tmp = tcg_temp_new_i64();
             /*
              * On a big-endian host the low 16 bits of a 64-bit word are
              * its last two bytes, hence the 6-byte adjustment (assumes the
              * predicate is stored as host-endian 64-bit words - TODO
              * confirm).
              */
4753 #if HOST_BIG_ENDIAN
4754         poff += 6;
4755 #endif
4756         tcg_gen_ld16u_i64(tmp, tcg_env, poff);
4757 
             /* From here on poff refers to the temporary, not to Pg. */
4758         poff = offsetof(CPUARMState, vfp.preg_tmp);
4759         tcg_gen_st_i64(tmp, tcg_env, poff);
4760     }
4761 
4762     t_pg = tcg_temp_new_ptr();
4763     tcg_gen_addi_ptr(t_pg, tcg_env, poff);
4764 
4765     gen_helper_gvec_mem *fn
4766         = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
4767     desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt);
4768     fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
4769 
4770     /* Replicate that first quadword.  */
4771     if (vsz > 16) {
4772         int doff = vec_full_reg_offset(s, zt);
             /*
              * Destination starts at byte 16 of Zt, the source is byte 0.
              * NOTE(review): the leading 4 is presumably log2 of the
              * 16-byte unit being duplicated - confirm against
              * tcg_gen_gvec_dup_mem().
              */
4773         tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
4774     }
4775 }
4776 
/*
 * Translate LD1RQ addressed as
 *     cpu_reg_sp(rn) + (cpu_reg(rm) << dtype_msz(dtype)).
 *
 * Returns false when rm is 31 or the aa64_sve feature is absent, true
 * otherwise; do_ldrq() is called only when sve_access_check() succeeds.
 */
4777 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4778 {
4779     if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
4780         return false;
4781     }
4782     if (sve_access_check(s)) {
4783         int msz = dtype_msz(a->dtype);
4784         TCGv_i64 addr = tcg_temp_new_i64();
             /* Scale the index register by the memory element size. */
4785         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4786         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4787         do_ldrq(s, a->rd, a->pg, addr, a->dtype);
4788     }
4789     return true;
4790 }
4791 
/*
 * Translate LD1RQ addressed as cpu_reg_sp(rn) + imm * 16, i.e. the
 * immediate counts 16-byte units.
 *
 * Returns false when the aa64_sve feature is absent, true otherwise;
 * do_ldrq() is called only when sve_access_check() succeeds.
 */
4792 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4793 {
4794     if (!dc_isar_feature(aa64_sve, s)) {
4795         return false;
4796     }
4797     if (sve_access_check(s)) {
4798         TCGv_i64 addr = tcg_temp_new_i64();
4799         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4800         do_ldrq(s, a->rd, a->pg, addr, a->dtype);
4801     }
4802     return true;
4803 }
4804 
/*
 * Load one octaword (32 bytes) from ADDR into Zt under predicate Pg and
 * replicate it through the vector register in 32-byte units; any tail
 * shorter than 32 bytes is zeroed.
 *
 * When the vector is shorter than 32 bytes, unallocated_encoding() is
 * called and nothing else is emitted.  The load uses the single-register
 * (index 0) entry of ldr_fns[] for DTYPE, called with a descriptor built
 * for a 32-byte vector.  For vectors longer than 32 bytes, the low
 * 32 bits of Pg are copied into vfp.preg_tmp and that copy is passed to
 * the helper instead of Pg.
 */
4805 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
4806 {
4807     unsigned vsz = vec_full_reg_size(s);
4808     unsigned vsz_r32;
4809     TCGv_ptr t_pg;
4810     int poff, doff;
4811     uint32_t desc;
4812 
4813     if (vsz < 32) {
4814         /*
4815          * Note that this UNDEFINED check comes after CheckSVEEnabled()
4816          * in the ARM pseudocode, which is the sve_access_check() done
4817          * in our caller.  We should not now return false from the caller.
4818          */
4819         unallocated_encoding(s);
4820         return;
4821     }
4822 
4823     /* Load the first octaword using the normal predicated load helpers.  */
         /*
          * NOTE(review): the address is only cleaned when MTE is inactive;
          * presumably the _mte helpers need the unmodified address -
          * confirm.
          */
4824     if (!s->mte_active[0]) {
4825         addr = clean_data_tbi(s, addr);
4826     }
4827 
4828     poff = pred_full_reg_offset(s, pg);
4829     if (vsz > 32) {
4830         /*
4831          * Zero-extend the first 32 bits of the predicate into a temporary.
4832          * This avoids triggering an assert making sure we don't have bits
4833          * set within a predicate beyond VQ, but we have lowered VQ to 2
4834          * for this load operation.
4835          */
4836         TCGv_i64 tmp = tcg_temp_new_i64();
             /*
              * On a big-endian host the low 32 bits of a 64-bit word are
              * its last four bytes, hence the 4-byte adjustment (assumes
              * the predicate is stored as host-endian 64-bit words - TODO
              * confirm).
              */
4837 #if HOST_BIG_ENDIAN
4838         poff += 4;
4839 #endif
4840         tcg_gen_ld32u_i64(tmp, tcg_env, poff);
4841 
             /* From here on poff refers to the temporary, not to Pg. */
4842         poff = offsetof(CPUARMState, vfp.preg_tmp);
4843         tcg_gen_st_i64(tmp, tcg_env, poff);
4844     }
4845 
4846     t_pg = tcg_temp_new_ptr();
4847     tcg_gen_addi_ptr(t_pg, tcg_env, poff);
4848 
4849     gen_helper_gvec_mem *fn
4850         = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
4851     desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt);
4852     fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
4853 
4854     /*
4855      * Replicate that first octaword.
4856      * The replication happens in units of 32; if the full vector size
4857      * is not a multiple of 32, the final bits are zeroed.
4858      */
4859     doff = vec_full_reg_offset(s, zt);
4860     vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
4861     if (vsz >= 64) {
             /*
              * Destination starts at byte 32 of Zt, the source is byte 0.
              * NOTE(review): the leading 5 is presumably log2 of the
              * 32-byte unit being duplicated - confirm against
              * tcg_gen_gvec_dup_mem().
              */
4862         tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
4863     }
         /* vsz now holds the size of the tail beyond the last 32-byte unit. */
4864     vsz -= vsz_r32;
4865     if (vsz) {
4866         tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
4867     }
4868 }
4869 
/*
 * Translate LD1RO addressed as
 *     cpu_reg_sp(rn) + (cpu_reg(rm) << dtype_msz(dtype)).
 *
 * Returns false when the aa64_sve_f64mm feature is absent or rm is 31,
 * true otherwise; do_ldro() is called only when sve_access_check()
 * succeeds.
 */
4870 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
4871 {
4872     if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4873         return false;
4874     }
4875     if (a->rm == 31) {
4876         return false;
4877     }
         /*
          * The flag is set before the access check is made.
          * NOTE(review): presumably sve_access_check() consults it to
          * reject the insn in streaming mode - confirm.
          */
4878     s->is_nonstreaming = true;
4879     if (sve_access_check(s)) {
4880         TCGv_i64 addr = tcg_temp_new_i64();
             /* Scale the index register by the memory element size. */
4881         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4882         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4883         do_ldro(s, a->rd, a->pg, addr, a->dtype);
4884     }
4885     return true;
4886 }
4887 
/*
 * Translate LD1RO addressed as cpu_reg_sp(rn) + imm * 32, i.e. the
 * immediate counts 32-byte units.
 *
 * Returns false when the aa64_sve_f64mm feature is absent, true
 * otherwise; do_ldro() is called only when sve_access_check() succeeds.
 */
4888 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
4889 {
4890     if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4891         return false;
4892     }
         /*
          * The flag is set before the access check is made.
          * NOTE(review): presumably sve_access_check() consults it to
          * reject the insn in streaming mode - confirm.
          */
4893     s->is_nonstreaming = true;
4894     if (sve_access_check(s)) {
4895         TCGv_i64 addr = tcg_temp_new_i64();
4896         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
4897         do_ldro(s, a->rd, a->pg, addr, a->dtype);
4898     }
4899     return true;
4900 }
4901 
4902 /* Load and broadcast element.  */
/*
 * One element is loaded from cpu_reg_sp(rn) + (imm << msz) and duplicated
 * into every element of Zd; the load and the duplication are skipped by a
 * branch when the governing predicate has no active element.  Either way
 * the function finishes with do_movz_zpz() on Zd under Pg.
 *
 * Returns false when the aa64_sve feature is absent, true when
 * sve_access_check() fails, and otherwise whatever do_movz_zpz() returns.
 */
4903 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4904 {
4905     unsigned vsz = vec_full_reg_size(s);
4906     unsigned psz = pred_full_reg_size(s);
4907     unsigned esz = dtype_esz[a->dtype];
4908     unsigned msz = dtype_msz(a->dtype);
4909     TCGLabel *over;
4910     TCGv_i64 temp, clean_addr;
4911     MemOp memop;
4912 
4913     if (!dc_isar_feature(aa64_sve, s)) {
4914         return false;
4915     }
4916     if (!sve_access_check(s)) {
4917         return true;
4918     }
4919 
         /* Branch target placed after the load and the broadcast. */
4920     over = gen_new_label();
4921 
4922     /* If the guarding predicate has no bits set, no load occurs.  */
4923     if (psz <= 8) {
4924         /* Reduce the pred_esz_masks value simply to reduce the
4925          * size of the code generated here.
4926          */
             /*
              * A predicate of at most 8 bytes is read with a single 64-bit
              * load and tested directly against the element-size mask.
              */
4927         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4928         temp = tcg_temp_new_i64();
4929         tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg));
4930         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4931         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4932     } else {
             /*
              * NOTE(review): a negative find_last_active() result is
              * presumably "no active element" - confirm.
              */
4933         TCGv_i32 t32 = tcg_temp_new_i32();
4934         find_last_active(s, t32, esz, a->pg);
4935         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4936     }
4937 
4938     /* Load the data.  */
         /* temp first holds the address, then receives the loaded value. */
4939     temp = tcg_temp_new_i64();
4940     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4941 
4942     memop = finalize_memop(s, dtype_mop[a->dtype]);
4943     clean_addr = gen_mte_check1(s, temp, false, true, memop);
4944     tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop);
4945 
4946     /* Broadcast to *all* elements.  */
4947     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4948                          vsz, vsz, temp);
4949 
4950     /* Zero the inactive elements.  */
4951     gen_set_label(over);
4952     return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
4953 }
4954 
/*
 * Emit a predicated SVE store of (nreg + 1) vector registers to ADDR.
 *
 * For nreg == 0 the helper comes from
 *     fn_single[MTE active][big-endian data][msz][esz]
 * whose entries with esz < msz are NULL.  For nreg > 0 it comes from
 *     fn_multiple[MTE active][big-endian data][nreg - 1][msz]
 * and msz must equal esz.  A NULL helper trips an assert.
 *
 * The helper is handed to do_mem_zpa() with zt, pg and addr unchanged,
 * msz_dtype(s, msz) as the dtype, nreg + 1, and "true" where the load
 * paths pass "false".
 */
4955 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4956                       int msz, int esz, int nreg)
4957 {
         /* ST1: each inner row is one msz, each column one esz. */
4958     static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
             /* mte inactive: little-endian rows, then big-endian rows */
4959         { { { gen_helper_sve_st1bb_r,
4960               gen_helper_sve_st1bh_r,
4961               gen_helper_sve_st1bs_r,
4962               gen_helper_sve_st1bd_r },
4963             { NULL,
4964               gen_helper_sve_st1hh_le_r,
4965               gen_helper_sve_st1hs_le_r,
4966               gen_helper_sve_st1hd_le_r },
4967             { NULL, NULL,
4968               gen_helper_sve_st1ss_le_r,
4969               gen_helper_sve_st1sd_le_r },
4970             { NULL, NULL, NULL,
4971               gen_helper_sve_st1dd_le_r } },
4972           { { gen_helper_sve_st1bb_r,
4973               gen_helper_sve_st1bh_r,
4974               gen_helper_sve_st1bs_r,
4975               gen_helper_sve_st1bd_r },
4976             { NULL,
4977               gen_helper_sve_st1hh_be_r,
4978               gen_helper_sve_st1hs_be_r,
4979               gen_helper_sve_st1hd_be_r },
4980             { NULL, NULL,
4981               gen_helper_sve_st1ss_be_r,
4982               gen_helper_sve_st1sd_be_r },
4983             { NULL, NULL, NULL,
4984               gen_helper_sve_st1dd_be_r } } },
4985 
             /* mte active: little-endian rows, then big-endian rows */
4986         { { { gen_helper_sve_st1bb_r_mte,
4987               gen_helper_sve_st1bh_r_mte,
4988               gen_helper_sve_st1bs_r_mte,
4989               gen_helper_sve_st1bd_r_mte },
4990             { NULL,
4991               gen_helper_sve_st1hh_le_r_mte,
4992               gen_helper_sve_st1hs_le_r_mte,
4993               gen_helper_sve_st1hd_le_r_mte },
4994             { NULL, NULL,
4995               gen_helper_sve_st1ss_le_r_mte,
4996               gen_helper_sve_st1sd_le_r_mte },
4997             { NULL, NULL, NULL,
4998               gen_helper_sve_st1dd_le_r_mte } },
4999           { { gen_helper_sve_st1bb_r_mte,
5000               gen_helper_sve_st1bh_r_mte,
5001               gen_helper_sve_st1bs_r_mte,
5002               gen_helper_sve_st1bd_r_mte },
5003             { NULL,
5004               gen_helper_sve_st1hh_be_r_mte,
5005               gen_helper_sve_st1hs_be_r_mte,
5006               gen_helper_sve_st1hd_be_r_mte },
5007             { NULL, NULL,
5008               gen_helper_sve_st1ss_be_r_mte,
5009               gen_helper_sve_st1sd_be_r_mte },
5010             { NULL, NULL, NULL,
5011               gen_helper_sve_st1dd_be_r_mte } } },
5012     };
         /* ST2/ST3/ST4: each inner row is one register count, each column one msz. */
5013     static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
             /* mte inactive: little-endian rows, then big-endian rows */
5014         { { { gen_helper_sve_st2bb_r,
5015               gen_helper_sve_st2hh_le_r,
5016               gen_helper_sve_st2ss_le_r,
5017               gen_helper_sve_st2dd_le_r },
5018             { gen_helper_sve_st3bb_r,
5019               gen_helper_sve_st3hh_le_r,
5020               gen_helper_sve_st3ss_le_r,
5021               gen_helper_sve_st3dd_le_r },
5022             { gen_helper_sve_st4bb_r,
5023               gen_helper_sve_st4hh_le_r,
5024               gen_helper_sve_st4ss_le_r,
5025               gen_helper_sve_st4dd_le_r } },
5026           { { gen_helper_sve_st2bb_r,
5027               gen_helper_sve_st2hh_be_r,
5028               gen_helper_sve_st2ss_be_r,
5029               gen_helper_sve_st2dd_be_r },
5030             { gen_helper_sve_st3bb_r,
5031               gen_helper_sve_st3hh_be_r,
5032               gen_helper_sve_st3ss_be_r,
5033               gen_helper_sve_st3dd_be_r },
5034             { gen_helper_sve_st4bb_r,
5035               gen_helper_sve_st4hh_be_r,
5036               gen_helper_sve_st4ss_be_r,
5037               gen_helper_sve_st4dd_be_r } } },
             /* mte active: little-endian rows, then big-endian rows */
5038         { { { gen_helper_sve_st2bb_r_mte,
5039               gen_helper_sve_st2hh_le_r_mte,
5040               gen_helper_sve_st2ss_le_r_mte,
5041               gen_helper_sve_st2dd_le_r_mte },
5042             { gen_helper_sve_st3bb_r_mte,
5043               gen_helper_sve_st3hh_le_r_mte,
5044               gen_helper_sve_st3ss_le_r_mte,
5045               gen_helper_sve_st3dd_le_r_mte },
5046             { gen_helper_sve_st4bb_r_mte,
5047               gen_helper_sve_st4hh_le_r_mte,
5048               gen_helper_sve_st4ss_le_r_mte,
5049               gen_helper_sve_st4dd_le_r_mte } },
5050           { { gen_helper_sve_st2bb_r_mte,
5051               gen_helper_sve_st2hh_be_r_mte,
5052               gen_helper_sve_st2ss_be_r_mte,
5053               gen_helper_sve_st2dd_be_r_mte },
5054             { gen_helper_sve_st3bb_r_mte,
5055               gen_helper_sve_st3hh_be_r_mte,
5056               gen_helper_sve_st3ss_be_r_mte,
5057               gen_helper_sve_st3dd_be_r_mte },
5058             { gen_helper_sve_st4bb_r_mte,
5059               gen_helper_sve_st4hh_be_r_mte,
5060               gen_helper_sve_st4ss_be_r_mte,
5061               gen_helper_sve_st4dd_be_r_mte } } },
5062     };
5063     gen_helper_gvec_mem *fn;
5064     int be = s->be_data == MO_BE;
5065 
5066     if (nreg == 0) {
5067         /* ST1 */
5068         fn = fn_single[s->mte_active[0]][be][msz][esz];
5069     } else {
5070         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5071         assert(msz == esz);
5072         fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
5073     }
5074     assert(fn != NULL);
5075     do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn);
5076 }
5077 
/*
 * Translate a predicated SVE store addressed as
 *     cpu_reg_sp(rn) + (cpu_reg(rm) << msz).
 *
 * Returns false when the aa64_sve feature is absent, when rm is 31, or
 * when msz > esz (the combinations for which do_st_zpa()'s fn_single
 * table holds NULL).  Otherwise returns true; do_st_zpa() is called only
 * when sve_access_check() succeeds.
 */
5078 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5079 {
5080     if (!dc_isar_feature(aa64_sve, s)) {
5081         return false;
5082     }
5083     if (a->rm == 31 || a->msz > a->esz) {
5084         return false;
5085     }
5086     if (sve_access_check(s)) {
5087         TCGv_i64 addr = tcg_temp_new_i64();
             /* Scale the index register by the memory element size. */
5088         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5089         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5090         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5091     }
5092     return true;
5093 }
5094 
/*
 * Translate a predicated SVE store addressed as cpu_reg_sp(rn) plus an
 * immediate that is scaled by the vector length.
 *
 * The byte offset added to the base is
 *     (imm * elements * (nreg + 1)) << msz
 * with elements = vec_full_reg_size(s) >> esz.
 *
 * Returns false when the aa64_sve feature is absent or msz > esz, true
 * otherwise; do_st_zpa() is called only when sve_access_check() succeeds.
 */
5095 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5096 {
5097     if (!dc_isar_feature(aa64_sve, s)) {
5098         return false;
5099     }
5100     if (a->msz > a->esz) {
5101         return false;
5102     }
5103     if (sve_access_check(s)) {
5104         int vsz = vec_full_reg_size(s);
             /* Number of esz-sized elements held by one full vector. */
5105         int elements = vsz >> a->esz;
5106         TCGv_i64 addr = tcg_temp_new_i64();
5107 
5108         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5109                          (a->imm * elements * (a->nreg + 1)) << a->msz);
5110         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5111     }
5112     return true;
5113 }
5114 
5115 /*
5116  *** SVE gather loads / scatter stores
5117  */
5118 
/*
 * Common code emission for the gen_helper_gvec_mem_scatter helpers.
 *
 * Builds env-relative pointers to predicate pg and to vectors zm and zt,
 * packs a descriptor with make_svemte_desc() from the full vector size,
 * a count of 1, msz, is_write and scale, and then calls
 *     fn(env, zt pointer, pg pointer, zm pointer, scalar, desc).
 */
5119 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5120                        int scale, TCGv_i64 scalar, int msz, bool is_write,
5121                        gen_helper_gvec_mem_scatter *fn)
5122 {
5123     TCGv_ptr t_zm = tcg_temp_new_ptr();
5124     TCGv_ptr t_pg = tcg_temp_new_ptr();
5125     TCGv_ptr t_zt = tcg_temp_new_ptr();
5126     uint32_t desc;
5127 
5128     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
5129     tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm));
5130     tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt));
5131 
         /* scale travels to the helper in the descriptor's last field. */
5132     desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale);
5133     fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
5134 }
5135 
5136 /* Indexed by [mte][be][ff][xs][u][msz].  */
5137 static gen_helper_gvec_mem_scatter * const
5138 gather_load_fn32[2][2][2][2][2][3] = {
5139     { /* MTE Inactive */
5140         { /* Little-endian */
5141             { { { gen_helper_sve_ldbss_zsu,
5142                   gen_helper_sve_ldhss_le_zsu,
5143                   NULL, },
5144                 { gen_helper_sve_ldbsu_zsu,
5145                   gen_helper_sve_ldhsu_le_zsu,
5146                   gen_helper_sve_ldss_le_zsu, } },
5147               { { gen_helper_sve_ldbss_zss,
5148                   gen_helper_sve_ldhss_le_zss,
5149                   NULL, },
5150                 { gen_helper_sve_ldbsu_zss,
5151                   gen_helper_sve_ldhsu_le_zss,
5152                   gen_helper_sve_ldss_le_zss, } } },
5153 
5154             /* First-fault */
5155             { { { gen_helper_sve_ldffbss_zsu,
5156                   gen_helper_sve_ldffhss_le_zsu,
5157                   NULL, },
5158                 { gen_helper_sve_ldffbsu_zsu,
5159                   gen_helper_sve_ldffhsu_le_zsu,
5160                   gen_helper_sve_ldffss_le_zsu, } },
5161               { { gen_helper_sve_ldffbss_zss,
5162                   gen_helper_sve_ldffhss_le_zss,
5163                   NULL, },
5164                 { gen_helper_sve_ldffbsu_zss,
5165                   gen_helper_sve_ldffhsu_le_zss,
5166                   gen_helper_sve_ldffss_le_zss, } } } },
5167 
5168         { /* Big-endian */
5169             { { { gen_helper_sve_ldbss_zsu,
5170                   gen_helper_sve_ldhss_be_zsu,
5171                   NULL, },
5172                 { gen_helper_sve_ldbsu_zsu,
5173                   gen_helper_sve_ldhsu_be_zsu,
5174                   gen_helper_sve_ldss_be_zsu, } },
5175               { { gen_helper_sve_ldbss_zss,
5176                   gen_helper_sve_ldhss_be_zss,
5177                   NULL, },
5178                 { gen_helper_sve_ldbsu_zss,
5179                   gen_helper_sve_ldhsu_be_zss,
5180                   gen_helper_sve_ldss_be_zss, } } },
5181 
5182             /* First-fault */
5183             { { { gen_helper_sve_ldffbss_zsu,
5184                   gen_helper_sve_ldffhss_be_zsu,
5185                   NULL, },
5186                 { gen_helper_sve_ldffbsu_zsu,
5187                   gen_helper_sve_ldffhsu_be_zsu,
5188                   gen_helper_sve_ldffss_be_zsu, } },
5189               { { gen_helper_sve_ldffbss_zss,
5190                   gen_helper_sve_ldffhss_be_zss,
5191                   NULL, },
5192                 { gen_helper_sve_ldffbsu_zss,
5193                   gen_helper_sve_ldffhsu_be_zss,
5194                   gen_helper_sve_ldffss_be_zss, } } } } },
5195     { /* MTE Active */
5196         { /* Little-endian */
5197             { { { gen_helper_sve_ldbss_zsu_mte,
5198                   gen_helper_sve_ldhss_le_zsu_mte,
5199                   NULL, },
5200                 { gen_helper_sve_ldbsu_zsu_mte,
5201                   gen_helper_sve_ldhsu_le_zsu_mte,
5202                   gen_helper_sve_ldss_le_zsu_mte, } },
5203               { { gen_helper_sve_ldbss_zss_mte,
5204                   gen_helper_sve_ldhss_le_zss_mte,
5205                   NULL, },
5206                 { gen_helper_sve_ldbsu_zss_mte,
5207                   gen_helper_sve_ldhsu_le_zss_mte,
5208                   gen_helper_sve_ldss_le_zss_mte, } } },
5209 
5210             /* First-fault */
5211             { { { gen_helper_sve_ldffbss_zsu_mte,
5212                   gen_helper_sve_ldffhss_le_zsu_mte,
5213                   NULL, },
5214                 { gen_helper_sve_ldffbsu_zsu_mte,
5215                   gen_helper_sve_ldffhsu_le_zsu_mte,
5216                   gen_helper_sve_ldffss_le_zsu_mte, } },
5217               { { gen_helper_sve_ldffbss_zss_mte,
5218                   gen_helper_sve_ldffhss_le_zss_mte,
5219                   NULL, },
5220                 { gen_helper_sve_ldffbsu_zss_mte,
5221                   gen_helper_sve_ldffhsu_le_zss_mte,
5222                   gen_helper_sve_ldffss_le_zss_mte, } } } },
5223 
5224         { /* Big-endian */
5225             { { { gen_helper_sve_ldbss_zsu_mte,
5226                   gen_helper_sve_ldhss_be_zsu_mte,
5227                   NULL, },
5228                 { gen_helper_sve_ldbsu_zsu_mte,
5229                   gen_helper_sve_ldhsu_be_zsu_mte,
5230                   gen_helper_sve_ldss_be_zsu_mte, } },
5231               { { gen_helper_sve_ldbss_zss_mte,
5232                   gen_helper_sve_ldhss_be_zss_mte,
5233                   NULL, },
5234                 { gen_helper_sve_ldbsu_zss_mte,
5235                   gen_helper_sve_ldhsu_be_zss_mte,
5236                   gen_helper_sve_ldss_be_zss_mte, } } },
5237 
5238             /* First-fault */
5239             { { { gen_helper_sve_ldffbss_zsu_mte,
5240                   gen_helper_sve_ldffhss_be_zsu_mte,
5241                   NULL, },
5242                 { gen_helper_sve_ldffbsu_zsu_mte,
5243                   gen_helper_sve_ldffhsu_be_zsu_mte,
5244                   gen_helper_sve_ldffss_be_zsu_mte, } },
5245               { { gen_helper_sve_ldffbss_zss_mte,
5246                   gen_helper_sve_ldffhss_be_zss_mte,
5247                   NULL, },
5248                 { gen_helper_sve_ldffbsu_zss_mte,
5249                   gen_helper_sve_ldffhsu_be_zss_mte,
5250                   gen_helper_sve_ldffss_be_zss_mte, } } } } },
5251 };
5252 
5253 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5254 static gen_helper_gvec_mem_scatter * const
5255 gather_load_fn64[2][2][2][3][2][4] = {
5256     { /* MTE Inactive */
5257         { /* Little-endian */
5258             { { { gen_helper_sve_ldbds_zsu,
5259                   gen_helper_sve_ldhds_le_zsu,
5260                   gen_helper_sve_ldsds_le_zsu,
5261                   NULL, },
5262                 { gen_helper_sve_ldbdu_zsu,
5263                   gen_helper_sve_ldhdu_le_zsu,
5264                   gen_helper_sve_ldsdu_le_zsu,
5265                   gen_helper_sve_lddd_le_zsu, } },
5266               { { gen_helper_sve_ldbds_zss,
5267                   gen_helper_sve_ldhds_le_zss,
5268                   gen_helper_sve_ldsds_le_zss,
5269                   NULL, },
5270                 { gen_helper_sve_ldbdu_zss,
5271                   gen_helper_sve_ldhdu_le_zss,
5272                   gen_helper_sve_ldsdu_le_zss,
5273                   gen_helper_sve_lddd_le_zss, } },
5274               { { gen_helper_sve_ldbds_zd,
5275                   gen_helper_sve_ldhds_le_zd,
5276                   gen_helper_sve_ldsds_le_zd,
5277                   NULL, },
5278                 { gen_helper_sve_ldbdu_zd,
5279                   gen_helper_sve_ldhdu_le_zd,
5280                   gen_helper_sve_ldsdu_le_zd,
5281                   gen_helper_sve_lddd_le_zd, } } },
5282 
5283             /* First-fault */
5284             { { { gen_helper_sve_ldffbds_zsu,
5285                   gen_helper_sve_ldffhds_le_zsu,
5286                   gen_helper_sve_ldffsds_le_zsu,
5287                   NULL, },
5288                 { gen_helper_sve_ldffbdu_zsu,
5289                   gen_helper_sve_ldffhdu_le_zsu,
5290                   gen_helper_sve_ldffsdu_le_zsu,
5291                   gen_helper_sve_ldffdd_le_zsu, } },
5292               { { gen_helper_sve_ldffbds_zss,
5293                   gen_helper_sve_ldffhds_le_zss,
5294                   gen_helper_sve_ldffsds_le_zss,
5295                   NULL, },
5296                 { gen_helper_sve_ldffbdu_zss,
5297                   gen_helper_sve_ldffhdu_le_zss,
5298                   gen_helper_sve_ldffsdu_le_zss,
5299                   gen_helper_sve_ldffdd_le_zss, } },
5300               { { gen_helper_sve_ldffbds_zd,
5301                   gen_helper_sve_ldffhds_le_zd,
5302                   gen_helper_sve_ldffsds_le_zd,
5303                   NULL, },
5304                 { gen_helper_sve_ldffbdu_zd,
5305                   gen_helper_sve_ldffhdu_le_zd,
5306                   gen_helper_sve_ldffsdu_le_zd,
5307                   gen_helper_sve_ldffdd_le_zd, } } } },
5308         { /* Big-endian */
5309             { { { gen_helper_sve_ldbds_zsu,
5310                   gen_helper_sve_ldhds_be_zsu,
5311                   gen_helper_sve_ldsds_be_zsu,
5312                   NULL, },
5313                 { gen_helper_sve_ldbdu_zsu,
5314                   gen_helper_sve_ldhdu_be_zsu,
5315                   gen_helper_sve_ldsdu_be_zsu,
5316                   gen_helper_sve_lddd_be_zsu, } },
5317               { { gen_helper_sve_ldbds_zss,
5318                   gen_helper_sve_ldhds_be_zss,
5319                   gen_helper_sve_ldsds_be_zss,
5320                   NULL, },
5321                 { gen_helper_sve_ldbdu_zss,
5322                   gen_helper_sve_ldhdu_be_zss,
5323                   gen_helper_sve_ldsdu_be_zss,
5324                   gen_helper_sve_lddd_be_zss, } },
5325               { { gen_helper_sve_ldbds_zd,
5326                   gen_helper_sve_ldhds_be_zd,
5327                   gen_helper_sve_ldsds_be_zd,
5328                   NULL, },
5329                 { gen_helper_sve_ldbdu_zd,
5330                   gen_helper_sve_ldhdu_be_zd,
5331                   gen_helper_sve_ldsdu_be_zd,
5332                   gen_helper_sve_lddd_be_zd, } } },
5333 
5334             /* First-fault */
5335             { { { gen_helper_sve_ldffbds_zsu,
5336                   gen_helper_sve_ldffhds_be_zsu,
5337                   gen_helper_sve_ldffsds_be_zsu,
5338                   NULL, },
5339                 { gen_helper_sve_ldffbdu_zsu,
5340                   gen_helper_sve_ldffhdu_be_zsu,
5341                   gen_helper_sve_ldffsdu_be_zsu,
5342                   gen_helper_sve_ldffdd_be_zsu, } },
5343               { { gen_helper_sve_ldffbds_zss,
5344                   gen_helper_sve_ldffhds_be_zss,
5345                   gen_helper_sve_ldffsds_be_zss,
5346                   NULL, },
5347                 { gen_helper_sve_ldffbdu_zss,
5348                   gen_helper_sve_ldffhdu_be_zss,
5349                   gen_helper_sve_ldffsdu_be_zss,
5350                   gen_helper_sve_ldffdd_be_zss, } },
5351               { { gen_helper_sve_ldffbds_zd,
5352                   gen_helper_sve_ldffhds_be_zd,
5353                   gen_helper_sve_ldffsds_be_zd,
5354                   NULL, },
5355                 { gen_helper_sve_ldffbdu_zd,
5356                   gen_helper_sve_ldffhdu_be_zd,
5357                   gen_helper_sve_ldffsdu_be_zd,
5358                   gen_helper_sve_ldffdd_be_zd, } } } } },
5359     { /* MTE Active */
5360         { /* Little-endian */
5361             { { { gen_helper_sve_ldbds_zsu_mte,
5362                   gen_helper_sve_ldhds_le_zsu_mte,
5363                   gen_helper_sve_ldsds_le_zsu_mte,
5364                   NULL, },
5365                 { gen_helper_sve_ldbdu_zsu_mte,
5366                   gen_helper_sve_ldhdu_le_zsu_mte,
5367                   gen_helper_sve_ldsdu_le_zsu_mte,
5368                   gen_helper_sve_lddd_le_zsu_mte, } },
5369               { { gen_helper_sve_ldbds_zss_mte,
5370                   gen_helper_sve_ldhds_le_zss_mte,
5371                   gen_helper_sve_ldsds_le_zss_mte,
5372                   NULL, },
5373                 { gen_helper_sve_ldbdu_zss_mte,
5374                   gen_helper_sve_ldhdu_le_zss_mte,
5375                   gen_helper_sve_ldsdu_le_zss_mte,
5376                   gen_helper_sve_lddd_le_zss_mte, } },
5377               { { gen_helper_sve_ldbds_zd_mte,
5378                   gen_helper_sve_ldhds_le_zd_mte,
5379                   gen_helper_sve_ldsds_le_zd_mte,
5380                   NULL, },
5381                 { gen_helper_sve_ldbdu_zd_mte,
5382                   gen_helper_sve_ldhdu_le_zd_mte,
5383                   gen_helper_sve_ldsdu_le_zd_mte,
5384                   gen_helper_sve_lddd_le_zd_mte, } } },
5385 
5386             /* First-fault */
5387             { { { gen_helper_sve_ldffbds_zsu_mte,
5388                   gen_helper_sve_ldffhds_le_zsu_mte,
5389                   gen_helper_sve_ldffsds_le_zsu_mte,
5390                   NULL, },
5391                 { gen_helper_sve_ldffbdu_zsu_mte,
5392                   gen_helper_sve_ldffhdu_le_zsu_mte,
5393                   gen_helper_sve_ldffsdu_le_zsu_mte,
5394                   gen_helper_sve_ldffdd_le_zsu_mte, } },
5395               { { gen_helper_sve_ldffbds_zss_mte,
5396                   gen_helper_sve_ldffhds_le_zss_mte,
5397                   gen_helper_sve_ldffsds_le_zss_mte,
5398                   NULL, },
5399                 { gen_helper_sve_ldffbdu_zss_mte,
5400                   gen_helper_sve_ldffhdu_le_zss_mte,
5401                   gen_helper_sve_ldffsdu_le_zss_mte,
5402                   gen_helper_sve_ldffdd_le_zss_mte, } },
5403               { { gen_helper_sve_ldffbds_zd_mte,
5404                   gen_helper_sve_ldffhds_le_zd_mte,
5405                   gen_helper_sve_ldffsds_le_zd_mte,
5406                   NULL, },
5407                 { gen_helper_sve_ldffbdu_zd_mte,
5408                   gen_helper_sve_ldffhdu_le_zd_mte,
5409                   gen_helper_sve_ldffsdu_le_zd_mte,
5410                   gen_helper_sve_ldffdd_le_zd_mte, } } } },
5411         { /* Big-endian */
5412             { { { gen_helper_sve_ldbds_zsu_mte,
5413                   gen_helper_sve_ldhds_be_zsu_mte,
5414                   gen_helper_sve_ldsds_be_zsu_mte,
5415                   NULL, },
5416                 { gen_helper_sve_ldbdu_zsu_mte,
5417                   gen_helper_sve_ldhdu_be_zsu_mte,
5418                   gen_helper_sve_ldsdu_be_zsu_mte,
5419                   gen_helper_sve_lddd_be_zsu_mte, } },
5420               { { gen_helper_sve_ldbds_zss_mte,
5421                   gen_helper_sve_ldhds_be_zss_mte,
5422                   gen_helper_sve_ldsds_be_zss_mte,
5423                   NULL, },
5424                 { gen_helper_sve_ldbdu_zss_mte,
5425                   gen_helper_sve_ldhdu_be_zss_mte,
5426                   gen_helper_sve_ldsdu_be_zss_mte,
5427                   gen_helper_sve_lddd_be_zss_mte, } },
5428               { { gen_helper_sve_ldbds_zd_mte,
5429                   gen_helper_sve_ldhds_be_zd_mte,
5430                   gen_helper_sve_ldsds_be_zd_mte,
5431                   NULL, },
5432                 { gen_helper_sve_ldbdu_zd_mte,
5433                   gen_helper_sve_ldhdu_be_zd_mte,
5434                   gen_helper_sve_ldsdu_be_zd_mte,
5435                   gen_helper_sve_lddd_be_zd_mte, } } },
5436 
5437             /* First-fault */
5438             { { { gen_helper_sve_ldffbds_zsu_mte,
5439                   gen_helper_sve_ldffhds_be_zsu_mte,
5440                   gen_helper_sve_ldffsds_be_zsu_mte,
5441                   NULL, },
5442                 { gen_helper_sve_ldffbdu_zsu_mte,
5443                   gen_helper_sve_ldffhdu_be_zsu_mte,
5444                   gen_helper_sve_ldffsdu_be_zsu_mte,
5445                   gen_helper_sve_ldffdd_be_zsu_mte, } },
5446               { { gen_helper_sve_ldffbds_zss_mte,
5447                   gen_helper_sve_ldffhds_be_zss_mte,
5448                   gen_helper_sve_ldffsds_be_zss_mte,
5449                   NULL, },
5450                 { gen_helper_sve_ldffbdu_zss_mte,
5451                   gen_helper_sve_ldffhdu_be_zss_mte,
5452                   gen_helper_sve_ldffsdu_be_zss_mte,
5453                   gen_helper_sve_ldffdd_be_zss_mte, } },
5454               { { gen_helper_sve_ldffbds_zd_mte,
5455                   gen_helper_sve_ldffhds_be_zd_mte,
5456                   gen_helper_sve_ldffsds_be_zd_mte,
5457                   NULL, },
5458                 { gen_helper_sve_ldffbdu_zd_mte,
5459                   gen_helper_sve_ldffhdu_be_zd_mte,
5460                   gen_helper_sve_ldffsdu_be_zd_mte,
5461                   gen_helper_sve_ldffdd_be_zd_mte, } } } } },
5462 };
5463 
5464 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5465 {
5466     gen_helper_gvec_mem_scatter *fn = NULL;
5467     bool be = s->be_data == MO_BE;
5468     bool mte = s->mte_active[0];
5469 
5470     if (!dc_isar_feature(aa64_sve, s)) {
5471         return false;
5472     }
5473     s->is_nonstreaming = true;
5474     if (!sve_access_check(s)) {
5475         return true;
5476     }
5477 
5478     switch (a->esz) {
5479     case MO_32:
5480         fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
5481         break;
5482     case MO_64:
5483         fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
5484         break;
5485     }
5486     assert(fn != NULL);
5487 
5488     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5489                cpu_reg_sp(s, a->rn), a->msz, false, fn);
5490     return true;
5491 }
5492 
5493 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5494 {
5495     gen_helper_gvec_mem_scatter *fn = NULL;
5496     bool be = s->be_data == MO_BE;
5497     bool mte = s->mte_active[0];
5498 
5499     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5500         return false;
5501     }
5502     if (!dc_isar_feature(aa64_sve, s)) {
5503         return false;
5504     }
5505     s->is_nonstreaming = true;
5506     if (!sve_access_check(s)) {
5507         return true;
5508     }
5509 
5510     switch (a->esz) {
5511     case MO_32:
5512         fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
5513         break;
5514     case MO_64:
5515         fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
5516         break;
5517     }
5518     assert(fn != NULL);
5519 
5520     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5521      * by loading the immediate into the scalar parameter.
5522      */
5523     do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5524                tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
5525     return true;
5526 }
5527 
5528 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5529 {
5530     gen_helper_gvec_mem_scatter *fn = NULL;
5531     bool be = s->be_data == MO_BE;
5532     bool mte = s->mte_active[0];
5533 
5534     if (a->esz < a->msz + !a->u) {
5535         return false;
5536     }
5537     if (!dc_isar_feature(aa64_sve2, s)) {
5538         return false;
5539     }
5540     s->is_nonstreaming = true;
5541     if (!sve_access_check(s)) {
5542         return true;
5543     }
5544 
5545     switch (a->esz) {
5546     case MO_32:
5547         fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5548         break;
5549     case MO_64:
5550         fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5551         break;
5552     }
5553     assert(fn != NULL);
5554 
5555     do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5556                cpu_reg(s, a->rm), a->msz, false, fn);
5557     return true;
5558 }
5559 
5560 /* Indexed by [mte][be][xs][msz].  */
5561 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5562     { /* MTE Inactive */
5563         { /* Little-endian */
5564             { gen_helper_sve_stbs_zsu,
5565               gen_helper_sve_sths_le_zsu,
5566               gen_helper_sve_stss_le_zsu, },
5567             { gen_helper_sve_stbs_zss,
5568               gen_helper_sve_sths_le_zss,
5569               gen_helper_sve_stss_le_zss, } },
5570         { /* Big-endian */
5571             { gen_helper_sve_stbs_zsu,
5572               gen_helper_sve_sths_be_zsu,
5573               gen_helper_sve_stss_be_zsu, },
5574             { gen_helper_sve_stbs_zss,
5575               gen_helper_sve_sths_be_zss,
5576               gen_helper_sve_stss_be_zss, } } },
5577     { /* MTE Active */
5578         { /* Little-endian */
5579             { gen_helper_sve_stbs_zsu_mte,
5580               gen_helper_sve_sths_le_zsu_mte,
5581               gen_helper_sve_stss_le_zsu_mte, },
5582             { gen_helper_sve_stbs_zss_mte,
5583               gen_helper_sve_sths_le_zss_mte,
5584               gen_helper_sve_stss_le_zss_mte, } },
5585         { /* Big-endian */
5586             { gen_helper_sve_stbs_zsu_mte,
5587               gen_helper_sve_sths_be_zsu_mte,
5588               gen_helper_sve_stss_be_zsu_mte, },
5589             { gen_helper_sve_stbs_zss_mte,
5590               gen_helper_sve_sths_be_zss_mte,
5591               gen_helper_sve_stss_be_zss_mte, } } },
5592 };
5593 
5594 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5595 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5596     { /* MTE Inactive */
5597          { /* Little-endian */
5598              { gen_helper_sve_stbd_zsu,
5599                gen_helper_sve_sthd_le_zsu,
5600                gen_helper_sve_stsd_le_zsu,
5601                gen_helper_sve_stdd_le_zsu, },
5602              { gen_helper_sve_stbd_zss,
5603                gen_helper_sve_sthd_le_zss,
5604                gen_helper_sve_stsd_le_zss,
5605                gen_helper_sve_stdd_le_zss, },
5606              { gen_helper_sve_stbd_zd,
5607                gen_helper_sve_sthd_le_zd,
5608                gen_helper_sve_stsd_le_zd,
5609                gen_helper_sve_stdd_le_zd, } },
5610          { /* Big-endian */
5611              { gen_helper_sve_stbd_zsu,
5612                gen_helper_sve_sthd_be_zsu,
5613                gen_helper_sve_stsd_be_zsu,
5614                gen_helper_sve_stdd_be_zsu, },
5615              { gen_helper_sve_stbd_zss,
5616                gen_helper_sve_sthd_be_zss,
5617                gen_helper_sve_stsd_be_zss,
5618                gen_helper_sve_stdd_be_zss, },
5619              { gen_helper_sve_stbd_zd,
5620                gen_helper_sve_sthd_be_zd,
5621                gen_helper_sve_stsd_be_zd,
5622                gen_helper_sve_stdd_be_zd, } } },
5623     { /* MTE Inactive */
5624          { /* Little-endian */
5625              { gen_helper_sve_stbd_zsu_mte,
5626                gen_helper_sve_sthd_le_zsu_mte,
5627                gen_helper_sve_stsd_le_zsu_mte,
5628                gen_helper_sve_stdd_le_zsu_mte, },
5629              { gen_helper_sve_stbd_zss_mte,
5630                gen_helper_sve_sthd_le_zss_mte,
5631                gen_helper_sve_stsd_le_zss_mte,
5632                gen_helper_sve_stdd_le_zss_mte, },
5633              { gen_helper_sve_stbd_zd_mte,
5634                gen_helper_sve_sthd_le_zd_mte,
5635                gen_helper_sve_stsd_le_zd_mte,
5636                gen_helper_sve_stdd_le_zd_mte, } },
5637          { /* Big-endian */
5638              { gen_helper_sve_stbd_zsu_mte,
5639                gen_helper_sve_sthd_be_zsu_mte,
5640                gen_helper_sve_stsd_be_zsu_mte,
5641                gen_helper_sve_stdd_be_zsu_mte, },
5642              { gen_helper_sve_stbd_zss_mte,
5643                gen_helper_sve_sthd_be_zss_mte,
5644                gen_helper_sve_stsd_be_zss_mte,
5645                gen_helper_sve_stdd_be_zss_mte, },
5646              { gen_helper_sve_stbd_zd_mte,
5647                gen_helper_sve_sthd_be_zd_mte,
5648                gen_helper_sve_stsd_be_zd_mte,
5649                gen_helper_sve_stdd_be_zd_mte, } } },
5650 };
5651 
5652 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5653 {
5654     gen_helper_gvec_mem_scatter *fn;
5655     bool be = s->be_data == MO_BE;
5656     bool mte = s->mte_active[0];
5657 
5658     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5659         return false;
5660     }
5661     if (!dc_isar_feature(aa64_sve, s)) {
5662         return false;
5663     }
5664     s->is_nonstreaming = true;
5665     if (!sve_access_check(s)) {
5666         return true;
5667     }
5668     switch (a->esz) {
5669     case MO_32:
5670         fn = scatter_store_fn32[mte][be][a->xs][a->msz];
5671         break;
5672     case MO_64:
5673         fn = scatter_store_fn64[mte][be][a->xs][a->msz];
5674         break;
5675     default:
5676         g_assert_not_reached();
5677     }
5678     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5679                cpu_reg_sp(s, a->rn), a->msz, true, fn);
5680     return true;
5681 }
5682 
5683 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5684 {
5685     gen_helper_gvec_mem_scatter *fn = NULL;
5686     bool be = s->be_data == MO_BE;
5687     bool mte = s->mte_active[0];
5688 
5689     if (a->esz < a->msz) {
5690         return false;
5691     }
5692     if (!dc_isar_feature(aa64_sve, s)) {
5693         return false;
5694     }
5695     s->is_nonstreaming = true;
5696     if (!sve_access_check(s)) {
5697         return true;
5698     }
5699 
5700     switch (a->esz) {
5701     case MO_32:
5702         fn = scatter_store_fn32[mte][be][0][a->msz];
5703         break;
5704     case MO_64:
5705         fn = scatter_store_fn64[mte][be][2][a->msz];
5706         break;
5707     }
5708     assert(fn != NULL);
5709 
5710     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5711      * by loading the immediate into the scalar parameter.
5712      */
5713     do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5714                tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
5715     return true;
5716 }
5717 
5718 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
5719 {
5720     gen_helper_gvec_mem_scatter *fn;
5721     bool be = s->be_data == MO_BE;
5722     bool mte = s->mte_active[0];
5723 
5724     if (a->esz < a->msz) {
5725         return false;
5726     }
5727     if (!dc_isar_feature(aa64_sve2, s)) {
5728         return false;
5729     }
5730     s->is_nonstreaming = true;
5731     if (!sve_access_check(s)) {
5732         return true;
5733     }
5734 
5735     switch (a->esz) {
5736     case MO_32:
5737         fn = scatter_store_fn32[mte][be][0][a->msz];
5738         break;
5739     case MO_64:
5740         fn = scatter_store_fn64[mte][be][2][a->msz];
5741         break;
5742     default:
5743         g_assert_not_reached();
5744     }
5745 
5746     do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5747                cpu_reg(s, a->rm), a->msz, true, fn);
5748     return true;
5749 }
5750 
5751 /*
5752  * Prefetches
5753  */
5754 
5755 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5756 {
5757     if (!dc_isar_feature(aa64_sve, s)) {
5758         return false;
5759     }
5760     /* Prefetch is a nop within QEMU.  */
5761     (void)sve_access_check(s);
5762     return true;
5763 }
5764 
5765 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5766 {
5767     if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
5768         return false;
5769     }
5770     /* Prefetch is a nop within QEMU.  */
5771     (void)sve_access_check(s);
5772     return true;
5773 }
5774 
5775 static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
5776 {
5777     if (!dc_isar_feature(aa64_sve, s)) {
5778         return false;
5779     }
5780     /* Prefetch is a nop within QEMU.  */
5781     s->is_nonstreaming = true;
5782     (void)sve_access_check(s);
5783     return true;
5784 }
5785 
5786 /*
5787  * Move Prefix
5788  *
5789  * TODO: The implementation so far could handle predicated merging movprfx.
5790  * The helper functions as written take an extra source register to
5791  * use in the operation, but the result is only written when predication
5792  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5793  * to allow the final write back to the destination to be unconditional.
5794  * For predicated zeroing movprfx, we need to rearrange the helpers to
5795  * allow the final write back to zero inactives.
5796  *
5797  * In the meantime, just emit the moves.
5798  */
5799 
/* Plain form: emitted as a vector move of rn into rd. */
TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
/* _m form: emitted as a select between rn and the existing rd under pg. */
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
/* _z form: emitted via do_movz_zpz under pg (presumably zeroing inactives). */
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5803 
5804 /*
5805  * SVE2 Integer Multiply - Unpredicated
5806  */
5807 
5808 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5809 
5810 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
5811     gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
5812     gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
5813 };
5814 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5815            smulh_zzz_fns[a->esz], a, 0)
5816 
5817 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
5818     gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
5819     gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
5820 };
5821 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5822            umulh_zzz_fns[a->esz], a, 0)
5823 
5824 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5825            gen_helper_gvec_pmul_b, a, 0)
5826 
5827 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
5828     gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
5829     gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
5830 };
5831 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5832            sqdmulh_zzz_fns[a->esz], a, 0)
5833 
5834 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
5835     gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
5836     gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
5837 };
5838 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5839            sqrdmulh_zzz_fns[a->esz], a, 0)
5840 
5841 /*
5842  * SVE2 Integer - Predicated
5843  */
5844 
5845 static gen_helper_gvec_4 * const sadlp_fns[4] = {
5846     NULL,                          gen_helper_sve2_sadalp_zpzz_h,
5847     gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
5848 };
5849 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5850            sadlp_fns[a->esz], a, 0)
5851 
5852 static gen_helper_gvec_4 * const uadlp_fns[4] = {
5853     NULL,                          gen_helper_sve2_uadalp_zpzz_h,
5854     gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
5855 };
5856 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5857            uadlp_fns[a->esz], a, 0)
5858 
5859 /*
5860  * SVE2 integer unary operations (predicated)
5861  */
5862 
5863 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
5864            a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
5865 
5866 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
5867            a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
5868 
5869 static gen_helper_gvec_3 * const sqabs_fns[4] = {
5870     gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
5871     gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
5872 };
5873 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
5874 
5875 static gen_helper_gvec_3 * const sqneg_fns[4] = {
5876     gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
5877     gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
5878 };
5879 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
5880 
/*
 * Predicated two-operand SVE2 operations, all gated on aa64_sve2.
 * NOTE(review): DO_ZPZZ is defined outside this excerpt; its arguments
 * appear to be (instruction name, feature, helper name stem) -- confirm
 * against the macro definition.  The group labels below follow the Arm
 * instruction mnemonics.
 */

/* Signed shifts by vector: saturating, saturating rounding, rounding. */
DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

/* Unsigned counterparts of the shifts above. */
DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

/* Signed halving add, rounding halving add, halving subtract. */
DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

/* Unsigned counterparts of the halving operations above. */
DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

/* Pairwise add, maximum and minimum. */
DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

/* Predicated saturating add/subtract, including the mixed-sign forms. */
DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
5909 
5910 /*
5911  * SVE2 Widening Integer Arithmetic
5912  */
5913 
5914 static gen_helper_gvec_3 * const saddl_fns[4] = {
5915     NULL,                    gen_helper_sve2_saddl_h,
5916     gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
5917 };
5918 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5919            saddl_fns[a->esz], a, 0)
5920 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5921            saddl_fns[a->esz], a, 3)
5922 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5923            saddl_fns[a->esz], a, 2)
5924 
5925 static gen_helper_gvec_3 * const ssubl_fns[4] = {
5926     NULL,                    gen_helper_sve2_ssubl_h,
5927     gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
5928 };
5929 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5930            ssubl_fns[a->esz], a, 0)
5931 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5932            ssubl_fns[a->esz], a, 3)
5933 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5934            ssubl_fns[a->esz], a, 2)
5935 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
5936            ssubl_fns[a->esz], a, 1)
5937 
5938 static gen_helper_gvec_3 * const sabdl_fns[4] = {
5939     NULL,                    gen_helper_sve2_sabdl_h,
5940     gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
5941 };
5942 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5943            sabdl_fns[a->esz], a, 0)
5944 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5945            sabdl_fns[a->esz], a, 3)
5946 
5947 static gen_helper_gvec_3 * const uaddl_fns[4] = {
5948     NULL,                    gen_helper_sve2_uaddl_h,
5949     gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
5950 };
5951 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5952            uaddl_fns[a->esz], a, 0)
5953 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5954            uaddl_fns[a->esz], a, 3)
5955 
5956 static gen_helper_gvec_3 * const usubl_fns[4] = {
5957     NULL,                    gen_helper_sve2_usubl_h,
5958     gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
5959 };
5960 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5961            usubl_fns[a->esz], a, 0)
5962 TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5963            usubl_fns[a->esz], a, 3)
5964 
5965 static gen_helper_gvec_3 * const uabdl_fns[4] = {
5966     NULL,                    gen_helper_sve2_uabdl_h,
5967     gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
5968 };
5969 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5970            uabdl_fns[a->esz], a, 0)
5971 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5972            uabdl_fns[a->esz], a, 3)
5973 
5974 static gen_helper_gvec_3 * const sqdmull_fns[4] = {
5975     NULL,                          gen_helper_sve2_sqdmull_zzz_h,
5976     gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
5977 };
5978 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5979            sqdmull_fns[a->esz], a, 0)
5980 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5981            sqdmull_fns[a->esz], a, 3)
5982 
5983 static gen_helper_gvec_3 * const smull_fns[4] = {
5984     NULL,                        gen_helper_sve2_smull_zzz_h,
5985     gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
5986 };
5987 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5988            smull_fns[a->esz], a, 0)
5989 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5990            smull_fns[a->esz], a, 3)
5991 
5992 static gen_helper_gvec_3 * const umull_fns[4] = {
5993     NULL,                        gen_helper_sve2_umull_zzz_h,
5994     gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
5995 };
5996 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5997            umull_fns[a->esz], a, 0)
5998 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5999            umull_fns[a->esz], a, 3)
6000 
6001 static gen_helper_gvec_3 * const eoril_fns[4] = {
6002     gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6003     gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6004 };
6005 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6006 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
6007 
6008 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6009 {
6010     static gen_helper_gvec_3 * const fns[4] = {
6011         gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6012         NULL,                    gen_helper_sve2_pmull_d,
6013     };
6014 
6015     if (a->esz == 0) {
6016         if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
6017             return false;
6018         }
6019         s->is_nonstreaming = true;
6020     } else if (!dc_isar_feature(aa64_sve, s)) {
6021         return false;
6022     }
6023     return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
6024 }
6025 
6026 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6027 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
6028 
6029 static gen_helper_gvec_3 * const saddw_fns[4] = {
6030     NULL,                    gen_helper_sve2_saddw_h,
6031     gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6032 };
6033 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6034 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
6035 
6036 static gen_helper_gvec_3 * const ssubw_fns[4] = {
6037     NULL,                    gen_helper_sve2_ssubw_h,
6038     gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6039 };
6040 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6041 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
6042 
6043 static gen_helper_gvec_3 * const uaddw_fns[4] = {
6044     NULL,                    gen_helper_sve2_uaddw_h,
6045     gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6046 };
6047 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6048 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
6049 
6050 static gen_helper_gvec_3 * const usubw_fns[4] = {
6051     NULL,                    gen_helper_sve2_usubw_h,
6052     gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6053 };
6054 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6055 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
6056 
6057 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6058 {
6059     int top = imm & 1;
6060     int shl = imm >> 1;
6061     int halfbits = 4 << vece;
6062 
6063     if (top) {
6064         if (shl == halfbits) {
6065             TCGv_vec t = tcg_temp_new_vec_matching(d);
6066             tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6067             tcg_gen_and_vec(vece, d, n, t);
6068         } else {
6069             tcg_gen_sari_vec(vece, d, n, halfbits);
6070             tcg_gen_shli_vec(vece, d, d, shl);
6071         }
6072     } else {
6073         tcg_gen_shli_vec(vece, d, n, halfbits);
6074         tcg_gen_sari_vec(vece, d, d, halfbits - shl);
6075     }
6076 }
6077 
6078 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6079 {
6080     int halfbits = 4 << vece;
6081     int top = imm & 1;
6082     int shl = (imm >> 1);
6083     int shift;
6084     uint64_t mask;
6085 
6086     mask = MAKE_64BIT_MASK(0, halfbits);
6087     mask <<= shl;
6088     mask = dup_const(vece, mask);
6089 
6090     shift = shl - top * halfbits;
6091     if (shift < 0) {
6092         tcg_gen_shri_i64(d, n, -shift);
6093     } else {
6094         tcg_gen_shli_i64(d, n, shift);
6095     }
6096     tcg_gen_andi_i64(d, d, mask);
6097 }
6098 
6099 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6100 {
6101     gen_ushll_i64(MO_16, d, n, imm);
6102 }
6103 
6104 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6105 {
6106     gen_ushll_i64(MO_32, d, n, imm);
6107 }
6108 
6109 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6110 {
6111     gen_ushll_i64(MO_64, d, n, imm);
6112 }
6113 
6114 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6115 {
6116     int halfbits = 4 << vece;
6117     int top = imm & 1;
6118     int shl = imm >> 1;
6119 
6120     if (top) {
6121         if (shl == halfbits) {
6122             TCGv_vec t = tcg_temp_new_vec_matching(d);
6123             tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6124             tcg_gen_and_vec(vece, d, n, t);
6125         } else {
6126             tcg_gen_shri_vec(vece, d, n, halfbits);
6127             tcg_gen_shli_vec(vece, d, d, shl);
6128         }
6129     } else {
6130         if (shl == 0) {
6131             TCGv_vec t = tcg_temp_new_vec_matching(d);
6132             tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6133             tcg_gen_and_vec(vece, d, n, t);
6134         } else {
6135             tcg_gen_shli_vec(vece, d, n, halfbits);
6136             tcg_gen_shri_vec(vece, d, d, halfbits - shl);
6137         }
6138     }
6139 }
6140 
6141 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
6142                        const GVecGen2i ops[3], bool sel)
6143 {
6144 
6145     if (a->esz < 0 || a->esz > 2) {
6146         return false;
6147     }
6148     if (sve_access_check(s)) {
6149         unsigned vsz = vec_full_reg_size(s);
6150         tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6151                         vec_full_reg_offset(s, a->rn),
6152                         vsz, vsz, (a->imm << 1) | sel,
6153                         &ops[a->esz]);
6154     }
6155     return true;
6156 }
6157 
6158 static const TCGOpcode sshll_list[] = {
6159     INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6160 };
6161 static const GVecGen2i sshll_ops[3] = {
6162     { .fniv = gen_sshll_vec,
6163       .opt_opc = sshll_list,
6164       .fno = gen_helper_sve2_sshll_h,
6165       .vece = MO_16 },
6166     { .fniv = gen_sshll_vec,
6167       .opt_opc = sshll_list,
6168       .fno = gen_helper_sve2_sshll_s,
6169       .vece = MO_32 },
6170     { .fniv = gen_sshll_vec,
6171       .opt_opc = sshll_list,
6172       .fno = gen_helper_sve2_sshll_d,
6173       .vece = MO_64 }
6174 };
6175 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
6176 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
6177 
6178 static const TCGOpcode ushll_list[] = {
6179     INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6180 };
6181 static const GVecGen2i ushll_ops[3] = {
6182     { .fni8 = gen_ushll16_i64,
6183       .fniv = gen_ushll_vec,
6184       .opt_opc = ushll_list,
6185       .fno = gen_helper_sve2_ushll_h,
6186       .vece = MO_16 },
6187     { .fni8 = gen_ushll32_i64,
6188       .fniv = gen_ushll_vec,
6189       .opt_opc = ushll_list,
6190       .fno = gen_helper_sve2_ushll_s,
6191       .vece = MO_32 },
6192     { .fni8 = gen_ushll64_i64,
6193       .fniv = gen_ushll_vec,
6194       .opt_opc = ushll_list,
6195       .fno = gen_helper_sve2_ushll_d,
6196       .vece = MO_64 },
6197 };
6198 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
6199 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
6200 
6201 static gen_helper_gvec_3 * const bext_fns[4] = {
6202     gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6203     gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6204 };
6205 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6206                         bext_fns[a->esz], a, 0)
6207 
6208 static gen_helper_gvec_3 * const bdep_fns[4] = {
6209     gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6210     gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6211 };
6212 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6213                         bdep_fns[a->esz], a, 0)
6214 
6215 static gen_helper_gvec_3 * const bgrp_fns[4] = {
6216     gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6217     gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6218 };
6219 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6220                         bgrp_fns[a->esz], a, 0)
6221 
6222 static gen_helper_gvec_3 * const cadd_fns[4] = {
6223     gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6224     gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6225 };
6226 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6227            cadd_fns[a->esz], a, 0)
6228 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6229            cadd_fns[a->esz], a, 1)
6230 
6231 static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6232     gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6233     gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6234 };
6235 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6236            sqcadd_fns[a->esz], a, 0)
6237 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6238            sqcadd_fns[a->esz], a, 1)
6239 
6240 static gen_helper_gvec_4 * const sabal_fns[4] = {
6241     NULL,                    gen_helper_sve2_sabal_h,
6242     gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6243 };
6244 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6245 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
6246 
6247 static gen_helper_gvec_4 * const uabal_fns[4] = {
6248     NULL,                    gen_helper_sve2_uabal_h,
6249     gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6250 };
6251 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6252 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
6253 
6254 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6255 {
6256     static gen_helper_gvec_4 * const fns[2] = {
6257         gen_helper_sve2_adcl_s,
6258         gen_helper_sve2_adcl_d,
6259     };
6260     /*
6261      * Note that in this case the ESZ field encodes both size and sign.
6262      * Split out 'subtract' into bit 1 of the data field for the helper.
6263      */
6264     return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
6265 }
6266 
6267 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6268 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
6269 
6270 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
6271 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
6272 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
6273 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
6274 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
6275 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
6276 
6277 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
6278 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
6279 
6280 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
6281                               const GVecGen2 ops[3])
6282 {
6283     if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
6284         return false;
6285     }
6286     if (sve_access_check(s)) {
6287         unsigned vsz = vec_full_reg_size(s);
6288         tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6289                         vec_full_reg_offset(s, a->rn),
6290                         vsz, vsz, &ops[a->esz]);
6291     }
6292     return true;
6293 }
6294 
6295 static const TCGOpcode sqxtn_list[] = {
6296     INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
6297 };
6298 
6299 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6300 {
6301     TCGv_vec t = tcg_temp_new_vec_matching(d);
6302     int halfbits = 4 << vece;
6303     int64_t mask = (1ull << halfbits) - 1;
6304     int64_t min = -1ull << (halfbits - 1);
6305     int64_t max = -min - 1;
6306 
6307     tcg_gen_dupi_vec(vece, t, min);
6308     tcg_gen_smax_vec(vece, d, n, t);
6309     tcg_gen_dupi_vec(vece, t, max);
6310     tcg_gen_smin_vec(vece, d, d, t);
6311     tcg_gen_dupi_vec(vece, t, mask);
6312     tcg_gen_and_vec(vece, d, d, t);
6313 }
6314 
6315 static const GVecGen2 sqxtnb_ops[3] = {
6316     { .fniv = gen_sqxtnb_vec,
6317       .opt_opc = sqxtn_list,
6318       .fno = gen_helper_sve2_sqxtnb_h,
6319       .vece = MO_16 },
6320     { .fniv = gen_sqxtnb_vec,
6321       .opt_opc = sqxtn_list,
6322       .fno = gen_helper_sve2_sqxtnb_s,
6323       .vece = MO_32 },
6324     { .fniv = gen_sqxtnb_vec,
6325       .opt_opc = sqxtn_list,
6326       .fno = gen_helper_sve2_sqxtnb_d,
6327       .vece = MO_64 },
6328 };
6329 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
6330 
6331 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6332 {
6333     TCGv_vec t = tcg_temp_new_vec_matching(d);
6334     int halfbits = 4 << vece;
6335     int64_t mask = (1ull << halfbits) - 1;
6336     int64_t min = -1ull << (halfbits - 1);
6337     int64_t max = -min - 1;
6338 
6339     tcg_gen_dupi_vec(vece, t, min);
6340     tcg_gen_smax_vec(vece, n, n, t);
6341     tcg_gen_dupi_vec(vece, t, max);
6342     tcg_gen_smin_vec(vece, n, n, t);
6343     tcg_gen_shli_vec(vece, n, n, halfbits);
6344     tcg_gen_dupi_vec(vece, t, mask);
6345     tcg_gen_bitsel_vec(vece, d, t, d, n);
6346 }
6347 
6348 static const GVecGen2 sqxtnt_ops[3] = {
6349     { .fniv = gen_sqxtnt_vec,
6350       .opt_opc = sqxtn_list,
6351       .load_dest = true,
6352       .fno = gen_helper_sve2_sqxtnt_h,
6353       .vece = MO_16 },
6354     { .fniv = gen_sqxtnt_vec,
6355       .opt_opc = sqxtn_list,
6356       .load_dest = true,
6357       .fno = gen_helper_sve2_sqxtnt_s,
6358       .vece = MO_32 },
6359     { .fniv = gen_sqxtnt_vec,
6360       .opt_opc = sqxtn_list,
6361       .load_dest = true,
6362       .fno = gen_helper_sve2_sqxtnt_d,
6363       .vece = MO_64 },
6364 };
6365 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
6366 
6367 static const TCGOpcode uqxtn_list[] = {
6368     INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6369 };
6370 
6371 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6372 {
6373     TCGv_vec t = tcg_temp_new_vec_matching(d);
6374     int halfbits = 4 << vece;
6375     int64_t max = (1ull << halfbits) - 1;
6376 
6377     tcg_gen_dupi_vec(vece, t, max);
6378     tcg_gen_umin_vec(vece, d, n, t);
6379 }
6380 
6381 static const GVecGen2 uqxtnb_ops[3] = {
6382     { .fniv = gen_uqxtnb_vec,
6383       .opt_opc = uqxtn_list,
6384       .fno = gen_helper_sve2_uqxtnb_h,
6385       .vece = MO_16 },
6386     { .fniv = gen_uqxtnb_vec,
6387       .opt_opc = uqxtn_list,
6388       .fno = gen_helper_sve2_uqxtnb_s,
6389       .vece = MO_32 },
6390     { .fniv = gen_uqxtnb_vec,
6391       .opt_opc = uqxtn_list,
6392       .fno = gen_helper_sve2_uqxtnb_d,
6393       .vece = MO_64 },
6394 };
6395 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
6396 
6397 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6398 {
6399     TCGv_vec t = tcg_temp_new_vec_matching(d);
6400     int halfbits = 4 << vece;
6401     int64_t max = (1ull << halfbits) - 1;
6402 
6403     tcg_gen_dupi_vec(vece, t, max);
6404     tcg_gen_umin_vec(vece, n, n, t);
6405     tcg_gen_shli_vec(vece, n, n, halfbits);
6406     tcg_gen_bitsel_vec(vece, d, t, d, n);
6407 }
6408 
6409 static const GVecGen2 uqxtnt_ops[3] = {
6410     { .fniv = gen_uqxtnt_vec,
6411       .opt_opc = uqxtn_list,
6412       .load_dest = true,
6413       .fno = gen_helper_sve2_uqxtnt_h,
6414       .vece = MO_16 },
6415     { .fniv = gen_uqxtnt_vec,
6416       .opt_opc = uqxtn_list,
6417       .load_dest = true,
6418       .fno = gen_helper_sve2_uqxtnt_s,
6419       .vece = MO_32 },
6420     { .fniv = gen_uqxtnt_vec,
6421       .opt_opc = uqxtn_list,
6422       .load_dest = true,
6423       .fno = gen_helper_sve2_uqxtnt_d,
6424       .vece = MO_64 },
6425 };
6426 TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)
6427 
6428 static const TCGOpcode sqxtun_list[] = {
6429     INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
6430 };
6431 
6432 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6433 {
6434     TCGv_vec t = tcg_temp_new_vec_matching(d);
6435     int halfbits = 4 << vece;
6436     int64_t max = (1ull << halfbits) - 1;
6437 
6438     tcg_gen_dupi_vec(vece, t, 0);
6439     tcg_gen_smax_vec(vece, d, n, t);
6440     tcg_gen_dupi_vec(vece, t, max);
6441     tcg_gen_umin_vec(vece, d, d, t);
6442 }
6443 
6444 static const GVecGen2 sqxtunb_ops[3] = {
6445     { .fniv = gen_sqxtunb_vec,
6446       .opt_opc = sqxtun_list,
6447       .fno = gen_helper_sve2_sqxtunb_h,
6448       .vece = MO_16 },
6449     { .fniv = gen_sqxtunb_vec,
6450       .opt_opc = sqxtun_list,
6451       .fno = gen_helper_sve2_sqxtunb_s,
6452       .vece = MO_32 },
6453     { .fniv = gen_sqxtunb_vec,
6454       .opt_opc = sqxtun_list,
6455       .fno = gen_helper_sve2_sqxtunb_d,
6456       .vece = MO_64 },
6457 };
6458 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
6459 
6460 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6461 {
6462     TCGv_vec t = tcg_temp_new_vec_matching(d);
6463     int halfbits = 4 << vece;
6464     int64_t max = (1ull << halfbits) - 1;
6465 
6466     tcg_gen_dupi_vec(vece, t, 0);
6467     tcg_gen_smax_vec(vece, n, n, t);
6468     tcg_gen_dupi_vec(vece, t, max);
6469     tcg_gen_umin_vec(vece, n, n, t);
6470     tcg_gen_shli_vec(vece, n, n, halfbits);
6471     tcg_gen_bitsel_vec(vece, d, t, d, n);
6472 }
6473 
6474 static const GVecGen2 sqxtunt_ops[3] = {
6475     { .fniv = gen_sqxtunt_vec,
6476       .opt_opc = sqxtun_list,
6477       .load_dest = true,
6478       .fno = gen_helper_sve2_sqxtunt_h,
6479       .vece = MO_16 },
6480     { .fniv = gen_sqxtunt_vec,
6481       .opt_opc = sqxtun_list,
6482       .load_dest = true,
6483       .fno = gen_helper_sve2_sqxtunt_s,
6484       .vece = MO_32 },
6485     { .fniv = gen_sqxtunt_vec,
6486       .opt_opc = sqxtun_list,
6487       .load_dest = true,
6488       .fno = gen_helper_sve2_sqxtunt_d,
6489       .vece = MO_64 },
6490 };
6491 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
6492 
6493 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
6494                           const GVecGen2i ops[3])
6495 {
6496     if (a->esz < 0 || a->esz > MO_32) {
6497         return false;
6498     }
6499     assert(a->imm > 0 && a->imm <= (8 << a->esz));
6500     if (sve_access_check(s)) {
6501         unsigned vsz = vec_full_reg_size(s);
6502         tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6503                         vec_full_reg_offset(s, a->rn),
6504                         vsz, vsz, a->imm, &ops[a->esz]);
6505     }
6506     return true;
6507 }
6508 
6509 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6510 {
6511     int halfbits = 4 << vece;
6512     uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6513 
6514     tcg_gen_shri_i64(d, n, shr);
6515     tcg_gen_andi_i64(d, d, mask);
6516 }
6517 
6518 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6519 {
6520     gen_shrnb_i64(MO_16, d, n, shr);
6521 }
6522 
6523 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6524 {
6525     gen_shrnb_i64(MO_32, d, n, shr);
6526 }
6527 
6528 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6529 {
6530     gen_shrnb_i64(MO_64, d, n, shr);
6531 }
6532 
6533 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6534 {
6535     TCGv_vec t = tcg_temp_new_vec_matching(d);
6536     int halfbits = 4 << vece;
6537     uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6538 
6539     tcg_gen_shri_vec(vece, n, n, shr);
6540     tcg_gen_dupi_vec(vece, t, mask);
6541     tcg_gen_and_vec(vece, d, n, t);
6542 }
6543 
6544 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
6545 static const GVecGen2i shrnb_ops[3] = {
6546     { .fni8 = gen_shrnb16_i64,
6547       .fniv = gen_shrnb_vec,
6548       .opt_opc = shrnb_vec_list,
6549       .fno = gen_helper_sve2_shrnb_h,
6550       .vece = MO_16 },
6551     { .fni8 = gen_shrnb32_i64,
6552       .fniv = gen_shrnb_vec,
6553       .opt_opc = shrnb_vec_list,
6554       .fno = gen_helper_sve2_shrnb_s,
6555       .vece = MO_32 },
6556     { .fni8 = gen_shrnb64_i64,
6557       .fniv = gen_shrnb_vec,
6558       .opt_opc = shrnb_vec_list,
6559       .fno = gen_helper_sve2_shrnb_d,
6560       .vece = MO_64 },
6561 };
6562 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
6563 
6564 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6565 {
6566     int halfbits = 4 << vece;
6567     uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6568 
6569     tcg_gen_shli_i64(n, n, halfbits - shr);
6570     tcg_gen_andi_i64(n, n, ~mask);
6571     tcg_gen_andi_i64(d, d, mask);
6572     tcg_gen_or_i64(d, d, n);
6573 }
6574 
6575 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6576 {
6577     gen_shrnt_i64(MO_16, d, n, shr);
6578 }
6579 
6580 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6581 {
6582     gen_shrnt_i64(MO_32, d, n, shr);
6583 }
6584 
6585 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6586 {
6587     tcg_gen_shri_i64(n, n, shr);
6588     tcg_gen_deposit_i64(d, d, n, 32, 32);
6589 }
6590 
6591 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6592 {
6593     TCGv_vec t = tcg_temp_new_vec_matching(d);
6594     int halfbits = 4 << vece;
6595     uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6596 
6597     tcg_gen_shli_vec(vece, n, n, halfbits - shr);
6598     tcg_gen_dupi_vec(vece, t, mask);
6599     tcg_gen_bitsel_vec(vece, d, t, d, n);
6600 }
6601 
6602 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
6603 static const GVecGen2i shrnt_ops[3] = {
6604     { .fni8 = gen_shrnt16_i64,
6605       .fniv = gen_shrnt_vec,
6606       .opt_opc = shrnt_vec_list,
6607       .load_dest = true,
6608       .fno = gen_helper_sve2_shrnt_h,
6609       .vece = MO_16 },
6610     { .fni8 = gen_shrnt32_i64,
6611       .fniv = gen_shrnt_vec,
6612       .opt_opc = shrnt_vec_list,
6613       .load_dest = true,
6614       .fno = gen_helper_sve2_shrnt_s,
6615       .vece = MO_32 },
6616     { .fni8 = gen_shrnt64_i64,
6617       .fniv = gen_shrnt_vec,
6618       .opt_opc = shrnt_vec_list,
6619       .load_dest = true,
6620       .fno = gen_helper_sve2_shrnt_d,
6621       .vece = MO_64 },
6622 };
6623 TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
6624 
6625 static const GVecGen2i rshrnb_ops[3] = {
6626     { .fno = gen_helper_sve2_rshrnb_h },
6627     { .fno = gen_helper_sve2_rshrnb_s },
6628     { .fno = gen_helper_sve2_rshrnb_d },
6629 };
6630 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)
6631 
6632 static const GVecGen2i rshrnt_ops[3] = {
6633     { .fno = gen_helper_sve2_rshrnt_h },
6634     { .fno = gen_helper_sve2_rshrnt_s },
6635     { .fno = gen_helper_sve2_rshrnt_d },
6636 };
6637 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
6638 
6639 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
6640                              TCGv_vec n, int64_t shr)
6641 {
6642     TCGv_vec t = tcg_temp_new_vec_matching(d);
6643     int halfbits = 4 << vece;
6644 
6645     tcg_gen_sari_vec(vece, n, n, shr);
6646     tcg_gen_dupi_vec(vece, t, 0);
6647     tcg_gen_smax_vec(vece, n, n, t);
6648     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6649     tcg_gen_umin_vec(vece, d, n, t);
6650 }
6651 
6652 static const TCGOpcode sqshrunb_vec_list[] = {
6653     INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6654 };
6655 static const GVecGen2i sqshrunb_ops[3] = {
6656     { .fniv = gen_sqshrunb_vec,
6657       .opt_opc = sqshrunb_vec_list,
6658       .fno = gen_helper_sve2_sqshrunb_h,
6659       .vece = MO_16 },
6660     { .fniv = gen_sqshrunb_vec,
6661       .opt_opc = sqshrunb_vec_list,
6662       .fno = gen_helper_sve2_sqshrunb_s,
6663       .vece = MO_32 },
6664     { .fniv = gen_sqshrunb_vec,
6665       .opt_opc = sqshrunb_vec_list,
6666       .fno = gen_helper_sve2_sqshrunb_d,
6667       .vece = MO_64 },
6668 };
6669 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
6670 
6671 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
6672                              TCGv_vec n, int64_t shr)
6673 {
6674     TCGv_vec t = tcg_temp_new_vec_matching(d);
6675     int halfbits = 4 << vece;
6676 
6677     tcg_gen_sari_vec(vece, n, n, shr);
6678     tcg_gen_dupi_vec(vece, t, 0);
6679     tcg_gen_smax_vec(vece, n, n, t);
6680     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6681     tcg_gen_umin_vec(vece, n, n, t);
6682     tcg_gen_shli_vec(vece, n, n, halfbits);
6683     tcg_gen_bitsel_vec(vece, d, t, d, n);
6684 }
6685 
6686 static const TCGOpcode sqshrunt_vec_list[] = {
6687     INDEX_op_shli_vec, INDEX_op_sari_vec,
6688     INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6689 };
6690 static const GVecGen2i sqshrunt_ops[3] = {
6691     { .fniv = gen_sqshrunt_vec,
6692       .opt_opc = sqshrunt_vec_list,
6693       .load_dest = true,
6694       .fno = gen_helper_sve2_sqshrunt_h,
6695       .vece = MO_16 },
6696     { .fniv = gen_sqshrunt_vec,
6697       .opt_opc = sqshrunt_vec_list,
6698       .load_dest = true,
6699       .fno = gen_helper_sve2_sqshrunt_s,
6700       .vece = MO_32 },
6701     { .fniv = gen_sqshrunt_vec,
6702       .opt_opc = sqshrunt_vec_list,
6703       .load_dest = true,
6704       .fno = gen_helper_sve2_sqshrunt_d,
6705       .vece = MO_64 },
6706 };
6707 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
6708 
6709 static const GVecGen2i sqrshrunb_ops[3] = {
6710     { .fno = gen_helper_sve2_sqrshrunb_h },
6711     { .fno = gen_helper_sve2_sqrshrunb_s },
6712     { .fno = gen_helper_sve2_sqrshrunb_d },
6713 };
6714 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)
6715 
6716 static const GVecGen2i sqrshrunt_ops[3] = {
6717     { .fno = gen_helper_sve2_sqrshrunt_h },
6718     { .fno = gen_helper_sve2_sqrshrunt_s },
6719     { .fno = gen_helper_sve2_sqrshrunt_d },
6720 };
6721 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
6722 
6723 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
6724                             TCGv_vec n, int64_t shr)
6725 {
6726     TCGv_vec t = tcg_temp_new_vec_matching(d);
6727     int halfbits = 4 << vece;
6728     int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6729     int64_t min = -max - 1;
6730 
6731     tcg_gen_sari_vec(vece, n, n, shr);
6732     tcg_gen_dupi_vec(vece, t, min);
6733     tcg_gen_smax_vec(vece, n, n, t);
6734     tcg_gen_dupi_vec(vece, t, max);
6735     tcg_gen_smin_vec(vece, n, n, t);
6736     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6737     tcg_gen_and_vec(vece, d, n, t);
6738 }
6739 
6740 static const TCGOpcode sqshrnb_vec_list[] = {
6741     INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6742 };
6743 static const GVecGen2i sqshrnb_ops[3] = {
6744     { .fniv = gen_sqshrnb_vec,
6745       .opt_opc = sqshrnb_vec_list,
6746       .fno = gen_helper_sve2_sqshrnb_h,
6747       .vece = MO_16 },
6748     { .fniv = gen_sqshrnb_vec,
6749       .opt_opc = sqshrnb_vec_list,
6750       .fno = gen_helper_sve2_sqshrnb_s,
6751       .vece = MO_32 },
6752     { .fniv = gen_sqshrnb_vec,
6753       .opt_opc = sqshrnb_vec_list,
6754       .fno = gen_helper_sve2_sqshrnb_d,
6755       .vece = MO_64 },
6756 };
6757 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
6758 
6759 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
6760                              TCGv_vec n, int64_t shr)
6761 {
6762     TCGv_vec t = tcg_temp_new_vec_matching(d);
6763     int halfbits = 4 << vece;
6764     int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6765     int64_t min = -max - 1;
6766 
6767     tcg_gen_sari_vec(vece, n, n, shr);
6768     tcg_gen_dupi_vec(vece, t, min);
6769     tcg_gen_smax_vec(vece, n, n, t);
6770     tcg_gen_dupi_vec(vece, t, max);
6771     tcg_gen_smin_vec(vece, n, n, t);
6772     tcg_gen_shli_vec(vece, n, n, halfbits);
6773     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6774     tcg_gen_bitsel_vec(vece, d, t, d, n);
6775 }
6776 
6777 static const TCGOpcode sqshrnt_vec_list[] = {
6778     INDEX_op_shli_vec, INDEX_op_sari_vec,
6779     INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6780 };
6781 static const GVecGen2i sqshrnt_ops[3] = {
6782     { .fniv = gen_sqshrnt_vec,
6783       .opt_opc = sqshrnt_vec_list,
6784       .load_dest = true,
6785       .fno = gen_helper_sve2_sqshrnt_h,
6786       .vece = MO_16 },
6787     { .fniv = gen_sqshrnt_vec,
6788       .opt_opc = sqshrnt_vec_list,
6789       .load_dest = true,
6790       .fno = gen_helper_sve2_sqshrnt_s,
6791       .vece = MO_32 },
6792     { .fniv = gen_sqshrnt_vec,
6793       .opt_opc = sqshrnt_vec_list,
6794       .load_dest = true,
6795       .fno = gen_helper_sve2_sqshrnt_d,
6796       .vece = MO_64 },
6797 };
6798 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
6799 
6800 static const GVecGen2i sqrshrnb_ops[3] = {
6801     { .fno = gen_helper_sve2_sqrshrnb_h },
6802     { .fno = gen_helper_sve2_sqrshrnb_s },
6803     { .fno = gen_helper_sve2_sqrshrnb_d },
6804 };
6805 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)
6806 
6807 static const GVecGen2i sqrshrnt_ops[3] = {
6808     { .fno = gen_helper_sve2_sqrshrnt_h },
6809     { .fno = gen_helper_sve2_sqrshrnt_s },
6810     { .fno = gen_helper_sve2_sqrshrnt_d },
6811 };
6812 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
6813 
6814 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
6815                             TCGv_vec n, int64_t shr)
6816 {
6817     TCGv_vec t = tcg_temp_new_vec_matching(d);
6818     int halfbits = 4 << vece;
6819 
6820     tcg_gen_shri_vec(vece, n, n, shr);
6821     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6822     tcg_gen_umin_vec(vece, d, n, t);
6823 }
6824 
6825 static const TCGOpcode uqshrnb_vec_list[] = {
6826     INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6827 };
6828 static const GVecGen2i uqshrnb_ops[3] = {
6829     { .fniv = gen_uqshrnb_vec,
6830       .opt_opc = uqshrnb_vec_list,
6831       .fno = gen_helper_sve2_uqshrnb_h,
6832       .vece = MO_16 },
6833     { .fniv = gen_uqshrnb_vec,
6834       .opt_opc = uqshrnb_vec_list,
6835       .fno = gen_helper_sve2_uqshrnb_s,
6836       .vece = MO_32 },
6837     { .fniv = gen_uqshrnb_vec,
6838       .opt_opc = uqshrnb_vec_list,
6839       .fno = gen_helper_sve2_uqshrnb_d,
6840       .vece = MO_64 },
6841 };
6842 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
6843 
6844 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
6845                             TCGv_vec n, int64_t shr)
6846 {
6847     TCGv_vec t = tcg_temp_new_vec_matching(d);
6848     int halfbits = 4 << vece;
6849 
6850     tcg_gen_shri_vec(vece, n, n, shr);
6851     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6852     tcg_gen_umin_vec(vece, n, n, t);
6853     tcg_gen_shli_vec(vece, n, n, halfbits);
6854     tcg_gen_bitsel_vec(vece, d, t, d, n);
6855 }
6856 
6857 static const TCGOpcode uqshrnt_vec_list[] = {
6858     INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6859 };
6860 static const GVecGen2i uqshrnt_ops[3] = {
6861     { .fniv = gen_uqshrnt_vec,
6862       .opt_opc = uqshrnt_vec_list,
6863       .load_dest = true,
6864       .fno = gen_helper_sve2_uqshrnt_h,
6865       .vece = MO_16 },
6866     { .fniv = gen_uqshrnt_vec,
6867       .opt_opc = uqshrnt_vec_list,
6868       .load_dest = true,
6869       .fno = gen_helper_sve2_uqshrnt_s,
6870       .vece = MO_32 },
6871     { .fniv = gen_uqshrnt_vec,
6872       .opt_opc = uqshrnt_vec_list,
6873       .load_dest = true,
6874       .fno = gen_helper_sve2_uqshrnt_d,
6875       .vece = MO_64 },
6876 };
6877 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)
6878 
6879 static const GVecGen2i uqrshrnb_ops[3] = {
6880     { .fno = gen_helper_sve2_uqrshrnb_h },
6881     { .fno = gen_helper_sve2_uqrshrnb_s },
6882     { .fno = gen_helper_sve2_uqrshrnb_d },
6883 };
6884 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)
6885 
6886 static const GVecGen2i uqrshrnt_ops[3] = {
6887     { .fno = gen_helper_sve2_uqrshrnt_h },
6888     { .fno = gen_helper_sve2_uqrshrnt_s },
6889     { .fno = gen_helper_sve2_uqrshrnt_d },
6890 };
6891 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
6892 
6893 #define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
6894     static gen_helper_gvec_3 * const name##_fns[4] = {                    \
6895         NULL,                       gen_helper_sve2_##name##_h,           \
6896         gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
6897     };                                                                    \
6898     TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
6899                name##_fns[a->esz], a, 0)
6900 
6901 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
6902 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
6903 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
6904 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
6905 
6906 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
6907 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
6908 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
6909 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
6910 
6911 static gen_helper_gvec_flags_4 * const match_fns[4] = {
6912     gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
6913 };
6914 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
6915 
6916 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
6917     gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
6918 };
6919 TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
6920 
6921 static gen_helper_gvec_4 * const histcnt_fns[4] = {
6922     NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
6923 };
6924 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
6925                         histcnt_fns[a->esz], a, 0)
6926 
6927 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
6928                         a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
6929 
6930 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
6931 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
6932 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
6933 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
6934 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
6935 
6936 /*
6937  * SVE Integer Multiply-Add (unpredicated)
6938  */
6939 
6940 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
6941                         gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
6942                         0, FPST_FPCR)
6943 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
6944                         gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
6945                         0, FPST_FPCR)
6946 
6947 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
6948     NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
6949     gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
6950 };
6951 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6952            sqdmlal_zzzw_fns[a->esz], a, 0)
6953 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6954            sqdmlal_zzzw_fns[a->esz], a, 3)
6955 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
6956            sqdmlal_zzzw_fns[a->esz], a, 2)
6957 
6958 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
6959     NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
6960     gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
6961 };
6962 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6963            sqdmlsl_zzzw_fns[a->esz], a, 0)
6964 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6965            sqdmlsl_zzzw_fns[a->esz], a, 3)
6966 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
6967            sqdmlsl_zzzw_fns[a->esz], a, 2)
6968 
6969 static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
6970     gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
6971     gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
6972 };
6973 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
6974            sqrdmlah_fns[a->esz], a, 0)
6975 
6976 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
6977     gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
6978     gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
6979 };
6980 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
6981            sqrdmlsh_fns[a->esz], a, 0)
6982 
6983 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
6984     NULL,                         gen_helper_sve2_smlal_zzzw_h,
6985     gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
6986 };
6987 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6988            smlal_zzzw_fns[a->esz], a, 0)
6989 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6990            smlal_zzzw_fns[a->esz], a, 1)
6991 
6992 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
6993     NULL,                         gen_helper_sve2_umlal_zzzw_h,
6994     gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
6995 };
6996 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6997            umlal_zzzw_fns[a->esz], a, 0)
6998 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6999            umlal_zzzw_fns[a->esz], a, 1)
7000 
7001 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
7002     NULL,                         gen_helper_sve2_smlsl_zzzw_h,
7003     gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
7004 };
7005 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7006            smlsl_zzzw_fns[a->esz], a, 0)
7007 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7008            smlsl_zzzw_fns[a->esz], a, 1)
7009 
7010 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
7011     NULL,                         gen_helper_sve2_umlsl_zzzw_h,
7012     gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
7013 };
7014 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7015            umlsl_zzzw_fns[a->esz], a, 0)
7016 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7017            umlsl_zzzw_fns[a->esz], a, 1)
7018 
7019 static gen_helper_gvec_4 * const cmla_fns[] = {
7020     gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
7021     gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
7022 };
7023 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7024            cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7025 
7026 static gen_helper_gvec_4 * const cdot_fns[] = {
7027     NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
7028 };
7029 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7030            cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7031 
7032 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
7033     gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
7034     gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
7035 };
7036 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7037            sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7038 
7039 TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7040            a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
7041 
7042 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
7043                         gen_helper_crypto_aesmc, a->rd, a->rd, 0)
7044 TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz,
7045                         gen_helper_crypto_aesimc, a->rd, a->rd, 0)
7046 
7047 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7048                         gen_helper_crypto_aese, a, 0)
7049 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7050                         gen_helper_crypto_aesd, a, 0)
7051 
7052 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7053                         gen_helper_crypto_sm4e, a, 0)
7054 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7055                         gen_helper_crypto_sm4ekey, a, 0)
7056 
7057 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
7058                         gen_gvec_rax1, a)
7059 
7060 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
7061            gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
7062 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
7063            gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)
7064 
7065 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
7066            gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)
7067 
7068 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
7069            gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
7070 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
7071            gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
7072 
7073 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
7074            FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
7075 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
7076            FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)
7077 
7078 static gen_helper_gvec_3_ptr * const flogb_fns[] = {
7079     NULL,               gen_helper_flogb_h,
7080     gen_helper_flogb_s, gen_helper_flogb_d
7081 };
7082 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
7083            a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
7084 
7085 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7086 {
7087     return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7088                              a->rd, a->rn, a->rm, a->ra,
7089                              (sel << 1) | sub, tcg_env);
7090 }
7091 
7092 TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
7093 TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
7094 TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
7095 TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
7096 
7097 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7098 {
7099     return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7100                              a->rd, a->rn, a->rm, a->ra,
7101                              (a->index << 2) | (sel << 1) | sub, tcg_env);
7102 }
7103 
7104 TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
7105 TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
7106 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
7107 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
7108 
7109 TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7110                         gen_helper_gvec_smmla_b, a, 0)
7111 TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7112                         gen_helper_gvec_usmmla_b, a, 0)
7113 TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7114                         gen_helper_gvec_ummla_b, a, 0)
7115 
7116 TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
7117            gen_helper_gvec_bfdot, a, 0)
7118 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
7119            gen_helper_gvec_bfdot_idx, a)
7120 
7121 TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
7122                         gen_helper_gvec_bfmmla, a, 0)
7123 
7124 static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7125 {
7126     return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
7127                               a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
7128 }
7129 
7130 TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
7131 TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
7132 
7133 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7134 {
7135     return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
7136                               a->rd, a->rn, a->rm, a->ra,
7137                               (a->index << 1) | sel, FPST_FPCR);
7138 }
7139 
7140 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
7141 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
7142 
7143 static bool trans_PSEL(DisasContext *s, arg_psel *a)
7144 {
7145     int vl = vec_full_reg_size(s);
7146     int pl = pred_gvec_reg_size(s);
7147     int elements = vl >> a->esz;
7148     TCGv_i64 tmp, didx, dbit;
7149     TCGv_ptr ptr;
7150 
7151     if (!dc_isar_feature(aa64_sme, s)) {
7152         return false;
7153     }
7154     if (!sve_access_check(s)) {
7155         return true;
7156     }
7157 
7158     tmp = tcg_temp_new_i64();
7159     dbit = tcg_temp_new_i64();
7160     didx = tcg_temp_new_i64();
7161     ptr = tcg_temp_new_ptr();
7162 
7163     /* Compute the predicate element. */
7164     tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
7165     if (is_power_of_2(elements)) {
7166         tcg_gen_andi_i64(tmp, tmp, elements - 1);
7167     } else {
7168         tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
7169     }
7170 
7171     /* Extract the predicate byte and bit indices. */
7172     tcg_gen_shli_i64(tmp, tmp, a->esz);
7173     tcg_gen_andi_i64(dbit, tmp, 7);
7174     tcg_gen_shri_i64(didx, tmp, 3);
7175     if (HOST_BIG_ENDIAN) {
7176         tcg_gen_xori_i64(didx, didx, 7);
7177     }
7178 
7179     /* Load the predicate word. */
7180     tcg_gen_trunc_i64_ptr(ptr, didx);
7181     tcg_gen_add_ptr(ptr, ptr, tcg_env);
7182     tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
7183 
7184     /* Extract the predicate bit and replicate to MO_64. */
7185     tcg_gen_shr_i64(tmp, tmp, dbit);
7186     tcg_gen_andi_i64(tmp, tmp, 1);
7187     tcg_gen_neg_i64(tmp, tmp);
7188 
7189     /* Apply to either copy the source, or write zeros. */
7190     tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
7191                       pred_full_reg_offset(s, a->pn), tmp, pl, pl);
7192     return true;
7193 }
7194 
7195 static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
7196 {
7197     tcg_gen_smax_i32(d, a, n);
7198     tcg_gen_smin_i32(d, d, m);
7199 }
7200 
7201 static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
7202 {
7203     tcg_gen_smax_i64(d, a, n);
7204     tcg_gen_smin_i64(d, d, m);
7205 }
7206 
7207 static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
7208                            TCGv_vec m, TCGv_vec a)
7209 {
7210     tcg_gen_smax_vec(vece, d, a, n);
7211     tcg_gen_smin_vec(vece, d, d, m);
7212 }
7213 
7214 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
7215                        uint32_t a, uint32_t oprsz, uint32_t maxsz)
7216 {
7217     static const TCGOpcode vecop[] = {
7218         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
7219     };
7220     static const GVecGen4 ops[4] = {
7221         { .fniv = gen_sclamp_vec,
7222           .fno  = gen_helper_gvec_sclamp_b,
7223           .opt_opc = vecop,
7224           .vece = MO_8 },
7225         { .fniv = gen_sclamp_vec,
7226           .fno  = gen_helper_gvec_sclamp_h,
7227           .opt_opc = vecop,
7228           .vece = MO_16 },
7229         { .fni4 = gen_sclamp_i32,
7230           .fniv = gen_sclamp_vec,
7231           .fno  = gen_helper_gvec_sclamp_s,
7232           .opt_opc = vecop,
7233           .vece = MO_32 },
7234         { .fni8 = gen_sclamp_i64,
7235           .fniv = gen_sclamp_vec,
7236           .fno  = gen_helper_gvec_sclamp_d,
7237           .opt_opc = vecop,
7238           .vece = MO_64,
7239           .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
7240     };
7241     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
7242 }
7243 
7244 TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
7245 
7246 static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
7247 {
7248     tcg_gen_umax_i32(d, a, n);
7249     tcg_gen_umin_i32(d, d, m);
7250 }
7251 
7252 static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
7253 {
7254     tcg_gen_umax_i64(d, a, n);
7255     tcg_gen_umin_i64(d, d, m);
7256 }
7257 
7258 static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
7259                            TCGv_vec m, TCGv_vec a)
7260 {
7261     tcg_gen_umax_vec(vece, d, a, n);
7262     tcg_gen_umin_vec(vece, d, d, m);
7263 }
7264 
7265 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
7266                        uint32_t a, uint32_t oprsz, uint32_t maxsz)
7267 {
7268     static const TCGOpcode vecop[] = {
7269         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
7270     };
7271     static const GVecGen4 ops[4] = {
7272         { .fniv = gen_uclamp_vec,
7273           .fno  = gen_helper_gvec_uclamp_b,
7274           .opt_opc = vecop,
7275           .vece = MO_8 },
7276         { .fniv = gen_uclamp_vec,
7277           .fno  = gen_helper_gvec_uclamp_h,
7278           .opt_opc = vecop,
7279           .vece = MO_16 },
7280         { .fni4 = gen_uclamp_i32,
7281           .fniv = gen_uclamp_vec,
7282           .fno  = gen_helper_gvec_uclamp_s,
7283           .opt_opc = vecop,
7284           .vece = MO_32 },
7285         { .fni8 = gen_uclamp_i64,
7286           .fniv = gen_uclamp_vec,
7287           .fno  = gen_helper_gvec_uclamp_d,
7288           .opt_opc = vecop,
7289           .vece = MO_64,
7290           .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
7291     };
7292     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
7293 }
7294 
7295 TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
7296