xref: /openbmc/qemu/target/arm/tcg/translate-neon.c (revision c43a23e1)
1 /*
2  *  ARM translation: AArch32 Neon instructions
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *  Copyright (c) 2020 Linaro, Ltd.
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 #include "qemu/osdep.h"
24 #include "translate.h"
25 #include "translate-a32.h"
26 
27 /* Include the generated Neon decoder */
28 #include "decode-neon-dp.c.inc"
29 #include "decode-neon-ls.c.inc"
30 #include "decode-neon-shared.c.inc"
31 
32 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
33 {
34     TCGv_ptr ret = tcg_temp_new_ptr();
35     tcg_gen_addi_ptr(ret, tcg_env, vfp_reg_offset(dp, reg));
36     return ret;
37 }
38 
39 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
40 {
41     long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
42 
43     switch (mop) {
44     case MO_UB:
45         tcg_gen_ld8u_i32(var, tcg_env, offset);
46         break;
47     case MO_UW:
48         tcg_gen_ld16u_i32(var, tcg_env, offset);
49         break;
50     case MO_UL:
51         tcg_gen_ld_i32(var, tcg_env, offset);
52         break;
53     default:
54         g_assert_not_reached();
55     }
56 }
57 
58 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
59 {
60     long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
61 
62     switch (mop) {
63     case MO_UB:
64         tcg_gen_ld8u_i64(var, tcg_env, offset);
65         break;
66     case MO_UW:
67         tcg_gen_ld16u_i64(var, tcg_env, offset);
68         break;
69     case MO_UL:
70         tcg_gen_ld32u_i64(var, tcg_env, offset);
71         break;
72     case MO_UQ:
73         tcg_gen_ld_i64(var, tcg_env, offset);
74         break;
75     default:
76         g_assert_not_reached();
77     }
78 }
79 
80 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
81 {
82     long offset = neon_element_offset(reg, ele, size);
83 
84     switch (size) {
85     case MO_8:
86         tcg_gen_st8_i32(var, tcg_env, offset);
87         break;
88     case MO_16:
89         tcg_gen_st16_i32(var, tcg_env, offset);
90         break;
91     case MO_32:
92         tcg_gen_st_i32(var, tcg_env, offset);
93         break;
94     default:
95         g_assert_not_reached();
96     }
97 }
98 
99 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
100 {
101     long offset = neon_element_offset(reg, ele, size);
102 
103     switch (size) {
104     case MO_8:
105         tcg_gen_st8_i64(var, tcg_env, offset);
106         break;
107     case MO_16:
108         tcg_gen_st16_i64(var, tcg_env, offset);
109         break;
110     case MO_32:
111         tcg_gen_st32_i64(var, tcg_env, offset);
112         break;
113     case MO_64:
114         tcg_gen_st_i64(var, tcg_env, offset);
115         break;
116     default:
117         g_assert_not_reached();
118     }
119 }
120 
121 static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
122                          int data, gen_helper_gvec_4 *fn_gvec)
123 {
124     /* UNDEF accesses to D16-D31 if they don't exist. */
125     if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
126         return false;
127     }
128 
129     /*
130      * UNDEF accesses to odd registers for each bit of Q.
131      * Q will be 0b111 for all Q-reg instructions, otherwise
132      * when we have mixed Q- and D-reg inputs.
133      */
134     if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
135         return false;
136     }
137 
138     if (!vfp_access_check(s)) {
139         return true;
140     }
141 
142     int opr_sz = q ? 16 : 8;
143     tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
144                        vfp_reg_offset(1, vn),
145                        vfp_reg_offset(1, vm),
146                        vfp_reg_offset(1, vd),
147                        opr_sz, opr_sz, data, fn_gvec);
148     return true;
149 }
150 
151 static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
152                               int data, ARMFPStatusFlavour fp_flavour,
153                               gen_helper_gvec_4_ptr *fn_gvec_ptr)
154 {
155     /* UNDEF accesses to D16-D31 if they don't exist. */
156     if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
157         return false;
158     }
159 
160     /*
161      * UNDEF accesses to odd registers for each bit of Q.
162      * Q will be 0b111 for all Q-reg instructions, otherwise
163      * when we have mixed Q- and D-reg inputs.
164      */
165     if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
166         return false;
167     }
168 
169     if (!vfp_access_check(s)) {
170         return true;
171     }
172 
173     int opr_sz = q ? 16 : 8;
174     TCGv_ptr fpst = fpstatus_ptr(fp_flavour);
175 
176     tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
177                        vfp_reg_offset(1, vn),
178                        vfp_reg_offset(1, vm),
179                        vfp_reg_offset(1, vd),
180                        fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
181     return true;
182 }
183 
184 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
185 {
186     if (!dc_isar_feature(aa32_vcma, s)) {
187         return false;
188     }
189     if (a->size == MO_16) {
190         if (!dc_isar_feature(aa32_fp16_arith, s)) {
191             return false;
192         }
193         return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
194                                  FPST_STD_F16, gen_helper_gvec_fcmlah);
195     }
196     return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
197                              FPST_STD, gen_helper_gvec_fcmlas);
198 }
199 
200 static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
201 {
202     int opr_sz;
203     TCGv_ptr fpst;
204     gen_helper_gvec_3_ptr *fn_gvec_ptr;
205 
206     if (!dc_isar_feature(aa32_vcma, s)
207         || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
208         return false;
209     }
210 
211     /* UNDEF accesses to D16-D31 if they don't exist. */
212     if (!dc_isar_feature(aa32_simd_r32, s) &&
213         ((a->vd | a->vn | a->vm) & 0x10)) {
214         return false;
215     }
216 
217     if ((a->vn | a->vm | a->vd) & a->q) {
218         return false;
219     }
220 
221     if (!vfp_access_check(s)) {
222         return true;
223     }
224 
225     opr_sz = (1 + a->q) * 8;
226     fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
227     fn_gvec_ptr = (a->size == MO_16) ?
228         gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
229     tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
230                        vfp_reg_offset(1, a->vn),
231                        vfp_reg_offset(1, a->vm),
232                        fpst, opr_sz, opr_sz, a->rot,
233                        fn_gvec_ptr);
234     return true;
235 }
236 
237 static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
238 {
239     if (!dc_isar_feature(aa32_dp, s)) {
240         return false;
241     }
242     return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
243                         gen_helper_gvec_sdot_b);
244 }
245 
246 static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
247 {
248     if (!dc_isar_feature(aa32_dp, s)) {
249         return false;
250     }
251     return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
252                         gen_helper_gvec_udot_b);
253 }
254 
255 static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
256 {
257     if (!dc_isar_feature(aa32_i8mm, s)) {
258         return false;
259     }
260     return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
261                         gen_helper_gvec_usdot_b);
262 }
263 
264 static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
265 {
266     if (!dc_isar_feature(aa32_bf16, s)) {
267         return false;
268     }
269     return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
270                         gen_helper_gvec_bfdot);
271 }
272 
273 static bool trans_VFML(DisasContext *s, arg_VFML *a)
274 {
275     int opr_sz;
276 
277     if (!dc_isar_feature(aa32_fhm, s)) {
278         return false;
279     }
280 
281     /* UNDEF accesses to D16-D31 if they don't exist. */
282     if (!dc_isar_feature(aa32_simd_r32, s) &&
283         (a->vd & 0x10)) {
284         return false;
285     }
286 
287     if (a->vd & a->q) {
288         return false;
289     }
290 
291     if (!vfp_access_check(s)) {
292         return true;
293     }
294 
295     opr_sz = (1 + a->q) * 8;
296     tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
297                        vfp_reg_offset(a->q, a->vn),
298                        vfp_reg_offset(a->q, a->vm),
299                        tcg_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
300                        gen_helper_gvec_fmlal_a32);
301     return true;
302 }
303 
304 static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
305 {
306     int data = (a->index << 2) | a->rot;
307 
308     if (!dc_isar_feature(aa32_vcma, s)) {
309         return false;
310     }
311     if (a->size == MO_16) {
312         if (!dc_isar_feature(aa32_fp16_arith, s)) {
313             return false;
314         }
315         return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
316                                  FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
317     }
318     return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
319                              FPST_STD, gen_helper_gvec_fcmlas_idx);
320 }
321 
322 static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
323 {
324     if (!dc_isar_feature(aa32_dp, s)) {
325         return false;
326     }
327     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
328                         gen_helper_gvec_sdot_idx_b);
329 }
330 
331 static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
332 {
333     if (!dc_isar_feature(aa32_dp, s)) {
334         return false;
335     }
336     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
337                         gen_helper_gvec_udot_idx_b);
338 }
339 
340 static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
341 {
342     if (!dc_isar_feature(aa32_i8mm, s)) {
343         return false;
344     }
345     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
346                         gen_helper_gvec_usdot_idx_b);
347 }
348 
349 static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
350 {
351     if (!dc_isar_feature(aa32_i8mm, s)) {
352         return false;
353     }
354     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
355                         gen_helper_gvec_sudot_idx_b);
356 }
357 
358 static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
359 {
360     if (!dc_isar_feature(aa32_bf16, s)) {
361         return false;
362     }
363     return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
364                         gen_helper_gvec_bfdot_idx);
365 }
366 
367 static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
368 {
369     int opr_sz;
370 
371     if (!dc_isar_feature(aa32_fhm, s)) {
372         return false;
373     }
374 
375     /* UNDEF accesses to D16-D31 if they don't exist. */
376     if (!dc_isar_feature(aa32_simd_r32, s) &&
377         ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
378         return false;
379     }
380 
381     if (a->vd & a->q) {
382         return false;
383     }
384 
385     if (!vfp_access_check(s)) {
386         return true;
387     }
388 
389     opr_sz = (1 + a->q) * 8;
390     tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
391                        vfp_reg_offset(a->q, a->vn),
392                        vfp_reg_offset(a->q, a->rm),
393                        tcg_env, opr_sz, opr_sz,
394                        (a->index << 2) | a->s, /* is_2 == 0 */
395                        gen_helper_gvec_fmlal_idx_a32);
396     return true;
397 }
398 
399 static struct {
400     int nregs;
401     int interleave;
402     int spacing;
403 } const neon_ls_element_type[11] = {
404     {1, 4, 1},
405     {1, 4, 2},
406     {4, 1, 1},
407     {2, 2, 2},
408     {1, 3, 1},
409     {1, 3, 2},
410     {3, 1, 1},
411     {1, 1, 1},
412     {1, 2, 1},
413     {1, 2, 2},
414     {2, 1, 1}
415 };
416 
417 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
418                                       int stride)
419 {
420     if (rm != 15) {
421         TCGv_i32 base;
422 
423         base = load_reg(s, rn);
424         if (rm == 13) {
425             tcg_gen_addi_i32(base, base, stride);
426         } else {
427             TCGv_i32 index;
428             index = load_reg(s, rm);
429             tcg_gen_add_i32(base, base, index);
430         }
431         store_reg(s, rn, base);
432     }
433 }
434 
435 static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
436 {
437     /* Neon load/store multiple structures */
438     int nregs, interleave, spacing, reg, n;
439     MemOp mop, align, endian;
440     int mmu_idx = get_mem_index(s);
441     int size = a->size;
442     TCGv_i64 tmp64;
443     TCGv_i32 addr;
444 
445     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
446         return false;
447     }
448 
449     /* UNDEF accesses to D16-D31 if they don't exist */
450     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
451         return false;
452     }
453     if (a->itype > 10) {
454         return false;
455     }
456     /* Catch UNDEF cases for bad values of align field */
457     switch (a->itype & 0xc) {
458     case 4:
459         if (a->align >= 2) {
460             return false;
461         }
462         break;
463     case 8:
464         if (a->align == 3) {
465             return false;
466         }
467         break;
468     default:
469         break;
470     }
471     nregs = neon_ls_element_type[a->itype].nregs;
472     interleave = neon_ls_element_type[a->itype].interleave;
473     spacing = neon_ls_element_type[a->itype].spacing;
474     if (size == 3 && (interleave | spacing) != 1) {
475         return false;
476     }
477 
478     if (!vfp_access_check(s)) {
479         return true;
480     }
481 
482     /* For our purposes, bytes are always little-endian.  */
483     endian = s->be_data;
484     if (size == 0) {
485         endian = MO_LE;
486     }
487 
488     /* Enforce alignment requested by the instruction */
489     if (a->align) {
490         align = pow2_align(a->align + 2); /* 4 ** a->align */
491     } else {
492         align = s->align_mem ? MO_ALIGN : 0;
493     }
494 
495     /*
496      * Consecutive little-endian elements from a single register
497      * can be promoted to a larger little-endian operation.
498      */
499     if (interleave == 1 && endian == MO_LE) {
500         /* Retain any natural alignment. */
501         if (align == MO_ALIGN) {
502             align = pow2_align(size);
503         }
504         size = 3;
505     }
506 
507     tmp64 = tcg_temp_new_i64();
508     addr = tcg_temp_new_i32();
509     load_reg_var(s, addr, a->rn);
510 
511     mop = endian | size | align;
512     for (reg = 0; reg < nregs; reg++) {
513         for (n = 0; n < 8 >> size; n++) {
514             int xs;
515             for (xs = 0; xs < interleave; xs++) {
516                 int tt = a->vd + reg + spacing * xs;
517 
518                 if (a->l) {
519                     gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
520                     neon_store_element64(tt, n, size, tmp64);
521                 } else {
522                     neon_load_element64(tmp64, tt, n, size);
523                     gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
524                 }
525                 tcg_gen_addi_i32(addr, addr, 1 << size);
526 
527                 /* Subsequent memory operations inherit alignment */
528                 mop &= ~MO_AMASK;
529             }
530         }
531     }
532 
533     gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
534     return true;
535 }
536 
537 static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
538 {
539     /* Neon load single structure to all lanes */
540     int reg, stride, vec_size;
541     int vd = a->vd;
542     int size = a->size;
543     int nregs = a->n + 1;
544     TCGv_i32 addr, tmp;
545     MemOp mop, align;
546 
547     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
548         return false;
549     }
550 
551     /* UNDEF accesses to D16-D31 if they don't exist */
552     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
553         return false;
554     }
555 
556     align = 0;
557     if (size == 3) {
558         if (nregs != 4 || a->a == 0) {
559             return false;
560         }
561         /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
562         size = MO_32;
563         align = MO_ALIGN_16;
564     } else if (a->a) {
565         switch (nregs) {
566         case 1:
567             if (size == 0) {
568                 return false;
569             }
570             align = MO_ALIGN;
571             break;
572         case 2:
573             align = pow2_align(size + 1);
574             break;
575         case 3:
576             return false;
577         case 4:
578             if (size == 2) {
579                 align = pow2_align(3);
580             } else {
581                 align = pow2_align(size + 2);
582             }
583             break;
584         default:
585             g_assert_not_reached();
586         }
587     }
588 
589     if (!vfp_access_check(s)) {
590         return true;
591     }
592 
593     /*
594      * VLD1 to all lanes: T bit indicates how many Dregs to write.
595      * VLD2/3/4 to all lanes: T bit indicates register stride.
596      */
597     stride = a->t ? 2 : 1;
598     vec_size = nregs == 1 ? stride * 8 : 8;
599     mop = size | align;
600     tmp = tcg_temp_new_i32();
601     addr = tcg_temp_new_i32();
602     load_reg_var(s, addr, a->rn);
603     for (reg = 0; reg < nregs; reg++) {
604         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
605         if ((vd & 1) && vec_size == 16) {
606             /*
607              * We cannot write 16 bytes at once because the
608              * destination is unaligned.
609              */
610             tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
611                                  8, 8, tmp);
612             tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
613                              neon_full_reg_offset(vd), 8, 8);
614         } else {
615             tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
616                                  vec_size, vec_size, tmp);
617         }
618         tcg_gen_addi_i32(addr, addr, 1 << size);
619         vd += stride;
620 
621         /* Subsequent memory operations inherit alignment */
622         mop &= ~MO_AMASK;
623     }
624 
625     gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
626 
627     return true;
628 }
629 
630 static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
631 {
632     /* Neon load/store single structure to one lane */
633     int reg;
634     int nregs = a->n + 1;
635     int vd = a->vd;
636     TCGv_i32 addr, tmp;
637     MemOp mop;
638 
639     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
640         return false;
641     }
642 
643     /* UNDEF accesses to D16-D31 if they don't exist */
644     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
645         return false;
646     }
647 
648     /* Catch the UNDEF cases. This is unavoidably a bit messy. */
649     switch (nregs) {
650     case 1:
651         if (a->stride != 1) {
652             return false;
653         }
654         if (((a->align & (1 << a->size)) != 0) ||
655             (a->size == 2 && (a->align == 1 || a->align == 2))) {
656             return false;
657         }
658         break;
659     case 2:
660         if (a->size == 2 && (a->align & 2) != 0) {
661             return false;
662         }
663         break;
664     case 3:
665         if (a->align != 0) {
666             return false;
667         }
668         break;
669     case 4:
670         if (a->size == 2 && a->align == 3) {
671             return false;
672         }
673         break;
674     default:
675         g_assert_not_reached();
676     }
677     if ((vd + a->stride * (nregs - 1)) > 31) {
678         /*
679          * Attempts to write off the end of the register file are
680          * UNPREDICTABLE; we choose to UNDEF because otherwise we would
681          * access off the end of the array that holds the register data.
682          */
683         return false;
684     }
685 
686     if (!vfp_access_check(s)) {
687         return true;
688     }
689 
690     /* Pick up SCTLR settings */
691     mop = finalize_memop(s, a->size);
692 
693     if (a->align) {
694         MemOp align_op;
695 
696         switch (nregs) {
697         case 1:
698             /* For VLD1, use natural alignment. */
699             align_op = MO_ALIGN;
700             break;
701         case 2:
702             /* For VLD2, use double alignment. */
703             align_op = pow2_align(a->size + 1);
704             break;
705         case 4:
706             if (a->size == MO_32) {
707                 /*
708                  * For VLD4.32, align = 1 is double alignment, align = 2 is
709                  * quad alignment; align = 3 is rejected above.
710                  */
711                 align_op = pow2_align(a->size + a->align);
712             } else {
713                 /* For VLD4.8 and VLD.16, we want quad alignment. */
714                 align_op = pow2_align(a->size + 2);
715             }
716             break;
717         default:
718             /* For VLD3, the alignment field is zero and rejected above. */
719             g_assert_not_reached();
720         }
721 
722         mop = (mop & ~MO_AMASK) | align_op;
723     }
724 
725     tmp = tcg_temp_new_i32();
726     addr = tcg_temp_new_i32();
727     load_reg_var(s, addr, a->rn);
728 
729     for (reg = 0; reg < nregs; reg++) {
730         if (a->l) {
731             gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
732             neon_store_element(vd, a->reg_idx, a->size, tmp);
733         } else { /* Store */
734             neon_load_element(tmp, vd, a->reg_idx, a->size);
735             gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
736         }
737         vd += a->stride;
738         tcg_gen_addi_i32(addr, addr, 1 << a->size);
739 
740         /* Subsequent memory operations inherit alignment */
741         mop &= ~MO_AMASK;
742     }
743 
744     gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
745 
746     return true;
747 }
748 
749 static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
750 {
751     int vec_size = a->q ? 16 : 8;
752     int rd_ofs = neon_full_reg_offset(a->vd);
753     int rn_ofs = neon_full_reg_offset(a->vn);
754     int rm_ofs = neon_full_reg_offset(a->vm);
755 
756     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
757         return false;
758     }
759 
760     /* UNDEF accesses to D16-D31 if they don't exist. */
761     if (!dc_isar_feature(aa32_simd_r32, s) &&
762         ((a->vd | a->vn | a->vm) & 0x10)) {
763         return false;
764     }
765 
766     if ((a->vn | a->vm | a->vd) & a->q) {
767         return false;
768     }
769 
770     if (!vfp_access_check(s)) {
771         return true;
772     }
773 
774     fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
775     return true;
776 }
777 
778 #define DO_3SAME(INSN, FUNC)                                            \
779     static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
780     {                                                                   \
781         return do_3same(s, a, FUNC);                                    \
782     }
783 
784 DO_3SAME(VADD, tcg_gen_gvec_add)
785 DO_3SAME(VSUB, tcg_gen_gvec_sub)
786 DO_3SAME(VAND, tcg_gen_gvec_and)
787 DO_3SAME(VBIC, tcg_gen_gvec_andc)
788 DO_3SAME(VORR, tcg_gen_gvec_or)
789 DO_3SAME(VORN, tcg_gen_gvec_orc)
790 DO_3SAME(VEOR, tcg_gen_gvec_xor)
791 DO_3SAME(VSHL_S, gen_gvec_sshl)
792 DO_3SAME(VSHL_U, gen_gvec_ushl)
793 DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
794 DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
795 DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
796 DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
797 
798 /* These insns are all gvec_bitsel but with the inputs in various orders. */
799 #define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
800     static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
801                                 uint32_t rn_ofs, uint32_t rm_ofs,       \
802                                 uint32_t oprsz, uint32_t maxsz)         \
803     {                                                                   \
804         tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
805     }                                                                   \
806     DO_3SAME(INSN, gen_##INSN##_3s)
807 
808 DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
809 DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
810 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
811 
812 #define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
813     static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
814     {                                                                   \
815         if (a->size == 3) {                                             \
816             return false;                                               \
817         }                                                               \
818         return do_3same(s, a, FUNC);                                    \
819     }
820 
821 DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
822 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
823 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
824 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
825 DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
826 DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
827 DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
828 DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
829 DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
830 DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
831 DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
832 DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
833 
834 #define DO_3SAME_CMP(INSN, COND)                                        \
835     static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
836                                 uint32_t rn_ofs, uint32_t rm_ofs,       \
837                                 uint32_t oprsz, uint32_t maxsz)         \
838     {                                                                   \
839         tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
840     }                                                                   \
841     DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
842 
843 DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
844 DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
845 DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
846 DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
847 DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
848 
849 #define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
850     static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
851                          uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
852     {                                                                      \
853         tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
854     }
855 
856 WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
857 
858 static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
859 {
860     if (a->size != 0) {
861         return false;
862     }
863     return do_3same(s, a, gen_VMUL_p_3s);
864 }
865 
866 #define DO_VQRDMLAH(INSN, FUNC)                                         \
867     static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
868     {                                                                   \
869         if (!dc_isar_feature(aa32_rdm, s)) {                            \
870             return false;                                               \
871         }                                                               \
872         if (a->size != 1 && a->size != 2) {                             \
873             return false;                                               \
874         }                                                               \
875         return do_3same(s, a, FUNC);                                    \
876     }
877 
878 DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
879 DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
880 
881 #define DO_SHA1(NAME, FUNC)                                             \
882     WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
883     static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
884     {                                                                   \
885         if (!dc_isar_feature(aa32_sha1, s)) {                           \
886             return false;                                               \
887         }                                                               \
888         return do_3same(s, a, gen_##NAME##_3s);                         \
889     }
890 
891 DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
892 DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
893 DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
894 DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
895 
896 #define DO_SHA2(NAME, FUNC)                                             \
897     WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
898     static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
899     {                                                                   \
900         if (!dc_isar_feature(aa32_sha2, s)) {                           \
901             return false;                                               \
902         }                                                               \
903         return do_3same(s, a, gen_##NAME##_3s);                         \
904     }
905 
906 DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
907 DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
908 DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
909 
910 #define DO_3SAME_64(INSN, FUNC)                                         \
911     static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
912                                 uint32_t rn_ofs, uint32_t rm_ofs,       \
913                                 uint32_t oprsz, uint32_t maxsz)         \
914     {                                                                   \
915         static const GVecGen3 op = { .fni8 = FUNC };                    \
916         tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
917     }                                                                   \
918     DO_3SAME(INSN, gen_##INSN##_3s)
919 
920 #define DO_3SAME_64_ENV(INSN, FUNC)                                     \
921     static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
922     {                                                                   \
923         FUNC(d, tcg_env, n, m);                                         \
924     }                                                                   \
925     DO_3SAME_64(INSN, gen_##INSN##_elt)
926 
927 DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
928 DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
929 DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
930 DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
931 DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
932 DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
933 
934 #define DO_3SAME_32(INSN, FUNC)                                         \
935     static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
936                                 uint32_t rn_ofs, uint32_t rm_ofs,       \
937                                 uint32_t oprsz, uint32_t maxsz)         \
938     {                                                                   \
939         static const GVecGen3 ops[4] = {                                \
940             { .fni4 = gen_helper_neon_##FUNC##8 },                      \
941             { .fni4 = gen_helper_neon_##FUNC##16 },                     \
942             { .fni4 = gen_helper_neon_##FUNC##32 },                     \
943             { 0 },                                                      \
944         };                                                              \
945         tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
946     }                                                                   \
947     static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
948     {                                                                   \
949         if (a->size > 2) {                                              \
950             return false;                                               \
951         }                                                               \
952         return do_3same(s, a, gen_##INSN##_3s);                         \
953     }
954 
955 /*
956  * Some helper functions need to be passed the tcg_env. In order
957  * to use those with the gvec APIs like tcg_gen_gvec_3() we need
958  * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
959  * and which call a NeonGenTwoOpEnvFn().
960  */
961 #define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
962     static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
963     {                                                                   \
964         FUNC(d, tcg_env, n, m);                                         \
965     }
966 
967 #define DO_3SAME_32_ENV(INSN, FUNC)                                     \
968     WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
969     WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
970     WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
971     static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
972                                 uint32_t rn_ofs, uint32_t rm_ofs,       \
973                                 uint32_t oprsz, uint32_t maxsz)         \
974     {                                                                   \
975         static const GVecGen3 ops[4] = {                                \
976             { .fni4 = gen_##INSN##_tramp8 },                            \
977             { .fni4 = gen_##INSN##_tramp16 },                           \
978             { .fni4 = gen_##INSN##_tramp32 },                           \
979             { 0 },                                                      \
980         };                                                              \
981         tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
982     }                                                                   \
983     static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
984     {                                                                   \
985         if (a->size > 2) {                                              \
986             return false;                                               \
987         }                                                               \
988         return do_3same(s, a, gen_##INSN##_3s);                         \
989     }
990 
991 DO_3SAME_32(VHADD_S, hadd_s)
992 DO_3SAME_32(VHADD_U, hadd_u)
993 DO_3SAME_32(VHSUB_S, hsub_s)
994 DO_3SAME_32(VHSUB_U, hsub_u)
995 DO_3SAME_32(VRHADD_S, rhadd_s)
996 DO_3SAME_32(VRHADD_U, rhadd_u)
997 DO_3SAME_32(VRSHL_S, rshl_s)
998 DO_3SAME_32(VRSHL_U, rshl_u)
999 
1000 DO_3SAME_32_ENV(VQSHL_S, qshl_s)
1001 DO_3SAME_32_ENV(VQSHL_U, qshl_u)
1002 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
1003 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
1004 
1005 static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
1006 {
1007     /* Operations handled pairwise 32 bits at a time */
1008     TCGv_i32 tmp, tmp2, tmp3;
1009 
1010     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1011         return false;
1012     }
1013 
1014     /* UNDEF accesses to D16-D31 if they don't exist. */
1015     if (!dc_isar_feature(aa32_simd_r32, s) &&
1016         ((a->vd | a->vn | a->vm) & 0x10)) {
1017         return false;
1018     }
1019 
1020     if (a->size == 3) {
1021         return false;
1022     }
1023 
1024     if (!vfp_access_check(s)) {
1025         return true;
1026     }
1027 
1028     assert(a->q == 0); /* enforced by decode patterns */
1029 
1030     /*
1031      * Note that we have to be careful not to clobber the source operands
1032      * in the "vm == vd" case by storing the result of the first pass too
1033      * early. Since Q is 0 there are always just two passes, so instead
1034      * of a complicated loop over each pass we just unroll.
1035      */
1036     tmp = tcg_temp_new_i32();
1037     tmp2 = tcg_temp_new_i32();
1038     tmp3 = tcg_temp_new_i32();
1039 
1040     read_neon_element32(tmp, a->vn, 0, MO_32);
1041     read_neon_element32(tmp2, a->vn, 1, MO_32);
1042     fn(tmp, tmp, tmp2);
1043 
1044     read_neon_element32(tmp3, a->vm, 0, MO_32);
1045     read_neon_element32(tmp2, a->vm, 1, MO_32);
1046     fn(tmp3, tmp3, tmp2);
1047 
1048     write_neon_element32(tmp, a->vd, 0, MO_32);
1049     write_neon_element32(tmp3, a->vd, 1, MO_32);
1050 
1051     return true;
1052 }
1053 
1054 #define DO_3SAME_PAIR(INSN, func)                                       \
1055     static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
1056     {                                                                   \
1057         static NeonGenTwoOpFn * const fns[] = {                         \
1058             gen_helper_neon_##func##8,                                  \
1059             gen_helper_neon_##func##16,                                 \
1060             gen_helper_neon_##func##32,                                 \
1061         };                                                              \
1062         if (a->size > 2) {                                              \
1063             return false;                                               \
1064         }                                                               \
1065         return do_3same_pair(s, a, fns[a->size]);                       \
1066     }
1067 
1068 /* 32-bit pairwise ops end up the same as the elementwise versions.  */
1069 #define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
1070 #define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
1071 #define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
1072 #define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
1073 #define gen_helper_neon_padd_u32  tcg_gen_add_i32
1074 
1075 DO_3SAME_PAIR(VPMAX_S, pmax_s)
1076 DO_3SAME_PAIR(VPMIN_S, pmin_s)
1077 DO_3SAME_PAIR(VPMAX_U, pmax_u)
1078 DO_3SAME_PAIR(VPMIN_U, pmin_u)
1079 DO_3SAME_PAIR(VPADD, padd_u)
1080 
1081 #define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
1082     WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16);    \
1083     WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32);    \
1084     static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
1085                                 uint32_t rn_ofs, uint32_t rm_ofs,       \
1086                                 uint32_t oprsz, uint32_t maxsz)         \
1087     {                                                                   \
1088         static const GVecGen3 ops[2] = {                                \
1089             { .fni4 = gen_##INSN##_tramp16 },                           \
1090             { .fni4 = gen_##INSN##_tramp32 },                           \
1091         };                                                              \
1092         tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
1093     }                                                                   \
1094     static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
1095     {                                                                   \
1096         if (a->size != 1 && a->size != 2) {                             \
1097             return false;                                               \
1098         }                                                               \
1099         return do_3same(s, a, gen_##INSN##_3s);                         \
1100     }
1101 
1102 DO_3SAME_VQDMULH(VQDMULH, qdmulh)
1103 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
1104 
1105 #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
1106     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
1107                          uint32_t rn_ofs, uint32_t rm_ofs,              \
1108                          uint32_t oprsz, uint32_t maxsz)                \
1109     {                                                                   \
1110         TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
1111         tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
1112                            oprsz, maxsz, 0, FUNC);                      \
1113     }
1114 
1115 #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
1116     WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
1117     WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
1118     static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
1119     {                                                                   \
1120         if (a->size == MO_16) {                                         \
1121             if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
1122                 return false;                                           \
1123             }                                                           \
1124             return do_3same(s, a, gen_##INSN##_fp16_3s);                \
1125         }                                                               \
1126         return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
1127     }
1128 
1129 
1130 DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
1131 DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
1132 DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
1133 DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
1134 DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
1135 DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
1136 DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
1137 DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
1138 DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
1139 DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
1140 DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
1141 DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
1142 DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
1143 DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
1144 DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
1145 DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
1146 DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
1147 DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h)
1148 DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h)
1149 DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h)
1150 
1151 WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
1152 WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
1153 WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
1154 WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
1155 
1156 static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
1157 {
1158     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1159         return false;
1160     }
1161 
1162     if (a->size == MO_16) {
1163         if (!dc_isar_feature(aa32_fp16_arith, s)) {
1164             return false;
1165         }
1166         return do_3same(s, a, gen_VMAXNM_fp16_3s);
1167     }
1168     return do_3same(s, a, gen_VMAXNM_fp32_3s);
1169 }
1170 
1171 static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
1172 {
1173     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1174         return false;
1175     }
1176 
1177     if (a->size == MO_16) {
1178         if (!dc_isar_feature(aa32_fp16_arith, s)) {
1179             return false;
1180         }
1181         return do_3same(s, a, gen_VMINNM_fp16_3s);
1182     }
1183     return do_3same(s, a, gen_VMINNM_fp32_3s);
1184 }
1185 
1186 static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
1187 {
1188     /* Handle a 2-reg-shift insn which can be vectorized. */
1189     int vec_size = a->q ? 16 : 8;
1190     int rd_ofs = neon_full_reg_offset(a->vd);
1191     int rm_ofs = neon_full_reg_offset(a->vm);
1192 
1193     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1194         return false;
1195     }
1196 
1197     /* UNDEF accesses to D16-D31 if they don't exist. */
1198     if (!dc_isar_feature(aa32_simd_r32, s) &&
1199         ((a->vd | a->vm) & 0x10)) {
1200         return false;
1201     }
1202 
1203     if ((a->vm | a->vd) & a->q) {
1204         return false;
1205     }
1206 
1207     if (!vfp_access_check(s)) {
1208         return true;
1209     }
1210 
1211     fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
1212     return true;
1213 }
1214 
1215 #define DO_2SH(INSN, FUNC)                                              \
1216     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1217     {                                                                   \
1218         return do_vector_2sh(s, a, FUNC);                               \
1219     }                                                                   \
1220 
1221 DO_2SH(VSHL, tcg_gen_gvec_shli)
1222 DO_2SH(VSLI, gen_gvec_sli)
1223 DO_2SH(VSRI, gen_gvec_sri)
1224 DO_2SH(VSRA_S, gen_gvec_ssra)
1225 DO_2SH(VSRA_U, gen_gvec_usra)
1226 DO_2SH(VRSHR_S, gen_gvec_srshr)
1227 DO_2SH(VRSHR_U, gen_gvec_urshr)
1228 DO_2SH(VRSRA_S, gen_gvec_srsra)
1229 DO_2SH(VRSRA_U, gen_gvec_ursra)
1230 
1231 static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
1232 {
1233     /* Signed shift out of range results in all-sign-bits */
1234     a->shift = MIN(a->shift, (8 << a->size) - 1);
1235     return do_vector_2sh(s, a, tcg_gen_gvec_sari);
1236 }
1237 
1238 static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
1239                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
1240 {
1241     tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
1242 }
1243 
1244 static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
1245 {
1246     /* Shift out of range is architecturally valid and results in zero. */
1247     if (a->shift >= (8 << a->size)) {
1248         return do_vector_2sh(s, a, gen_zero_rd_2sh);
1249     } else {
1250         return do_vector_2sh(s, a, tcg_gen_gvec_shri);
1251     }
1252 }
1253 
1254 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
1255                              NeonGenTwo64OpEnvFn *fn)
1256 {
1257     /*
1258      * 2-reg-and-shift operations, size == 3 case, where the
1259      * function needs to be passed tcg_env.
1260      */
1261     TCGv_i64 constimm;
1262     int pass;
1263 
1264     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1265         return false;
1266     }
1267 
1268     /* UNDEF accesses to D16-D31 if they don't exist. */
1269     if (!dc_isar_feature(aa32_simd_r32, s) &&
1270         ((a->vd | a->vm) & 0x10)) {
1271         return false;
1272     }
1273 
1274     if ((a->vm | a->vd) & a->q) {
1275         return false;
1276     }
1277 
1278     if (!vfp_access_check(s)) {
1279         return true;
1280     }
1281 
1282     /*
1283      * To avoid excessive duplication of ops we implement shift
1284      * by immediate using the variable shift operations.
1285      */
1286     constimm = tcg_constant_i64(dup_const(a->size, a->shift));
1287 
1288     for (pass = 0; pass < a->q + 1; pass++) {
1289         TCGv_i64 tmp = tcg_temp_new_i64();
1290 
1291         read_neon_element64(tmp, a->vm, pass, MO_64);
1292         fn(tmp, tcg_env, tmp, constimm);
1293         write_neon_element64(tmp, a->vd, pass, MO_64);
1294     }
1295     return true;
1296 }
1297 
1298 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
1299                              NeonGenTwoOpEnvFn *fn)
1300 {
1301     /*
1302      * 2-reg-and-shift operations, size < 3 case, where the
1303      * helper needs to be passed tcg_env.
1304      */
1305     TCGv_i32 constimm, tmp;
1306     int pass;
1307 
1308     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1309         return false;
1310     }
1311 
1312     /* UNDEF accesses to D16-D31 if they don't exist. */
1313     if (!dc_isar_feature(aa32_simd_r32, s) &&
1314         ((a->vd | a->vm) & 0x10)) {
1315         return false;
1316     }
1317 
1318     if ((a->vm | a->vd) & a->q) {
1319         return false;
1320     }
1321 
1322     if (!vfp_access_check(s)) {
1323         return true;
1324     }
1325 
1326     /*
1327      * To avoid excessive duplication of ops we implement shift
1328      * by immediate using the variable shift operations.
1329      */
1330     constimm = tcg_constant_i32(dup_const(a->size, a->shift));
1331     tmp = tcg_temp_new_i32();
1332 
1333     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
1334         read_neon_element32(tmp, a->vm, pass, MO_32);
1335         fn(tmp, tcg_env, tmp, constimm);
1336         write_neon_element32(tmp, a->vd, pass, MO_32);
1337     }
1338     return true;
1339 }
1340 
1341 #define DO_2SHIFT_ENV(INSN, FUNC)                                       \
1342     static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
1343     {                                                                   \
1344         return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
1345     }                                                                   \
1346     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1347     {                                                                   \
1348         static NeonGenTwoOpEnvFn * const fns[] = {                      \
1349             gen_helper_neon_##FUNC##8,                                  \
1350             gen_helper_neon_##FUNC##16,                                 \
1351             gen_helper_neon_##FUNC##32,                                 \
1352         };                                                              \
1353         assert(a->size < ARRAY_SIZE(fns));                              \
1354         return do_2shift_env_32(s, a, fns[a->size]);                    \
1355     }
1356 
1357 DO_2SHIFT_ENV(VQSHLU, qshlu_s)
1358 DO_2SHIFT_ENV(VQSHL_U, qshl_u)
1359 DO_2SHIFT_ENV(VQSHL_S, qshl_s)
1360 
1361 static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
1362                                 NeonGenTwo64OpFn *shiftfn,
1363                                 NeonGenNarrowEnvFn *narrowfn)
1364 {
1365     /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
1366     TCGv_i64 constimm, rm1, rm2;
1367     TCGv_i32 rd;
1368 
1369     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1370         return false;
1371     }
1372 
1373     /* UNDEF accesses to D16-D31 if they don't exist. */
1374     if (!dc_isar_feature(aa32_simd_r32, s) &&
1375         ((a->vd | a->vm) & 0x10)) {
1376         return false;
1377     }
1378 
1379     if (a->vm & 1) {
1380         return false;
1381     }
1382 
1383     if (!vfp_access_check(s)) {
1384         return true;
1385     }
1386 
1387     /*
1388      * This is always a right shift, and the shiftfn is always a
1389      * left-shift helper, which thus needs the negated shift count.
1390      */
1391     constimm = tcg_constant_i64(-a->shift);
1392     rm1 = tcg_temp_new_i64();
1393     rm2 = tcg_temp_new_i64();
1394     rd = tcg_temp_new_i32();
1395 
1396     /* Load both inputs first to avoid potential overwrite if rm == rd */
1397     read_neon_element64(rm1, a->vm, 0, MO_64);
1398     read_neon_element64(rm2, a->vm, 1, MO_64);
1399 
1400     shiftfn(rm1, rm1, constimm);
1401     narrowfn(rd, tcg_env, rm1);
1402     write_neon_element32(rd, a->vd, 0, MO_32);
1403 
1404     shiftfn(rm2, rm2, constimm);
1405     narrowfn(rd, tcg_env, rm2);
1406     write_neon_element32(rd, a->vd, 1, MO_32);
1407 
1408     return true;
1409 }
1410 
1411 static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
1412                                 NeonGenTwoOpFn *shiftfn,
1413                                 NeonGenNarrowEnvFn *narrowfn)
1414 {
1415     /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
1416     TCGv_i32 constimm, rm1, rm2, rm3, rm4;
1417     TCGv_i64 rtmp;
1418     uint32_t imm;
1419 
1420     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1421         return false;
1422     }
1423 
1424     /* UNDEF accesses to D16-D31 if they don't exist. */
1425     if (!dc_isar_feature(aa32_simd_r32, s) &&
1426         ((a->vd | a->vm) & 0x10)) {
1427         return false;
1428     }
1429 
1430     if (a->vm & 1) {
1431         return false;
1432     }
1433 
1434     if (!vfp_access_check(s)) {
1435         return true;
1436     }
1437 
1438     /*
1439      * This is always a right shift, and the shiftfn is always a
1440      * left-shift helper, which thus needs the negated shift count
1441      * duplicated into each lane of the immediate value.
1442      */
1443     if (a->size == 1) {
1444         imm = (uint16_t)(-a->shift);
1445         imm |= imm << 16;
1446     } else {
1447         /* size == 2 */
1448         imm = -a->shift;
1449     }
1450     constimm = tcg_constant_i32(imm);
1451 
1452     /* Load all inputs first to avoid potential overwrite */
1453     rm1 = tcg_temp_new_i32();
1454     rm2 = tcg_temp_new_i32();
1455     rm3 = tcg_temp_new_i32();
1456     rm4 = tcg_temp_new_i32();
1457     read_neon_element32(rm1, a->vm, 0, MO_32);
1458     read_neon_element32(rm2, a->vm, 1, MO_32);
1459     read_neon_element32(rm3, a->vm, 2, MO_32);
1460     read_neon_element32(rm4, a->vm, 3, MO_32);
1461     rtmp = tcg_temp_new_i64();
1462 
1463     shiftfn(rm1, rm1, constimm);
1464     shiftfn(rm2, rm2, constimm);
1465 
1466     tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
1467 
1468     narrowfn(rm1, tcg_env, rtmp);
1469     write_neon_element32(rm1, a->vd, 0, MO_32);
1470 
1471     shiftfn(rm3, rm3, constimm);
1472     shiftfn(rm4, rm4, constimm);
1473 
1474     tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
1475 
1476     narrowfn(rm3, tcg_env, rtmp);
1477     write_neon_element32(rm3, a->vd, 1, MO_32);
1478     return true;
1479 }
1480 
1481 #define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
1482     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1483     {                                                                   \
1484         return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
1485     }
1486 #define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
1487     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1488     {                                                                   \
1489         return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
1490     }
1491 
1492 static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1493 {
1494     tcg_gen_extrl_i64_i32(dest, src);
1495 }
1496 
1497 static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1498 {
1499     gen_helper_neon_narrow_u16(dest, src);
1500 }
1501 
1502 static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1503 {
1504     gen_helper_neon_narrow_u8(dest, src);
1505 }
1506 
1507 DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
1508 DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
1509 DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
1510 
1511 DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
1512 DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
1513 DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
1514 
1515 DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
1516 DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
1517 DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
1518 
1519 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
1520 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
1521 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
1522 DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
1523 DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
1524 DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
1525 
1526 DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
1527 DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
1528 DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
1529 
1530 DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
1531 DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
1532 DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
1533 
1534 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
1535 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
1536 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
1537 
1538 static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
1539                          NeonGenWidenFn *widenfn, bool u)
1540 {
1541     TCGv_i64 tmp;
1542     TCGv_i32 rm0, rm1;
1543     uint64_t widen_mask = 0;
1544 
1545     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1546         return false;
1547     }
1548 
1549     /* UNDEF accesses to D16-D31 if they don't exist. */
1550     if (!dc_isar_feature(aa32_simd_r32, s) &&
1551         ((a->vd | a->vm) & 0x10)) {
1552         return false;
1553     }
1554 
1555     if (a->vd & 1) {
1556         return false;
1557     }
1558 
1559     if (!vfp_access_check(s)) {
1560         return true;
1561     }
1562 
1563     /*
1564      * This is a widen-and-shift operation. The shift is always less
1565      * than the width of the source type, so after widening the input
1566      * vector we can simply shift the whole 64-bit widened register,
1567      * and then clear the potential overflow bits resulting from left
1568      * bits of the narrow input appearing as right bits of the left
1569      * neighbour narrow input. Calculate a mask of bits to clear.
1570      */
1571     if ((a->shift != 0) && (a->size < 2 || u)) {
1572         int esize = 8 << a->size;
1573         widen_mask = MAKE_64BIT_MASK(0, esize);
1574         widen_mask >>= esize - a->shift;
1575         widen_mask = dup_const(a->size + 1, widen_mask);
1576     }
1577 
1578     rm0 = tcg_temp_new_i32();
1579     rm1 = tcg_temp_new_i32();
1580     read_neon_element32(rm0, a->vm, 0, MO_32);
1581     read_neon_element32(rm1, a->vm, 1, MO_32);
1582     tmp = tcg_temp_new_i64();
1583 
1584     widenfn(tmp, rm0);
1585     if (a->shift != 0) {
1586         tcg_gen_shli_i64(tmp, tmp, a->shift);
1587         tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1588     }
1589     write_neon_element64(tmp, a->vd, 0, MO_64);
1590 
1591     widenfn(tmp, rm1);
1592     if (a->shift != 0) {
1593         tcg_gen_shli_i64(tmp, tmp, a->shift);
1594         tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1595     }
1596     write_neon_element64(tmp, a->vd, 1, MO_64);
1597     return true;
1598 }
1599 
1600 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
1601 {
1602     static NeonGenWidenFn * const widenfn[] = {
1603         gen_helper_neon_widen_s8,
1604         gen_helper_neon_widen_s16,
1605         tcg_gen_ext_i32_i64,
1606     };
1607     return do_vshll_2sh(s, a, widenfn[a->size], false);
1608 }
1609 
1610 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
1611 {
1612     static NeonGenWidenFn * const widenfn[] = {
1613         gen_helper_neon_widen_u8,
1614         gen_helper_neon_widen_u16,
1615         tcg_gen_extu_i32_i64,
1616     };
1617     return do_vshll_2sh(s, a, widenfn[a->size], true);
1618 }
1619 
1620 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
1621                       gen_helper_gvec_2_ptr *fn)
1622 {
1623     /* FP operations in 2-reg-and-shift group */
1624     int vec_size = a->q ? 16 : 8;
1625     int rd_ofs = neon_full_reg_offset(a->vd);
1626     int rm_ofs = neon_full_reg_offset(a->vm);
1627     TCGv_ptr fpst;
1628 
1629     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1630         return false;
1631     }
1632 
1633     if (a->size == MO_16) {
1634         if (!dc_isar_feature(aa32_fp16_arith, s)) {
1635             return false;
1636         }
1637     }
1638 
1639     /* UNDEF accesses to D16-D31 if they don't exist. */
1640     if (!dc_isar_feature(aa32_simd_r32, s) &&
1641         ((a->vd | a->vm) & 0x10)) {
1642         return false;
1643     }
1644 
1645     if ((a->vm | a->vd) & a->q) {
1646         return false;
1647     }
1648 
1649     if (!vfp_access_check(s)) {
1650         return true;
1651     }
1652 
1653     fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
1654     tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
1655     return true;
1656 }
1657 
1658 #define DO_FP_2SH(INSN, FUNC)                                           \
1659     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1660     {                                                                   \
1661         return do_fp_2sh(s, a, FUNC);                                   \
1662     }
1663 
1664 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
1665 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
1666 DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
1667 DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
1668 
1669 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
1670 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
1671 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
1672 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
1673 
1674 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
1675                         GVecGen2iFn *fn)
1676 {
1677     uint64_t imm;
1678     int reg_ofs, vec_size;
1679 
1680     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1681         return false;
1682     }
1683 
1684     /* UNDEF accesses to D16-D31 if they don't exist. */
1685     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1686         return false;
1687     }
1688 
1689     if (a->vd & a->q) {
1690         return false;
1691     }
1692 
1693     if (!vfp_access_check(s)) {
1694         return true;
1695     }
1696 
1697     reg_ofs = neon_full_reg_offset(a->vd);
1698     vec_size = a->q ? 16 : 8;
1699     imm = asimd_imm_const(a->imm, a->cmode, a->op);
1700 
1701     fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
1702     return true;
1703 }
1704 
1705 static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
1706                         int64_t c, uint32_t oprsz, uint32_t maxsz)
1707 {
1708     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
1709 }
1710 
1711 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
1712 {
1713     /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1714     GVecGen2iFn *fn;
1715 
1716     if ((a->cmode & 1) && a->cmode < 12) {
1717         /* for op=1, the imm will be inverted, so BIC becomes AND. */
1718         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1719     } else {
1720         /* There is one unallocated cmode/op combination in this space */
1721         if (a->cmode == 15 && a->op == 1) {
1722             return false;
1723         }
1724         fn = gen_VMOV_1r;
1725     }
1726     return do_1reg_imm(s, a, fn);
1727 }
1728 
1729 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
1730                            NeonGenWidenFn *widenfn,
1731                            NeonGenTwo64OpFn *opfn,
1732                            int src1_mop, int src2_mop)
1733 {
1734     /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
1735     TCGv_i64 rn0_64, rn1_64, rm_64;
1736 
1737     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1738         return false;
1739     }
1740 
1741     /* UNDEF accesses to D16-D31 if they don't exist. */
1742     if (!dc_isar_feature(aa32_simd_r32, s) &&
1743         ((a->vd | a->vn | a->vm) & 0x10)) {
1744         return false;
1745     }
1746 
1747     if (!opfn) {
1748         /* size == 3 case, which is an entirely different insn group */
1749         return false;
1750     }
1751 
1752     if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
1753         return false;
1754     }
1755 
1756     if (!vfp_access_check(s)) {
1757         return true;
1758     }
1759 
1760     rn0_64 = tcg_temp_new_i64();
1761     rn1_64 = tcg_temp_new_i64();
1762     rm_64 = tcg_temp_new_i64();
1763 
1764     if (src1_mop >= 0) {
1765         read_neon_element64(rn0_64, a->vn, 0, src1_mop);
1766     } else {
1767         TCGv_i32 tmp = tcg_temp_new_i32();
1768         read_neon_element32(tmp, a->vn, 0, MO_32);
1769         widenfn(rn0_64, tmp);
1770     }
1771     if (src2_mop >= 0) {
1772         read_neon_element64(rm_64, a->vm, 0, src2_mop);
1773     } else {
1774         TCGv_i32 tmp = tcg_temp_new_i32();
1775         read_neon_element32(tmp, a->vm, 0, MO_32);
1776         widenfn(rm_64, tmp);
1777     }
1778 
1779     opfn(rn0_64, rn0_64, rm_64);
1780 
1781     /*
1782      * Load second pass inputs before storing the first pass result, to
1783      * avoid incorrect results if a narrow input overlaps with the result.
1784      */
1785     if (src1_mop >= 0) {
1786         read_neon_element64(rn1_64, a->vn, 1, src1_mop);
1787     } else {
1788         TCGv_i32 tmp = tcg_temp_new_i32();
1789         read_neon_element32(tmp, a->vn, 1, MO_32);
1790         widenfn(rn1_64, tmp);
1791     }
1792     if (src2_mop >= 0) {
1793         read_neon_element64(rm_64, a->vm, 1, src2_mop);
1794     } else {
1795         TCGv_i32 tmp = tcg_temp_new_i32();
1796         read_neon_element32(tmp, a->vm, 1, MO_32);
1797         widenfn(rm_64, tmp);
1798     }
1799 
1800     write_neon_element64(rn0_64, a->vd, 0, MO_64);
1801 
1802     opfn(rn1_64, rn1_64, rm_64);
1803     write_neon_element64(rn1_64, a->vd, 1, MO_64);
1804 
1805     return true;
1806 }
1807 
1808 #define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
1809     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
1810     {                                                                   \
1811         static NeonGenWidenFn * const widenfn[] = {                     \
1812             gen_helper_neon_widen_##S##8,                               \
1813             gen_helper_neon_widen_##S##16,                              \
1814             NULL, NULL,                                                 \
1815         };                                                              \
1816         static NeonGenTwo64OpFn * const addfn[] = {                     \
1817             gen_helper_neon_##OP##l_u16,                                \
1818             gen_helper_neon_##OP##l_u32,                                \
1819             tcg_gen_##OP##_i64,                                         \
1820             NULL,                                                       \
1821         };                                                              \
1822         int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
1823         return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
1824                               SRC1WIDE ? MO_UQ : narrow_mop,             \
1825                               narrow_mop);                              \
1826     }
1827 
1828 DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
1829 DO_PREWIDEN(VADDL_U, u, add, false, 0)
1830 DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
1831 DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
1832 DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
1833 DO_PREWIDEN(VADDW_U, u, add, true, 0)
1834 DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
1835 DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
1836 
1837 static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
1838                          NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
1839 {
1840     /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
1841     TCGv_i64 rn_64, rm_64;
1842     TCGv_i32 rd0, rd1;
1843 
1844     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1845         return false;
1846     }
1847 
1848     /* UNDEF accesses to D16-D31 if they don't exist. */
1849     if (!dc_isar_feature(aa32_simd_r32, s) &&
1850         ((a->vd | a->vn | a->vm) & 0x10)) {
1851         return false;
1852     }
1853 
1854     if (!opfn || !narrowfn) {
1855         /* size == 3 case, which is an entirely different insn group */
1856         return false;
1857     }
1858 
1859     if ((a->vn | a->vm) & 1) {
1860         return false;
1861     }
1862 
1863     if (!vfp_access_check(s)) {
1864         return true;
1865     }
1866 
1867     rn_64 = tcg_temp_new_i64();
1868     rm_64 = tcg_temp_new_i64();
1869     rd0 = tcg_temp_new_i32();
1870     rd1 = tcg_temp_new_i32();
1871 
1872     read_neon_element64(rn_64, a->vn, 0, MO_64);
1873     read_neon_element64(rm_64, a->vm, 0, MO_64);
1874 
1875     opfn(rn_64, rn_64, rm_64);
1876 
1877     narrowfn(rd0, rn_64);
1878 
1879     read_neon_element64(rn_64, a->vn, 1, MO_64);
1880     read_neon_element64(rm_64, a->vm, 1, MO_64);
1881 
1882     opfn(rn_64, rn_64, rm_64);
1883 
1884     narrowfn(rd1, rn_64);
1885 
1886     write_neon_element32(rd0, a->vd, 0, MO_32);
1887     write_neon_element32(rd1, a->vd, 1, MO_32);
1888 
1889     return true;
1890 }
1891 
1892 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
1893     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
1894     {                                                                   \
1895         static NeonGenTwo64OpFn * const addfn[] = {                     \
1896             gen_helper_neon_##OP##l_u16,                                \
1897             gen_helper_neon_##OP##l_u32,                                \
1898             tcg_gen_##OP##_i64,                                         \
1899             NULL,                                                       \
1900         };                                                              \
1901         static NeonGenNarrowFn * const narrowfn[] = {                   \
1902             gen_helper_neon_##NARROWTYPE##_high_u8,                     \
1903             gen_helper_neon_##NARROWTYPE##_high_u16,                    \
1904             EXTOP,                                                      \
1905             NULL,                                                       \
1906         };                                                              \
1907         return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
1908     }
1909 
1910 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
1911 {
1912     tcg_gen_addi_i64(rn, rn, 1u << 31);
1913     tcg_gen_extrh_i64_i32(rd, rn);
1914 }
1915 
1916 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
1917 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
1918 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
1919 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
1920 
1921 static bool do_long_3d(DisasContext *s, arg_3diff *a,
1922                        NeonGenTwoOpWidenFn *opfn,
1923                        NeonGenTwo64OpFn *accfn)
1924 {
1925     /*
1926      * 3-regs different lengths, long operations.
1927      * These perform an operation on two inputs that returns a double-width
1928      * result, and then possibly perform an accumulation operation of
1929      * that result into the double-width destination.
1930      */
1931     TCGv_i64 rd0, rd1, tmp;
1932     TCGv_i32 rn, rm;
1933 
1934     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1935         return false;
1936     }
1937 
1938     /* UNDEF accesses to D16-D31 if they don't exist. */
1939     if (!dc_isar_feature(aa32_simd_r32, s) &&
1940         ((a->vd | a->vn | a->vm) & 0x10)) {
1941         return false;
1942     }
1943 
1944     if (!opfn) {
1945         /* size == 3 case, which is an entirely different insn group */
1946         return false;
1947     }
1948 
1949     if (a->vd & 1) {
1950         return false;
1951     }
1952 
1953     if (!vfp_access_check(s)) {
1954         return true;
1955     }
1956 
1957     rd0 = tcg_temp_new_i64();
1958     rd1 = tcg_temp_new_i64();
1959 
1960     rn = tcg_temp_new_i32();
1961     rm = tcg_temp_new_i32();
1962     read_neon_element32(rn, a->vn, 0, MO_32);
1963     read_neon_element32(rm, a->vm, 0, MO_32);
1964     opfn(rd0, rn, rm);
1965 
1966     read_neon_element32(rn, a->vn, 1, MO_32);
1967     read_neon_element32(rm, a->vm, 1, MO_32);
1968     opfn(rd1, rn, rm);
1969 
1970     /* Don't store results until after all loads: they might overlap */
1971     if (accfn) {
1972         tmp = tcg_temp_new_i64();
1973         read_neon_element64(tmp, a->vd, 0, MO_64);
1974         accfn(rd0, tmp, rd0);
1975         read_neon_element64(tmp, a->vd, 1, MO_64);
1976         accfn(rd1, tmp, rd1);
1977     }
1978 
1979     write_neon_element64(rd0, a->vd, 0, MO_64);
1980     write_neon_element64(rd1, a->vd, 1, MO_64);
1981 
1982     return true;
1983 }
1984 
1985 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
1986 {
1987     static NeonGenTwoOpWidenFn * const opfn[] = {
1988         gen_helper_neon_abdl_s16,
1989         gen_helper_neon_abdl_s32,
1990         gen_helper_neon_abdl_s64,
1991         NULL,
1992     };
1993 
1994     return do_long_3d(s, a, opfn[a->size], NULL);
1995 }
1996 
1997 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
1998 {
1999     static NeonGenTwoOpWidenFn * const opfn[] = {
2000         gen_helper_neon_abdl_u16,
2001         gen_helper_neon_abdl_u32,
2002         gen_helper_neon_abdl_u64,
2003         NULL,
2004     };
2005 
2006     return do_long_3d(s, a, opfn[a->size], NULL);
2007 }
2008 
2009 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
2010 {
2011     static NeonGenTwoOpWidenFn * const opfn[] = {
2012         gen_helper_neon_abdl_s16,
2013         gen_helper_neon_abdl_s32,
2014         gen_helper_neon_abdl_s64,
2015         NULL,
2016     };
2017     static NeonGenTwo64OpFn * const addfn[] = {
2018         gen_helper_neon_addl_u16,
2019         gen_helper_neon_addl_u32,
2020         tcg_gen_add_i64,
2021         NULL,
2022     };
2023 
2024     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2025 }
2026 
2027 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
2028 {
2029     static NeonGenTwoOpWidenFn * const opfn[] = {
2030         gen_helper_neon_abdl_u16,
2031         gen_helper_neon_abdl_u32,
2032         gen_helper_neon_abdl_u64,
2033         NULL,
2034     };
2035     static NeonGenTwo64OpFn * const addfn[] = {
2036         gen_helper_neon_addl_u16,
2037         gen_helper_neon_addl_u32,
2038         tcg_gen_add_i64,
2039         NULL,
2040     };
2041 
2042     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2043 }
2044 
2045 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2046 {
2047     TCGv_i32 lo = tcg_temp_new_i32();
2048     TCGv_i32 hi = tcg_temp_new_i32();
2049 
2050     tcg_gen_muls2_i32(lo, hi, rn, rm);
2051     tcg_gen_concat_i32_i64(rd, lo, hi);
2052 }
2053 
2054 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2055 {
2056     TCGv_i32 lo = tcg_temp_new_i32();
2057     TCGv_i32 hi = tcg_temp_new_i32();
2058 
2059     tcg_gen_mulu2_i32(lo, hi, rn, rm);
2060     tcg_gen_concat_i32_i64(rd, lo, hi);
2061 }
2062 
2063 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
2064 {
2065     static NeonGenTwoOpWidenFn * const opfn[] = {
2066         gen_helper_neon_mull_s8,
2067         gen_helper_neon_mull_s16,
2068         gen_mull_s32,
2069         NULL,
2070     };
2071 
2072     return do_long_3d(s, a, opfn[a->size], NULL);
2073 }
2074 
2075 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
2076 {
2077     static NeonGenTwoOpWidenFn * const opfn[] = {
2078         gen_helper_neon_mull_u8,
2079         gen_helper_neon_mull_u16,
2080         gen_mull_u32,
2081         NULL,
2082     };
2083 
2084     return do_long_3d(s, a, opfn[a->size], NULL);
2085 }
2086 
2087 #define DO_VMLAL(INSN,MULL,ACC)                                         \
2088     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
2089     {                                                                   \
2090         static NeonGenTwoOpWidenFn * const opfn[] = {                   \
2091             gen_helper_neon_##MULL##8,                                  \
2092             gen_helper_neon_##MULL##16,                                 \
2093             gen_##MULL##32,                                             \
2094             NULL,                                                       \
2095         };                                                              \
2096         static NeonGenTwo64OpFn * const accfn[] = {                     \
2097             gen_helper_neon_##ACC##l_u16,                               \
2098             gen_helper_neon_##ACC##l_u32,                               \
2099             tcg_gen_##ACC##_i64,                                        \
2100             NULL,                                                       \
2101         };                                                              \
2102         return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
2103     }
2104 
2105 DO_VMLAL(VMLAL_S,mull_s,add)
2106 DO_VMLAL(VMLAL_U,mull_u,add)
2107 DO_VMLAL(VMLSL_S,mull_s,sub)
2108 DO_VMLAL(VMLSL_U,mull_u,sub)
2109 
2110 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2111 {
2112     gen_helper_neon_mull_s16(rd, rn, rm);
2113     gen_helper_neon_addl_saturate_s32(rd, tcg_env, rd, rd);
2114 }
2115 
2116 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2117 {
2118     gen_mull_s32(rd, rn, rm);
2119     gen_helper_neon_addl_saturate_s64(rd, tcg_env, rd, rd);
2120 }
2121 
2122 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
2123 {
2124     static NeonGenTwoOpWidenFn * const opfn[] = {
2125         NULL,
2126         gen_VQDMULL_16,
2127         gen_VQDMULL_32,
2128         NULL,
2129     };
2130 
2131     return do_long_3d(s, a, opfn[a->size], NULL);
2132 }
2133 
2134 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2135 {
2136     gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
2137 }
2138 
2139 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2140 {
2141     gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
2142 }
2143 
2144 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
2145 {
2146     static NeonGenTwoOpWidenFn * const opfn[] = {
2147         NULL,
2148         gen_VQDMULL_16,
2149         gen_VQDMULL_32,
2150         NULL,
2151     };
2152     static NeonGenTwo64OpFn * const accfn[] = {
2153         NULL,
2154         gen_VQDMLAL_acc_16,
2155         gen_VQDMLAL_acc_32,
2156         NULL,
2157     };
2158 
2159     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2160 }
2161 
2162 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2163 {
2164     gen_helper_neon_negl_u32(rm, rm);
2165     gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
2166 }
2167 
2168 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2169 {
2170     tcg_gen_neg_i64(rm, rm);
2171     gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
2172 }
2173 
2174 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
2175 {
2176     static NeonGenTwoOpWidenFn * const opfn[] = {
2177         NULL,
2178         gen_VQDMULL_16,
2179         gen_VQDMULL_32,
2180         NULL,
2181     };
2182     static NeonGenTwo64OpFn * const accfn[] = {
2183         NULL,
2184         gen_VQDMLSL_acc_16,
2185         gen_VQDMLSL_acc_32,
2186         NULL,
2187     };
2188 
2189     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2190 }
2191 
2192 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
2193 {
2194     gen_helper_gvec_3 *fn_gvec;
2195 
2196     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2197         return false;
2198     }
2199 
2200     /* UNDEF accesses to D16-D31 if they don't exist. */
2201     if (!dc_isar_feature(aa32_simd_r32, s) &&
2202         ((a->vd | a->vn | a->vm) & 0x10)) {
2203         return false;
2204     }
2205 
2206     if (a->vd & 1) {
2207         return false;
2208     }
2209 
2210     switch (a->size) {
2211     case 0:
2212         fn_gvec = gen_helper_neon_pmull_h;
2213         break;
2214     case 2:
2215         if (!dc_isar_feature(aa32_pmull, s)) {
2216             return false;
2217         }
2218         fn_gvec = gen_helper_gvec_pmull_q;
2219         break;
2220     default:
2221         return false;
2222     }
2223 
2224     if (!vfp_access_check(s)) {
2225         return true;
2226     }
2227 
2228     tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
2229                        neon_full_reg_offset(a->vn),
2230                        neon_full_reg_offset(a->vm),
2231                        16, 16, 0, fn_gvec);
2232     return true;
2233 }
2234 
2235 static void gen_neon_dup_low16(TCGv_i32 var)
2236 {
2237     TCGv_i32 tmp = tcg_temp_new_i32();
2238     tcg_gen_ext16u_i32(var, var);
2239     tcg_gen_shli_i32(tmp, var, 16);
2240     tcg_gen_or_i32(var, var, tmp);
2241 }
2242 
2243 static void gen_neon_dup_high16(TCGv_i32 var)
2244 {
2245     TCGv_i32 tmp = tcg_temp_new_i32();
2246     tcg_gen_andi_i32(var, var, 0xffff0000);
2247     tcg_gen_shri_i32(tmp, var, 16);
2248     tcg_gen_or_i32(var, var, tmp);
2249 }
2250 
2251 static inline TCGv_i32 neon_get_scalar(int size, int reg)
2252 {
2253     TCGv_i32 tmp = tcg_temp_new_i32();
2254     if (size == MO_16) {
2255         read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
2256         if (reg & 8) {
2257             gen_neon_dup_high16(tmp);
2258         } else {
2259             gen_neon_dup_low16(tmp);
2260         }
2261     } else {
2262         read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
2263     }
2264     return tmp;
2265 }
2266 
2267 static bool do_2scalar(DisasContext *s, arg_2scalar *a,
2268                        NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
2269 {
2270     /*
2271      * Two registers and a scalar: perform an operation between
2272      * the input elements and the scalar, and then possibly
2273      * perform an accumulation operation of that result into the
2274      * destination.
2275      */
2276     TCGv_i32 scalar, tmp;
2277     int pass;
2278 
2279     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2280         return false;
2281     }
2282 
2283     /* UNDEF accesses to D16-D31 if they don't exist. */
2284     if (!dc_isar_feature(aa32_simd_r32, s) &&
2285         ((a->vd | a->vn | a->vm) & 0x10)) {
2286         return false;
2287     }
2288 
2289     if (!opfn) {
2290         /* Bad size (including size == 3, which is a different insn group) */
2291         return false;
2292     }
2293 
2294     if (a->q && ((a->vd | a->vn) & 1)) {
2295         return false;
2296     }
2297 
2298     if (!vfp_access_check(s)) {
2299         return true;
2300     }
2301 
2302     scalar = neon_get_scalar(a->size, a->vm);
2303     tmp = tcg_temp_new_i32();
2304 
2305     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2306         read_neon_element32(tmp, a->vn, pass, MO_32);
2307         opfn(tmp, tmp, scalar);
2308         if (accfn) {
2309             TCGv_i32 rd = tcg_temp_new_i32();
2310             read_neon_element32(rd, a->vd, pass, MO_32);
2311             accfn(tmp, rd, tmp);
2312         }
2313         write_neon_element32(tmp, a->vd, pass, MO_32);
2314     }
2315     return true;
2316 }
2317 
2318 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
2319 {
2320     static NeonGenTwoOpFn * const opfn[] = {
2321         NULL,
2322         gen_helper_neon_mul_u16,
2323         tcg_gen_mul_i32,
2324         NULL,
2325     };
2326 
2327     return do_2scalar(s, a, opfn[a->size], NULL);
2328 }
2329 
2330 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
2331 {
2332     static NeonGenTwoOpFn * const opfn[] = {
2333         NULL,
2334         gen_helper_neon_mul_u16,
2335         tcg_gen_mul_i32,
2336         NULL,
2337     };
2338     static NeonGenTwoOpFn * const accfn[] = {
2339         NULL,
2340         gen_helper_neon_add_u16,
2341         tcg_gen_add_i32,
2342         NULL,
2343     };
2344 
2345     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2346 }
2347 
2348 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
2349 {
2350     static NeonGenTwoOpFn * const opfn[] = {
2351         NULL,
2352         gen_helper_neon_mul_u16,
2353         tcg_gen_mul_i32,
2354         NULL,
2355     };
2356     static NeonGenTwoOpFn * const accfn[] = {
2357         NULL,
2358         gen_helper_neon_sub_u16,
2359         tcg_gen_sub_i32,
2360         NULL,
2361     };
2362 
2363     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2364 }
2365 
2366 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
2367                               gen_helper_gvec_3_ptr *fn)
2368 {
2369     /* Two registers and a scalar, using gvec */
2370     int vec_size = a->q ? 16 : 8;
2371     int rd_ofs = neon_full_reg_offset(a->vd);
2372     int rn_ofs = neon_full_reg_offset(a->vn);
2373     int rm_ofs;
2374     int idx;
2375     TCGv_ptr fpstatus;
2376 
2377     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2378         return false;
2379     }
2380 
2381     /* UNDEF accesses to D16-D31 if they don't exist. */
2382     if (!dc_isar_feature(aa32_simd_r32, s) &&
2383         ((a->vd | a->vn | a->vm) & 0x10)) {
2384         return false;
2385     }
2386 
2387     if (!fn) {
2388         /* Bad size (including size == 3, which is a different insn group) */
2389         return false;
2390     }
2391 
2392     if (a->q && ((a->vd | a->vn) & 1)) {
2393         return false;
2394     }
2395 
2396     if (!vfp_access_check(s)) {
2397         return true;
2398     }
2399 
2400     /* a->vm is M:Vm, which encodes both register and index */
2401     idx = extract32(a->vm, a->size + 2, 2);
2402     a->vm = extract32(a->vm, 0, a->size + 2);
2403     rm_ofs = neon_full_reg_offset(a->vm);
2404 
2405     fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
2406     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
2407                        vec_size, vec_size, idx, fn);
2408     return true;
2409 }
2410 
2411 #define DO_VMUL_F_2sc(NAME, FUNC)                                       \
2412     static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
2413     {                                                                   \
2414         static gen_helper_gvec_3_ptr * const opfn[] = {                 \
2415             NULL,                                                       \
2416             gen_helper_##FUNC##_h,                                      \
2417             gen_helper_##FUNC##_s,                                      \
2418             NULL,                                                       \
2419         };                                                              \
2420         if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
2421             return false;                                               \
2422         }                                                               \
2423         return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
2424     }
2425 
2426 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
2427 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
2428 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
2429 
2430 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
2431 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
2432 WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
2433 WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
2434 
2435 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
2436 {
2437     static NeonGenTwoOpFn * const opfn[] = {
2438         NULL,
2439         gen_VQDMULH_16,
2440         gen_VQDMULH_32,
2441         NULL,
2442     };
2443 
2444     return do_2scalar(s, a, opfn[a->size], NULL);
2445 }
2446 
2447 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
2448 {
2449     static NeonGenTwoOpFn * const opfn[] = {
2450         NULL,
2451         gen_VQRDMULH_16,
2452         gen_VQRDMULH_32,
2453         NULL,
2454     };
2455 
2456     return do_2scalar(s, a, opfn[a->size], NULL);
2457 }
2458 
2459 static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
2460                             NeonGenThreeOpEnvFn *opfn)
2461 {
2462     /*
2463      * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
2464      * performs a kind of fused op-then-accumulate using a helper
2465      * function that takes all of rd, rn and the scalar at once.
2466      */
2467     TCGv_i32 scalar, rn, rd;
2468     int pass;
2469 
2470     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2471         return false;
2472     }
2473 
2474     if (!dc_isar_feature(aa32_rdm, s)) {
2475         return false;
2476     }
2477 
2478     /* UNDEF accesses to D16-D31 if they don't exist. */
2479     if (!dc_isar_feature(aa32_simd_r32, s) &&
2480         ((a->vd | a->vn | a->vm) & 0x10)) {
2481         return false;
2482     }
2483 
2484     if (!opfn) {
2485         /* Bad size (including size == 3, which is a different insn group) */
2486         return false;
2487     }
2488 
2489     if (a->q && ((a->vd | a->vn) & 1)) {
2490         return false;
2491     }
2492 
2493     if (!vfp_access_check(s)) {
2494         return true;
2495     }
2496 
2497     scalar = neon_get_scalar(a->size, a->vm);
2498     rn = tcg_temp_new_i32();
2499     rd = tcg_temp_new_i32();
2500 
2501     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2502         read_neon_element32(rn, a->vn, pass, MO_32);
2503         read_neon_element32(rd, a->vd, pass, MO_32);
2504         opfn(rd, tcg_env, rn, scalar, rd);
2505         write_neon_element32(rd, a->vd, pass, MO_32);
2506     }
2507     return true;
2508 }
2509 
2510 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
2511 {
2512     static NeonGenThreeOpEnvFn *opfn[] = {
2513         NULL,
2514         gen_helper_neon_qrdmlah_s16,
2515         gen_helper_neon_qrdmlah_s32,
2516         NULL,
2517     };
2518     return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2519 }
2520 
2521 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
2522 {
2523     static NeonGenThreeOpEnvFn *opfn[] = {
2524         NULL,
2525         gen_helper_neon_qrdmlsh_s16,
2526         gen_helper_neon_qrdmlsh_s32,
2527         NULL,
2528     };
2529     return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2530 }
2531 
2532 static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
2533                             NeonGenTwoOpWidenFn *opfn,
2534                             NeonGenTwo64OpFn *accfn)
2535 {
2536     /*
2537      * Two registers and a scalar, long operations: perform an
2538      * operation on the input elements and the scalar which produces
2539      * a double-width result, and then possibly perform an accumulation
2540      * operation of that result into the destination.
2541      */
2542     TCGv_i32 scalar, rn;
2543     TCGv_i64 rn0_64, rn1_64;
2544 
2545     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2546         return false;
2547     }
2548 
2549     /* UNDEF accesses to D16-D31 if they don't exist. */
2550     if (!dc_isar_feature(aa32_simd_r32, s) &&
2551         ((a->vd | a->vn | a->vm) & 0x10)) {
2552         return false;
2553     }
2554 
2555     if (!opfn) {
2556         /* Bad size (including size == 3, which is a different insn group) */
2557         return false;
2558     }
2559 
2560     if (a->vd & 1) {
2561         return false;
2562     }
2563 
2564     if (!vfp_access_check(s)) {
2565         return true;
2566     }
2567 
2568     scalar = neon_get_scalar(a->size, a->vm);
2569 
2570     /* Load all inputs before writing any outputs, in case of overlap */
2571     rn = tcg_temp_new_i32();
2572     read_neon_element32(rn, a->vn, 0, MO_32);
2573     rn0_64 = tcg_temp_new_i64();
2574     opfn(rn0_64, rn, scalar);
2575 
2576     read_neon_element32(rn, a->vn, 1, MO_32);
2577     rn1_64 = tcg_temp_new_i64();
2578     opfn(rn1_64, rn, scalar);
2579 
2580     if (accfn) {
2581         TCGv_i64 t64 = tcg_temp_new_i64();
2582         read_neon_element64(t64, a->vd, 0, MO_64);
2583         accfn(rn0_64, t64, rn0_64);
2584         read_neon_element64(t64, a->vd, 1, MO_64);
2585         accfn(rn1_64, t64, rn1_64);
2586     }
2587 
2588     write_neon_element64(rn0_64, a->vd, 0, MO_64);
2589     write_neon_element64(rn1_64, a->vd, 1, MO_64);
2590     return true;
2591 }
2592 
2593 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
2594 {
2595     static NeonGenTwoOpWidenFn * const opfn[] = {
2596         NULL,
2597         gen_helper_neon_mull_s16,
2598         gen_mull_s32,
2599         NULL,
2600     };
2601 
2602     return do_2scalar_long(s, a, opfn[a->size], NULL);
2603 }
2604 
2605 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
2606 {
2607     static NeonGenTwoOpWidenFn * const opfn[] = {
2608         NULL,
2609         gen_helper_neon_mull_u16,
2610         gen_mull_u32,
2611         NULL,
2612     };
2613 
2614     return do_2scalar_long(s, a, opfn[a->size], NULL);
2615 }
2616 
2617 #define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
2618     static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
2619     {                                                                   \
2620         static NeonGenTwoOpWidenFn * const opfn[] = {                   \
2621             NULL,                                                       \
2622             gen_helper_neon_##MULL##16,                                 \
2623             gen_##MULL##32,                                             \
2624             NULL,                                                       \
2625         };                                                              \
2626         static NeonGenTwo64OpFn * const accfn[] = {                     \
2627             NULL,                                                       \
2628             gen_helper_neon_##ACC##l_u32,                               \
2629             tcg_gen_##ACC##_i64,                                        \
2630             NULL,                                                       \
2631         };                                                              \
2632         return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
2633     }
2634 
2635 DO_VMLAL_2SC(VMLAL_S, mull_s, add)
2636 DO_VMLAL_2SC(VMLAL_U, mull_u, add)
2637 DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
2638 DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
2639 
2640 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
2641 {
2642     static NeonGenTwoOpWidenFn * const opfn[] = {
2643         NULL,
2644         gen_VQDMULL_16,
2645         gen_VQDMULL_32,
2646         NULL,
2647     };
2648 
2649     return do_2scalar_long(s, a, opfn[a->size], NULL);
2650 }
2651 
2652 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
2653 {
2654     static NeonGenTwoOpWidenFn * const opfn[] = {
2655         NULL,
2656         gen_VQDMULL_16,
2657         gen_VQDMULL_32,
2658         NULL,
2659     };
2660     static NeonGenTwo64OpFn * const accfn[] = {
2661         NULL,
2662         gen_VQDMLAL_acc_16,
2663         gen_VQDMLAL_acc_32,
2664         NULL,
2665     };
2666 
2667     return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2668 }
2669 
2670 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
2671 {
2672     static NeonGenTwoOpWidenFn * const opfn[] = {
2673         NULL,
2674         gen_VQDMULL_16,
2675         gen_VQDMULL_32,
2676         NULL,
2677     };
2678     static NeonGenTwo64OpFn * const accfn[] = {
2679         NULL,
2680         gen_VQDMLSL_acc_16,
2681         gen_VQDMLSL_acc_32,
2682         NULL,
2683     };
2684 
2685     return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2686 }
2687 
2688 static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
2689 {
2690     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2691         return false;
2692     }
2693 
2694     /* UNDEF accesses to D16-D31 if they don't exist. */
2695     if (!dc_isar_feature(aa32_simd_r32, s) &&
2696         ((a->vd | a->vn | a->vm) & 0x10)) {
2697         return false;
2698     }
2699 
2700     if ((a->vn | a->vm | a->vd) & a->q) {
2701         return false;
2702     }
2703 
2704     if (a->imm > 7 && !a->q) {
2705         return false;
2706     }
2707 
2708     if (!vfp_access_check(s)) {
2709         return true;
2710     }
2711 
2712     if (!a->q) {
2713         /* Extract 64 bits from <Vm:Vn> */
2714         TCGv_i64 left, right, dest;
2715 
2716         left = tcg_temp_new_i64();
2717         right = tcg_temp_new_i64();
2718         dest = tcg_temp_new_i64();
2719 
2720         read_neon_element64(right, a->vn, 0, MO_64);
2721         read_neon_element64(left, a->vm, 0, MO_64);
2722         tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
2723         write_neon_element64(dest, a->vd, 0, MO_64);
2724     } else {
2725         /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
2726         TCGv_i64 left, middle, right, destleft, destright;
2727 
2728         left = tcg_temp_new_i64();
2729         middle = tcg_temp_new_i64();
2730         right = tcg_temp_new_i64();
2731         destleft = tcg_temp_new_i64();
2732         destright = tcg_temp_new_i64();
2733 
2734         if (a->imm < 8) {
2735             read_neon_element64(right, a->vn, 0, MO_64);
2736             read_neon_element64(middle, a->vn, 1, MO_64);
2737             tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
2738             read_neon_element64(left, a->vm, 0, MO_64);
2739             tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
2740         } else {
2741             read_neon_element64(right, a->vn, 1, MO_64);
2742             read_neon_element64(middle, a->vm, 0, MO_64);
2743             tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
2744             read_neon_element64(left, a->vm, 1, MO_64);
2745             tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
2746         }
2747 
2748         write_neon_element64(destright, a->vd, 0, MO_64);
2749         write_neon_element64(destleft, a->vd, 1, MO_64);
2750     }
2751     return true;
2752 }
2753 
2754 static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
2755 {
2756     TCGv_i64 val, def;
2757     TCGv_i32 desc;
2758 
2759     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2760         return false;
2761     }
2762 
2763     /* UNDEF accesses to D16-D31 if they don't exist. */
2764     if (!dc_isar_feature(aa32_simd_r32, s) &&
2765         ((a->vd | a->vn | a->vm) & 0x10)) {
2766         return false;
2767     }
2768 
2769     if ((a->vn + a->len + 1) > 32) {
2770         /*
2771          * This is UNPREDICTABLE; we choose to UNDEF to avoid the
2772          * helper function running off the end of the register file.
2773          */
2774         return false;
2775     }
2776 
2777     if (!vfp_access_check(s)) {
2778         return true;
2779     }
2780 
2781     desc = tcg_constant_i32((a->vn << 2) | a->len);
2782     def = tcg_temp_new_i64();
2783     if (a->op) {
2784         read_neon_element64(def, a->vd, 0, MO_64);
2785     } else {
2786         tcg_gen_movi_i64(def, 0);
2787     }
2788     val = tcg_temp_new_i64();
2789     read_neon_element64(val, a->vm, 0, MO_64);
2790 
2791     gen_helper_neon_tbl(val, tcg_env, desc, val, def);
2792     write_neon_element64(val, a->vd, 0, MO_64);
2793     return true;
2794 }
2795 
2796 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
2797 {
2798     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2799         return false;
2800     }
2801 
2802     /* UNDEF accesses to D16-D31 if they don't exist. */
2803     if (!dc_isar_feature(aa32_simd_r32, s) &&
2804         ((a->vd | a->vm) & 0x10)) {
2805         return false;
2806     }
2807 
2808     if (a->vd & a->q) {
2809         return false;
2810     }
2811 
2812     if (!vfp_access_check(s)) {
2813         return true;
2814     }
2815 
2816     tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
2817                          neon_element_offset(a->vm, a->index, a->size),
2818                          a->q ? 16 : 8, a->q ? 16 : 8);
2819     return true;
2820 }
2821 
2822 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
2823 {
2824     int pass, half;
2825     TCGv_i32 tmp[2];
2826 
2827     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2828         return false;
2829     }
2830 
2831     /* UNDEF accesses to D16-D31 if they don't exist. */
2832     if (!dc_isar_feature(aa32_simd_r32, s) &&
2833         ((a->vd | a->vm) & 0x10)) {
2834         return false;
2835     }
2836 
2837     if ((a->vd | a->vm) & a->q) {
2838         return false;
2839     }
2840 
2841     if (a->size == 3) {
2842         return false;
2843     }
2844 
2845     if (!vfp_access_check(s)) {
2846         return true;
2847     }
2848 
2849     tmp[0] = tcg_temp_new_i32();
2850     tmp[1] = tcg_temp_new_i32();
2851 
2852     for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
2853         for (half = 0; half < 2; half++) {
2854             read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
2855             switch (a->size) {
2856             case 0:
2857                 tcg_gen_bswap32_i32(tmp[half], tmp[half]);
2858                 break;
2859             case 1:
2860                 gen_swap_half(tmp[half], tmp[half]);
2861                 break;
2862             case 2:
2863                 break;
2864             default:
2865                 g_assert_not_reached();
2866             }
2867         }
2868         write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
2869         write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
2870     }
2871     return true;
2872 }
2873 
2874 static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
2875                               NeonGenWidenFn *widenfn,
2876                               NeonGenTwo64OpFn *opfn,
2877                               NeonGenTwo64OpFn *accfn)
2878 {
2879     /*
2880      * Pairwise long operations: widen both halves of the pair,
2881      * combine the pairs with the opfn, and then possibly accumulate
2882      * into the destination with the accfn.
2883      */
2884     int pass;
2885 
2886     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2887         return false;
2888     }
2889 
2890     /* UNDEF accesses to D16-D31 if they don't exist. */
2891     if (!dc_isar_feature(aa32_simd_r32, s) &&
2892         ((a->vd | a->vm) & 0x10)) {
2893         return false;
2894     }
2895 
2896     if ((a->vd | a->vm) & a->q) {
2897         return false;
2898     }
2899 
2900     if (!widenfn) {
2901         return false;
2902     }
2903 
2904     if (!vfp_access_check(s)) {
2905         return true;
2906     }
2907 
2908     for (pass = 0; pass < a->q + 1; pass++) {
2909         TCGv_i32 tmp;
2910         TCGv_i64 rm0_64, rm1_64, rd_64;
2911 
2912         rm0_64 = tcg_temp_new_i64();
2913         rm1_64 = tcg_temp_new_i64();
2914         rd_64 = tcg_temp_new_i64();
2915 
2916         tmp = tcg_temp_new_i32();
2917         read_neon_element32(tmp, a->vm, pass * 2, MO_32);
2918         widenfn(rm0_64, tmp);
2919         read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
2920         widenfn(rm1_64, tmp);
2921 
2922         opfn(rd_64, rm0_64, rm1_64);
2923 
2924         if (accfn) {
2925             TCGv_i64 tmp64 = tcg_temp_new_i64();
2926             read_neon_element64(tmp64, a->vd, pass, MO_64);
2927             accfn(rd_64, tmp64, rd_64);
2928         }
2929         write_neon_element64(rd_64, a->vd, pass, MO_64);
2930     }
2931     return true;
2932 }
2933 
2934 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
2935 {
2936     static NeonGenWidenFn * const widenfn[] = {
2937         gen_helper_neon_widen_s8,
2938         gen_helper_neon_widen_s16,
2939         tcg_gen_ext_i32_i64,
2940         NULL,
2941     };
2942     static NeonGenTwo64OpFn * const opfn[] = {
2943         gen_helper_neon_paddl_u16,
2944         gen_helper_neon_paddl_u32,
2945         tcg_gen_add_i64,
2946         NULL,
2947     };
2948 
2949     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
2950 }
2951 
2952 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
2953 {
2954     static NeonGenWidenFn * const widenfn[] = {
2955         gen_helper_neon_widen_u8,
2956         gen_helper_neon_widen_u16,
2957         tcg_gen_extu_i32_i64,
2958         NULL,
2959     };
2960     static NeonGenTwo64OpFn * const opfn[] = {
2961         gen_helper_neon_paddl_u16,
2962         gen_helper_neon_paddl_u32,
2963         tcg_gen_add_i64,
2964         NULL,
2965     };
2966 
2967     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
2968 }
2969 
2970 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
2971 {
2972     static NeonGenWidenFn * const widenfn[] = {
2973         gen_helper_neon_widen_s8,
2974         gen_helper_neon_widen_s16,
2975         tcg_gen_ext_i32_i64,
2976         NULL,
2977     };
2978     static NeonGenTwo64OpFn * const opfn[] = {
2979         gen_helper_neon_paddl_u16,
2980         gen_helper_neon_paddl_u32,
2981         tcg_gen_add_i64,
2982         NULL,
2983     };
2984     static NeonGenTwo64OpFn * const accfn[] = {
2985         gen_helper_neon_addl_u16,
2986         gen_helper_neon_addl_u32,
2987         tcg_gen_add_i64,
2988         NULL,
2989     };
2990 
2991     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
2992                              accfn[a->size]);
2993 }
2994 
2995 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
2996 {
2997     static NeonGenWidenFn * const widenfn[] = {
2998         gen_helper_neon_widen_u8,
2999         gen_helper_neon_widen_u16,
3000         tcg_gen_extu_i32_i64,
3001         NULL,
3002     };
3003     static NeonGenTwo64OpFn * const opfn[] = {
3004         gen_helper_neon_paddl_u16,
3005         gen_helper_neon_paddl_u32,
3006         tcg_gen_add_i64,
3007         NULL,
3008     };
3009     static NeonGenTwo64OpFn * const accfn[] = {
3010         gen_helper_neon_addl_u16,
3011         gen_helper_neon_addl_u32,
3012         tcg_gen_add_i64,
3013         NULL,
3014     };
3015 
3016     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3017                              accfn[a->size]);
3018 }
3019 
3020 typedef void ZipFn(TCGv_ptr, TCGv_ptr);
3021 
3022 static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
3023                        ZipFn *fn)
3024 {
3025     TCGv_ptr pd, pm;
3026 
3027     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3028         return false;
3029     }
3030 
3031     /* UNDEF accesses to D16-D31 if they don't exist. */
3032     if (!dc_isar_feature(aa32_simd_r32, s) &&
3033         ((a->vd | a->vm) & 0x10)) {
3034         return false;
3035     }
3036 
3037     if ((a->vd | a->vm) & a->q) {
3038         return false;
3039     }
3040 
3041     if (!fn) {
3042         /* Bad size or size/q combination */
3043         return false;
3044     }
3045 
3046     if (!vfp_access_check(s)) {
3047         return true;
3048     }
3049 
3050     pd = vfp_reg_ptr(true, a->vd);
3051     pm = vfp_reg_ptr(true, a->vm);
3052     fn(pd, pm);
3053     return true;
3054 }
3055 
3056 static bool trans_VUZP(DisasContext *s, arg_2misc *a)
3057 {
3058     static ZipFn * const fn[2][4] = {
3059         {
3060             gen_helper_neon_unzip8,
3061             gen_helper_neon_unzip16,
3062             NULL,
3063             NULL,
3064         }, {
3065             gen_helper_neon_qunzip8,
3066             gen_helper_neon_qunzip16,
3067             gen_helper_neon_qunzip32,
3068             NULL,
3069         }
3070     };
3071     return do_zip_uzp(s, a, fn[a->q][a->size]);
3072 }
3073 
3074 static bool trans_VZIP(DisasContext *s, arg_2misc *a)
3075 {
3076     static ZipFn * const fn[2][4] = {
3077         {
3078             gen_helper_neon_zip8,
3079             gen_helper_neon_zip16,
3080             NULL,
3081             NULL,
3082         }, {
3083             gen_helper_neon_qzip8,
3084             gen_helper_neon_qzip16,
3085             gen_helper_neon_qzip32,
3086             NULL,
3087         }
3088     };
3089     return do_zip_uzp(s, a, fn[a->q][a->size]);
3090 }
3091 
3092 static bool do_vmovn(DisasContext *s, arg_2misc *a,
3093                      NeonGenNarrowEnvFn *narrowfn)
3094 {
3095     TCGv_i64 rm;
3096     TCGv_i32 rd0, rd1;
3097 
3098     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3099         return false;
3100     }
3101 
3102     /* UNDEF accesses to D16-D31 if they don't exist. */
3103     if (!dc_isar_feature(aa32_simd_r32, s) &&
3104         ((a->vd | a->vm) & 0x10)) {
3105         return false;
3106     }
3107 
3108     if (a->vm & 1) {
3109         return false;
3110     }
3111 
3112     if (!narrowfn) {
3113         return false;
3114     }
3115 
3116     if (!vfp_access_check(s)) {
3117         return true;
3118     }
3119 
3120     rm = tcg_temp_new_i64();
3121     rd0 = tcg_temp_new_i32();
3122     rd1 = tcg_temp_new_i32();
3123 
3124     read_neon_element64(rm, a->vm, 0, MO_64);
3125     narrowfn(rd0, tcg_env, rm);
3126     read_neon_element64(rm, a->vm, 1, MO_64);
3127     narrowfn(rd1, tcg_env, rm);
3128     write_neon_element32(rd0, a->vd, 0, MO_32);
3129     write_neon_element32(rd1, a->vd, 1, MO_32);
3130     return true;
3131 }
3132 
3133 #define DO_VMOVN(INSN, FUNC)                                    \
3134     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3135     {                                                           \
3136         static NeonGenNarrowEnvFn * const narrowfn[] = {        \
3137             FUNC##8,                                            \
3138             FUNC##16,                                           \
3139             FUNC##32,                                           \
3140             NULL,                                               \
3141         };                                                      \
3142         return do_vmovn(s, a, narrowfn[a->size]);               \
3143     }
3144 
3145 DO_VMOVN(VMOVN, gen_neon_narrow_u)
3146 DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
3147 DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
3148 DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
3149 
3150 static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
3151 {
3152     TCGv_i32 rm0, rm1;
3153     TCGv_i64 rd;
3154     static NeonGenWidenFn * const widenfns[] = {
3155         gen_helper_neon_widen_u8,
3156         gen_helper_neon_widen_u16,
3157         tcg_gen_extu_i32_i64,
3158         NULL,
3159     };
3160     NeonGenWidenFn *widenfn = widenfns[a->size];
3161 
3162     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3163         return false;
3164     }
3165 
3166     /* UNDEF accesses to D16-D31 if they don't exist. */
3167     if (!dc_isar_feature(aa32_simd_r32, s) &&
3168         ((a->vd | a->vm) & 0x10)) {
3169         return false;
3170     }
3171 
3172     if (a->vd & 1) {
3173         return false;
3174     }
3175 
3176     if (!widenfn) {
3177         return false;
3178     }
3179 
3180     if (!vfp_access_check(s)) {
3181         return true;
3182     }
3183 
3184     rd = tcg_temp_new_i64();
3185     rm0 = tcg_temp_new_i32();
3186     rm1 = tcg_temp_new_i32();
3187 
3188     read_neon_element32(rm0, a->vm, 0, MO_32);
3189     read_neon_element32(rm1, a->vm, 1, MO_32);
3190 
3191     widenfn(rd, rm0);
3192     tcg_gen_shli_i64(rd, rd, 8 << a->size);
3193     write_neon_element64(rd, a->vd, 0, MO_64);
3194     widenfn(rd, rm1);
3195     tcg_gen_shli_i64(rd, rd, 8 << a->size);
3196     write_neon_element64(rd, a->vd, 1, MO_64);
3197     return true;
3198 }
3199 
3200 static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
3201 {
3202     TCGv_ptr fpst;
3203     TCGv_i64 tmp;
3204     TCGv_i32 dst0, dst1;
3205 
3206     if (!dc_isar_feature(aa32_bf16, s)) {
3207         return false;
3208     }
3209 
3210     /* UNDEF accesses to D16-D31 if they don't exist. */
3211     if (!dc_isar_feature(aa32_simd_r32, s) &&
3212         ((a->vd | a->vm) & 0x10)) {
3213         return false;
3214     }
3215 
3216     if ((a->vm & 1) || (a->size != 1)) {
3217         return false;
3218     }
3219 
3220     if (!vfp_access_check(s)) {
3221         return true;
3222     }
3223 
3224     fpst = fpstatus_ptr(FPST_STD);
3225     tmp = tcg_temp_new_i64();
3226     dst0 = tcg_temp_new_i32();
3227     dst1 = tcg_temp_new_i32();
3228 
3229     read_neon_element64(tmp, a->vm, 0, MO_64);
3230     gen_helper_bfcvt_pair(dst0, tmp, fpst);
3231 
3232     read_neon_element64(tmp, a->vm, 1, MO_64);
3233     gen_helper_bfcvt_pair(dst1, tmp, fpst);
3234 
3235     write_neon_element32(dst0, a->vd, 0, MO_32);
3236     write_neon_element32(dst1, a->vd, 1, MO_32);
3237     return true;
3238 }
3239 
3240 static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
3241 {
3242     TCGv_ptr fpst;
3243     TCGv_i32 ahp, tmp, tmp2, tmp3;
3244 
3245     if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3246         !dc_isar_feature(aa32_fp16_spconv, s)) {
3247         return false;
3248     }
3249 
3250     /* UNDEF accesses to D16-D31 if they don't exist. */
3251     if (!dc_isar_feature(aa32_simd_r32, s) &&
3252         ((a->vd | a->vm) & 0x10)) {
3253         return false;
3254     }
3255 
3256     if ((a->vm & 1) || (a->size != 1)) {
3257         return false;
3258     }
3259 
3260     if (!vfp_access_check(s)) {
3261         return true;
3262     }
3263 
3264     fpst = fpstatus_ptr(FPST_STD);
3265     ahp = get_ahp_flag();
3266     tmp = tcg_temp_new_i32();
3267     read_neon_element32(tmp, a->vm, 0, MO_32);
3268     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3269     tmp2 = tcg_temp_new_i32();
3270     read_neon_element32(tmp2, a->vm, 1, MO_32);
3271     gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
3272     tcg_gen_shli_i32(tmp2, tmp2, 16);
3273     tcg_gen_or_i32(tmp2, tmp2, tmp);
3274     read_neon_element32(tmp, a->vm, 2, MO_32);
3275     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3276     tmp3 = tcg_temp_new_i32();
3277     read_neon_element32(tmp3, a->vm, 3, MO_32);
3278     write_neon_element32(tmp2, a->vd, 0, MO_32);
3279     gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
3280     tcg_gen_shli_i32(tmp3, tmp3, 16);
3281     tcg_gen_or_i32(tmp3, tmp3, tmp);
3282     write_neon_element32(tmp3, a->vd, 1, MO_32);
3283     return true;
3284 }
3285 
3286 static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
3287 {
3288     TCGv_ptr fpst;
3289     TCGv_i32 ahp, tmp, tmp2, tmp3;
3290 
3291     if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3292         !dc_isar_feature(aa32_fp16_spconv, s)) {
3293         return false;
3294     }
3295 
3296     /* UNDEF accesses to D16-D31 if they don't exist. */
3297     if (!dc_isar_feature(aa32_simd_r32, s) &&
3298         ((a->vd | a->vm) & 0x10)) {
3299         return false;
3300     }
3301 
3302     if ((a->vd & 1) || (a->size != 1)) {
3303         return false;
3304     }
3305 
3306     if (!vfp_access_check(s)) {
3307         return true;
3308     }
3309 
3310     fpst = fpstatus_ptr(FPST_STD);
3311     ahp = get_ahp_flag();
3312     tmp3 = tcg_temp_new_i32();
3313     tmp2 = tcg_temp_new_i32();
3314     tmp = tcg_temp_new_i32();
3315     read_neon_element32(tmp, a->vm, 0, MO_32);
3316     read_neon_element32(tmp2, a->vm, 1, MO_32);
3317     tcg_gen_ext16u_i32(tmp3, tmp);
3318     gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3319     write_neon_element32(tmp3, a->vd, 0, MO_32);
3320     tcg_gen_shri_i32(tmp, tmp, 16);
3321     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
3322     write_neon_element32(tmp, a->vd, 1, MO_32);
3323     tcg_gen_ext16u_i32(tmp3, tmp2);
3324     gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3325     write_neon_element32(tmp3, a->vd, 2, MO_32);
3326     tcg_gen_shri_i32(tmp2, tmp2, 16);
3327     gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
3328     write_neon_element32(tmp2, a->vd, 3, MO_32);
3329     return true;
3330 }
3331 
3332 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
3333 {
3334     int vec_size = a->q ? 16 : 8;
3335     int rd_ofs = neon_full_reg_offset(a->vd);
3336     int rm_ofs = neon_full_reg_offset(a->vm);
3337 
3338     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3339         return false;
3340     }
3341 
3342     /* UNDEF accesses to D16-D31 if they don't exist. */
3343     if (!dc_isar_feature(aa32_simd_r32, s) &&
3344         ((a->vd | a->vm) & 0x10)) {
3345         return false;
3346     }
3347 
3348     if (a->size == 3) {
3349         return false;
3350     }
3351 
3352     if ((a->vd | a->vm) & a->q) {
3353         return false;
3354     }
3355 
3356     if (!vfp_access_check(s)) {
3357         return true;
3358     }
3359 
3360     fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
3361 
3362     return true;
3363 }
3364 
3365 #define DO_2MISC_VEC(INSN, FN)                                  \
3366     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3367     {                                                           \
3368         return do_2misc_vec(s, a, FN);                          \
3369     }
3370 
3371 DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
3372 DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
3373 DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
3374 DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
3375 DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
3376 DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
3377 DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
3378 
3379 static bool trans_VMVN(DisasContext *s, arg_2misc *a)
3380 {
3381     if (a->size != 0) {
3382         return false;
3383     }
3384     return do_2misc_vec(s, a, tcg_gen_gvec_not);
3385 }
3386 
3387 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
3388     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
3389                          uint32_t rm_ofs, uint32_t oprsz,               \
3390                          uint32_t maxsz)                                \
3391     {                                                                   \
3392         tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
3393                            DATA, FUNC);                                 \
3394     }
3395 
3396 #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
3397     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
3398                          uint32_t rm_ofs, uint32_t oprsz,               \
3399                          uint32_t maxsz)                                \
3400     {                                                                   \
3401         tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
3402     }
3403 
3404 WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
3405 WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aesd, 0)
3406 WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
3407 WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesimc, 0)
3408 WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
3409 WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
3410 WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
3411 
3412 #define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
3413     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3414     {                                                           \
3415         if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
3416             return false;                                       \
3417         }                                                       \
3418         return do_2misc_vec(s, a, gen_##INSN);                  \
3419     }
3420 
3421 DO_2M_CRYPTO(AESE, aa32_aes, 0)
3422 DO_2M_CRYPTO(AESD, aa32_aes, 0)
3423 DO_2M_CRYPTO(AESMC, aa32_aes, 0)
3424 DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
3425 DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
3426 DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
3427 DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
3428 
3429 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
3430 {
3431     TCGv_i32 tmp;
3432     int pass;
3433 
3434     /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
3435     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3436         return false;
3437     }
3438 
3439     /* UNDEF accesses to D16-D31 if they don't exist. */
3440     if (!dc_isar_feature(aa32_simd_r32, s) &&
3441         ((a->vd | a->vm) & 0x10)) {
3442         return false;
3443     }
3444 
3445     if (!fn) {
3446         return false;
3447     }
3448 
3449     if ((a->vd | a->vm) & a->q) {
3450         return false;
3451     }
3452 
3453     if (!vfp_access_check(s)) {
3454         return true;
3455     }
3456 
3457     tmp = tcg_temp_new_i32();
3458     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3459         read_neon_element32(tmp, a->vm, pass, MO_32);
3460         fn(tmp, tmp);
3461         write_neon_element32(tmp, a->vd, pass, MO_32);
3462     }
3463     return true;
3464 }
3465 
3466 static bool trans_VREV32(DisasContext *s, arg_2misc *a)
3467 {
3468     static NeonGenOneOpFn * const fn[] = {
3469         tcg_gen_bswap32_i32,
3470         gen_swap_half,
3471         NULL,
3472         NULL,
3473     };
3474     return do_2misc(s, a, fn[a->size]);
3475 }
3476 
3477 static bool trans_VREV16(DisasContext *s, arg_2misc *a)
3478 {
3479     if (a->size != 0) {
3480         return false;
3481     }
3482     return do_2misc(s, a, gen_rev16);
3483 }
3484 
3485 static bool trans_VCLS(DisasContext *s, arg_2misc *a)
3486 {
3487     static NeonGenOneOpFn * const fn[] = {
3488         gen_helper_neon_cls_s8,
3489         gen_helper_neon_cls_s16,
3490         gen_helper_neon_cls_s32,
3491         NULL,
3492     };
3493     return do_2misc(s, a, fn[a->size]);
3494 }
3495 
3496 static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
3497 {
3498     tcg_gen_clzi_i32(rd, rm, 32);
3499 }
3500 
3501 static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
3502 {
3503     static NeonGenOneOpFn * const fn[] = {
3504         gen_helper_neon_clz_u8,
3505         gen_helper_neon_clz_u16,
3506         do_VCLZ_32,
3507         NULL,
3508     };
3509     return do_2misc(s, a, fn[a->size]);
3510 }
3511 
3512 static bool trans_VCNT(DisasContext *s, arg_2misc *a)
3513 {
3514     if (a->size != 0) {
3515         return false;
3516     }
3517     return do_2misc(s, a, gen_helper_neon_cnt_u8);
3518 }
3519 
3520 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3521                        uint32_t oprsz, uint32_t maxsz)
3522 {
3523     tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
3524                       vece == MO_16 ? 0x7fff : 0x7fffffff,
3525                       oprsz, maxsz);
3526 }
3527 
3528 static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
3529 {
3530     if (a->size == MO_16) {
3531         if (!dc_isar_feature(aa32_fp16_arith, s)) {
3532             return false;
3533         }
3534     } else if (a->size != MO_32) {
3535         return false;
3536     }
3537     return do_2misc_vec(s, a, gen_VABS_F);
3538 }
3539 
3540 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3541                        uint32_t oprsz, uint32_t maxsz)
3542 {
3543     tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
3544                       vece == MO_16 ? 0x8000 : 0x80000000,
3545                       oprsz, maxsz);
3546 }
3547 
3548 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
3549 {
3550     if (a->size == MO_16) {
3551         if (!dc_isar_feature(aa32_fp16_arith, s)) {
3552             return false;
3553         }
3554     } else if (a->size != MO_32) {
3555         return false;
3556     }
3557     return do_2misc_vec(s, a, gen_VNEG_F);
3558 }
3559 
3560 static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
3561 {
3562     if (a->size != 2) {
3563         return false;
3564     }
3565     return do_2misc(s, a, gen_helper_recpe_u32);
3566 }
3567 
3568 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
3569 {
3570     if (a->size != 2) {
3571         return false;
3572     }
3573     return do_2misc(s, a, gen_helper_rsqrte_u32);
3574 }
3575 
3576 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
3577     static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
3578     {                                                   \
3579         FUNC(d, tcg_env, m);                            \
3580     }
3581 
3582 WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
3583 WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
3584 WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
3585 WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
3586 WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
3587 WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
3588 
3589 static bool trans_VQABS(DisasContext *s, arg_2misc *a)
3590 {
3591     static NeonGenOneOpFn * const fn[] = {
3592         gen_VQABS_s8,
3593         gen_VQABS_s16,
3594         gen_VQABS_s32,
3595         NULL,
3596     };
3597     return do_2misc(s, a, fn[a->size]);
3598 }
3599 
3600 static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
3601 {
3602     static NeonGenOneOpFn * const fn[] = {
3603         gen_VQNEG_s8,
3604         gen_VQNEG_s16,
3605         gen_VQNEG_s32,
3606         NULL,
3607     };
3608     return do_2misc(s, a, fn[a->size]);
3609 }
3610 
3611 #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
3612     static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
3613                            uint32_t rm_ofs,                             \
3614                            uint32_t oprsz, uint32_t maxsz)              \
3615     {                                                                   \
3616         static gen_helper_gvec_2_ptr * const fns[4] = {                 \
3617             NULL, HFUNC, SFUNC, NULL,                                   \
3618         };                                                              \
3619         TCGv_ptr fpst;                                                  \
3620         fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
3621         tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
3622                            fns[vece]);                                  \
3623     }                                                                   \
3624     static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
3625     {                                                                   \
3626         if (a->size == MO_16) {                                         \
3627             if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
3628                 return false;                                           \
3629             }                                                           \
3630         } else if (a->size != MO_32) {                                  \
3631             return false;                                               \
3632         }                                                               \
3633         return do_2misc_vec(s, a, gen_##INSN);                          \
3634     }
3635 
3636 DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
3637 DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
3638 DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
3639 DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
3640 DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
3641 DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
3642 DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
3643 DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
3644 DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
3645 DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
3646 DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
3647 
3648 DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
3649 
3650 static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
3651 {
3652     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3653         return false;
3654     }
3655     return trans_VRINTX_impl(s, a);
3656 }
3657 
3658 #define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
3659     static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
3660                            uint32_t rm_ofs,                             \
3661                            uint32_t oprsz, uint32_t maxsz)              \
3662     {                                                                   \
3663         static gen_helper_gvec_2_ptr * const fns[4] = {                 \
3664             NULL,                                                       \
3665             gen_helper_gvec_##OP##h,                                    \
3666             gen_helper_gvec_##OP##s,                                    \
3667             NULL,                                                       \
3668         };                                                              \
3669         TCGv_ptr fpst;                                                  \
3670         fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD);       \
3671         tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
3672                            arm_rmode_to_sf(RMODE), fns[vece]);          \
3673     }                                                                   \
3674     static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
3675     {                                                                   \
3676         if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
3677             return false;                                               \
3678         }                                                               \
3679         if (a->size == MO_16) {                                         \
3680             if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
3681                 return false;                                           \
3682             }                                                           \
3683         } else if (a->size != MO_32) {                                  \
3684             return false;                                               \
3685         }                                                               \
3686         return do_2misc_vec(s, a, gen_##INSN);                          \
3687     }
3688 
3689 DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
3690 DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
3691 DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
3692 DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
3693 DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
3694 DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
3695 DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
3696 DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
3697 
3698 DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
3699 DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
3700 DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
3701 DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
3702 DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
3703 
3704 static bool trans_VSWP(DisasContext *s, arg_2misc *a)
3705 {
3706     TCGv_i64 rm, rd;
3707     int pass;
3708 
3709     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3710         return false;
3711     }
3712 
3713     /* UNDEF accesses to D16-D31 if they don't exist. */
3714     if (!dc_isar_feature(aa32_simd_r32, s) &&
3715         ((a->vd | a->vm) & 0x10)) {
3716         return false;
3717     }
3718 
3719     if (a->size != 0) {
3720         return false;
3721     }
3722 
3723     if ((a->vd | a->vm) & a->q) {
3724         return false;
3725     }
3726 
3727     if (!vfp_access_check(s)) {
3728         return true;
3729     }
3730 
3731     rm = tcg_temp_new_i64();
3732     rd = tcg_temp_new_i64();
3733     for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
3734         read_neon_element64(rm, a->vm, pass, MO_64);
3735         read_neon_element64(rd, a->vd, pass, MO_64);
3736         write_neon_element64(rm, a->vd, pass, MO_64);
3737         write_neon_element64(rd, a->vm, pass, MO_64);
3738     }
3739     return true;
3740 }
3741 
3742 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3743 {
3744     TCGv_i32 rd, tmp;
3745 
3746     rd = tcg_temp_new_i32();
3747     tmp = tcg_temp_new_i32();
3748 
3749     tcg_gen_shli_i32(rd, t0, 8);
3750     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3751     tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3752     tcg_gen_or_i32(rd, rd, tmp);
3753 
3754     tcg_gen_shri_i32(t1, t1, 8);
3755     tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3756     tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3757     tcg_gen_or_i32(t1, t1, tmp);
3758     tcg_gen_mov_i32(t0, rd);
3759 }
3760 
3761 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3762 {
3763     TCGv_i32 rd, tmp;
3764 
3765     rd = tcg_temp_new_i32();
3766     tmp = tcg_temp_new_i32();
3767 
3768     tcg_gen_shli_i32(rd, t0, 16);
3769     tcg_gen_andi_i32(tmp, t1, 0xffff);
3770     tcg_gen_or_i32(rd, rd, tmp);
3771     tcg_gen_shri_i32(t1, t1, 16);
3772     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3773     tcg_gen_or_i32(t1, t1, tmp);
3774     tcg_gen_mov_i32(t0, rd);
3775 }
3776 
3777 static bool trans_VTRN(DisasContext *s, arg_2misc *a)
3778 {
3779     TCGv_i32 tmp, tmp2;
3780     int pass;
3781 
3782     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3783         return false;
3784     }
3785 
3786     /* UNDEF accesses to D16-D31 if they don't exist. */
3787     if (!dc_isar_feature(aa32_simd_r32, s) &&
3788         ((a->vd | a->vm) & 0x10)) {
3789         return false;
3790     }
3791 
3792     if ((a->vd | a->vm) & a->q) {
3793         return false;
3794     }
3795 
3796     if (a->size == 3) {
3797         return false;
3798     }
3799 
3800     if (!vfp_access_check(s)) {
3801         return true;
3802     }
3803 
3804     tmp = tcg_temp_new_i32();
3805     tmp2 = tcg_temp_new_i32();
3806     if (a->size == MO_32) {
3807         for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
3808             read_neon_element32(tmp, a->vm, pass, MO_32);
3809             read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
3810             write_neon_element32(tmp2, a->vm, pass, MO_32);
3811             write_neon_element32(tmp, a->vd, pass + 1, MO_32);
3812         }
3813     } else {
3814         for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3815             read_neon_element32(tmp, a->vm, pass, MO_32);
3816             read_neon_element32(tmp2, a->vd, pass, MO_32);
3817             if (a->size == MO_8) {
3818                 gen_neon_trn_u8(tmp, tmp2);
3819             } else {
3820                 gen_neon_trn_u16(tmp, tmp2);
3821             }
3822             write_neon_element32(tmp2, a->vm, pass, MO_32);
3823             write_neon_element32(tmp, a->vd, pass, MO_32);
3824         }
3825     }
3826     return true;
3827 }
3828 
3829 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
3830 {
3831     if (!dc_isar_feature(aa32_i8mm, s)) {
3832         return false;
3833     }
3834     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3835                         gen_helper_gvec_smmla_b);
3836 }
3837 
3838 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
3839 {
3840     if (!dc_isar_feature(aa32_i8mm, s)) {
3841         return false;
3842     }
3843     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3844                         gen_helper_gvec_ummla_b);
3845 }
3846 
3847 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
3848 {
3849     if (!dc_isar_feature(aa32_i8mm, s)) {
3850         return false;
3851     }
3852     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3853                         gen_helper_gvec_usmmla_b);
3854 }
3855 
3856 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
3857 {
3858     if (!dc_isar_feature(aa32_bf16, s)) {
3859         return false;
3860     }
3861     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3862                         gen_helper_gvec_bfmmla);
3863 }
3864 
3865 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
3866 {
3867     if (!dc_isar_feature(aa32_bf16, s)) {
3868         return false;
3869     }
3870     return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
3871                              gen_helper_gvec_bfmlal);
3872 }
3873 
3874 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
3875 {
3876     if (!dc_isar_feature(aa32_bf16, s)) {
3877         return false;
3878     }
3879     return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
3880                              (a->index << 1) | a->q, FPST_STD,
3881                              gen_helper_gvec_bfmlal_idx);
3882 }
3883