/*
 * ARM translation: AArch32 Neon instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"

static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, tcg_env, vfp_reg_offset(dp, reg));
    return ret;
}

static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, tcg_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, tcg_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, tcg_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, tcg_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, tcg_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, tcg_env, offset);
        break;
    case MO_UQ:
        tcg_gen_ld_i64(var, tcg_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i32(var, tcg_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i32(var, tcg_env, offset);
        break;
    case MO_32:
        tcg_gen_st_i32(var, tcg_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, tcg_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, tcg_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, tcg_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, tcg_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
                         int data, gen_helper_gvec_4 *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; for the mixed
     * Q- and D-reg forms only the bits for the Q-reg operands are set.
     */
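    /*
     * For example, an all-Q insn (q == 0b111) with an odd vd gives
     * (vd & 1) * 4 == 0b100, 0b100 & 0b111 != 0, and so UNDEFs.
     */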
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       opr_sz, opr_sz, data, fn_gvec);
    return true;
}

static bool do_neon_ddda_env(DisasContext *s, int q, int vd, int vn, int vm,
                             int data, gen_helper_gvec_4_ptr *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; for the mixed
     * Q- and D-reg forms only the bits for the Q-reg operands are set.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       tcg_env,
                       opr_sz, opr_sz, data, fn_gvec);
    return true;
}

static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
                              int data, ARMFPStatusFlavour fp_flavour,
                              gen_helper_gvec_4_ptr *fn_gvec_ptr)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; for the mixed
     * Q- and D-reg forms only the bits for the Q-reg operands are set.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    TCGv_ptr fpst = fpstatus_ptr(fp_flavour);

    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
    return true;
}

static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah);
    }
    return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                             FPST_STD, gen_helper_gvec_fcmlas);
}

static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    return true;
}

static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_sdot_b);
}

static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_udot_b);
}

static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_usdot_b);
}

static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda_env(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                            gen_helper_gvec_bfdot);
}

static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       tcg_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}

static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    int data = (a->index << 2) | a->rot;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
    }
    return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                             FPST_STD, gen_helper_gvec_fcmlas_idx);
}

static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_sdot_idx_b);
}

static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_udot_idx_b);
}

static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_usdot_idx_b);
}

static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_sudot_idx_b);
}

static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda_env(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                            gen_helper_gvec_bfdot_idx);
}

static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       tcg_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}

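/*
 * Decode table for the load/store "itype" field: nregs is the number
 * of per-register passes, interleave is how many registers' elements
 * are interleaved within each pass, and spacing is the stride between
 * interleaved registers; nregs * interleave registers are transferred.
 */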
static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};

static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
        }
        store_reg(s, rn, base);
    }
}

static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp mop, align, endian;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian. */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    /* Enforce alignment requested by the instruction */
    if (a->align) {
        align = pow2_align(a->align + 2); /* 4 << a->align */
    } else {
        align = s->align_mem ? MO_ALIGN : 0;
    }

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
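    /* For example, the eight bytes of a VLD1.8 pass become one 64-bit load. */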
    if (interleave == 1 && endian == MO_LE) {
        /* Retain any natural alignment. */
        if (align == MO_ALIGN) {
            align = pow2_align(size);
        }
        size = 3;
    }

    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    mop = endian | size | align;
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
                }
                tcg_gen_addi_i32(addr, addr, 1 << size);

                /* Subsequent memory operations inherit alignment */
                mop &= ~MO_AMASK;
            }
        }
    }

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}

static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    MemOp mop, align;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    align = 0;
    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = MO_32;
        align = MO_ALIGN_16;
    } else if (a->a) {
        switch (nregs) {
        case 1:
            if (size == 0) {
                return false;
            }
            align = MO_ALIGN;
            break;
        case 2:
            align = pow2_align(size + 1);
            break;
        case 3:
            return false;
        case 4:
            if (size == 2) {
                align = pow2_align(3);
            } else {
                align = pow2_align(size + 2);
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;
    mop = size | align;
    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
                             neon_full_reg_offset(vd), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}

static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    /* Neon load/store single structure to one lane */
    int reg;
    int nregs = a->n + 1;
    int vd = a->vd;
    TCGv_i32 addr, tmp;
    MemOp mop;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (a->stride != 1) {
            return false;
        }
        if (((a->align & (1 << a->size)) != 0) ||
            (a->size == 2 && (a->align == 1 || a->align == 2))) {
            return false;
        }
        break;
    case 2:
        if (a->size == 2 && (a->align & 2) != 0) {
            return false;
        }
        break;
    case 3:
        if (a->align != 0) {
            return false;
        }
        break;
    case 4:
        if (a->size == 2 && a->align == 3) {
            return false;
        }
        break;
    default:
        g_assert_not_reached();
    }
    if ((vd + a->stride * (nregs - 1)) > 31) {
        /*
         * Attempts to write off the end of the register file are
         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
         * access off the end of the array that holds the register data.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Pick up SCTLR settings */
    mop = finalize_memop(s, a->size);

    if (a->align) {
        MemOp align_op;

        switch (nregs) {
        case 1:
            /* For VLD1, use natural alignment. */
            align_op = MO_ALIGN;
            break;
        case 2:
            /* For VLD2, use double alignment. */
            align_op = pow2_align(a->size + 1);
            break;
        case 4:
            if (a->size == MO_32) {
                /*
                 * For VLD4.32, align = 1 is double alignment, align = 2 is
                 * quad alignment; align = 3 is rejected above.
                 */
                align_op = pow2_align(a->size + a->align);
            } else {
                /* For VLD4.8 and VLD4.16, we want quad alignment. */
                align_op = pow2_align(a->size + 2);
            }
            break;
        default:
            /* For VLD3, the alignment field is zero and rejected above. */
            g_assert_not_reached();
        }

        mop = (mop & ~MO_AMASK) | align_op;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}

static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
DO_3SAME(VRSHL_S, gen_gvec_srshl)
DO_3SAME(VRSHL_U, gen_gvec_urshl)
DO_3SAME(VQSHL_S, gen_neon_sqshl)
DO_3SAME(VQSHL_U, gen_neon_uqshl)
DO_3SAME(VQRSHL_S, gen_neon_sqrshl)
DO_3SAME(VQRSHL_U, gen_neon_uqrshl)

/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)

#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size == 3) {                                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp)
DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp)
DO_3SAME_NO_SZ_3(VPMIN_S, gen_gvec_sminp)
DO_3SAME_NO_SZ_3(VPMAX_U, gen_gvec_umaxp)
DO_3SAME_NO_SZ_3(VPMIN_U, gen_gvec_uminp)
DO_3SAME_NO_SZ_3(VHADD_S, gen_gvec_shadd)
DO_3SAME_NO_SZ_3(VHADD_U, gen_gvec_uhadd)
DO_3SAME_NO_SZ_3(VHSUB_S, gen_gvec_shsub)
DO_3SAME_NO_SZ_3(VHSUB_U, gen_gvec_uhsub)
DO_3SAME_NO_SZ_3(VRHADD_S, gen_gvec_srhadd)
DO_3SAME_NO_SZ_3(VRHADD_U, gen_gvec_urhadd)

#define DO_3SAME_CMP(INSN, COND)                                        \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    }                                                                   \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)

#define WRAP_OOL_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
    {                                                                   \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)

static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_3same(s, a, gen_VMUL_p_3s);
}

#define DO_VQRDMLAH(INSN, FUNC)                                         \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_rdm, s)) {                            \
            return false;                                               \
        }                                                               \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)

#define DO_SHA1(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha1, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)

#define DO_SHA2(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha2, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)

/*
 * Some helper functions need to be passed the tcg_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, tcg_env, n, m);                                         \
    }
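/*
 * Illustrative use (hypothetical names): WRAP_ENV_FN(gen_foo, gen_helper_foo)
 * defines gen_foo(d, n, m), which expands to gen_helper_foo(d, tcg_env, n, m).
 */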

#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    { return a->size >= 1 && a->size <= 2 && do_3same(s, a, FUNC); }

DO_3SAME_VQDMULH(VQDMULH, gen_gvec_sqdmulh_qc)
DO_3SAME_VQDMULH(VQRDMULH, gen_gvec_sqrdmulh_qc)

#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rn_ofs, uint32_t rm_ofs,              \
                         uint32_t oprsz, uint32_t maxsz)                \
    {                                                                   \
        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
                           oprsz, maxsz, 0, FUNC);                      \
    }

#define DO_3S_FP_GVEC(INSN, SFUNC, HFUNC)                               \
    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
    }

DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h)
DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h)
DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h)

WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)

static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMAXNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMAXNM_fp32_3s);
}

static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMINNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMINNM_fp32_3s);
}

static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}

#define DO_2SH(INSN, FUNC)                                              \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_vector_2sh(s, a, FUNC);                               \
    }                                                                   \

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)
DO_2SH(VSHR_S, gen_gvec_sshr)
DO_2SH(VSHR_U, gen_gvec_ushr)
DO_2SH(VQSHLU, gen_neon_sqshlui)
DO_2SH(VQSHL_U, gen_neon_uqshli)
DO_2SH(VQSHL_S, gen_neon_sqshli)

static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenOne64OpEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2, rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
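    /* For example, a VSHRN with shift 8 is performed as a left shift by -8. */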
    constimm = tcg_constant_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();
    rd = tcg_temp_new_i64();

    /* Load both inputs first to avoid potential overwrite if rm == rd */
    read_neon_element64(rm1, a->vm, 0, MO_64);
    read_neon_element64(rm2, a->vm, 1, MO_64);

    shiftfn(rm1, rm1, constimm);
    narrowfn(rd, tcg_env, rm1);
    write_neon_element64(rd, a->vd, 0, MO_32);

    shiftfn(rm2, rm2, constimm);
    narrowfn(rd, tcg_env, rm2);
    write_neon_element64(rd, a->vd, 1, MO_32);

    return true;
}

static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenOne64OpEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
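    /* e.g. size 1, shift 3: imm = 0xfffdfffd, i.e. -3 in each 16-bit lane. */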
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_constant_i32(imm);

    /* Load all inputs first to avoid potential overwrite */
    rm1 = tcg_temp_new_i32();
    rm2 = tcg_temp_new_i32();
    rm3 = tcg_temp_new_i32();
    rm4 = tcg_temp_new_i32();
    read_neon_element32(rm1, a->vm, 0, MO_32);
    read_neon_element32(rm2, a->vm, 1, MO_32);
    read_neon_element32(rm3, a->vm, 2, MO_32);
    read_neon_element32(rm4, a->vm, 3, MO_32);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);

    narrowfn(rtmp, tcg_env, rtmp);
    write_neon_element64(rtmp, a->vd, 0, MO_32);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);

    narrowfn(rtmp, tcg_env, rtmp);
    write_neon_element64(rtmp, a->vd, 1, MO_32);
    return true;
}

#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
    }
#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
    }

static void gen_neon_narrow_u32(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_ext32u_i64(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}

DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)

static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is a widen-and-shift operation. The shift is always less
     * than the width of the source type, so after widening the input
     * vector we can simply shift the whole 64-bit widened register,
     * and then clear the potential overflow bits resulting from left
     * bits of the narrow input appearing as right bits of the left
     * neighbour narrow input. Calculate a mask of bits to clear.
     */
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }

    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);
    tmp = tcg_temp_new_i64();

    widenfn(tmp, rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 0, MO_64);

    widenfn(tmp, rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 1, MO_64);
    return true;
}

static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}

static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}

static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
                      gen_helper_gvec_2_ptr *fn)
{
    /* FP operations in 2-reg-and-shift group */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);
    TCGv_ptr fpst;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
    return true;
}

#define DO_FP_2SH(INSN, FUNC)                                           \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_fp_2sh(s, a, FUNC);                                   \
    }

DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)

static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                        GVecGen2iFn *fn)
{
    uint64_t imm;
    int reg_ofs, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    reg_ofs = neon_full_reg_offset(a->vd);
    vec_size = a->q ? 16 : 8;
    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
    return true;
}

static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    GVecGen2iFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        /* for op=1, the imm will be inverted, so BIC becomes AND. */
        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        fn = gen_VMOV_1r;
    }
    return do_1reg_imm(s, a, fn);
}

static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
                           NeonGenWidenFn *widenfn,
                           NeonGenTwo64OpFn *opfn,
                           int src1_mop, int src2_mop)
{
    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
    TCGv_i64 rn0_64, rn1_64, rm_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn0_64 = tcg_temp_new_i64();
    rn1_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();

    if (src1_mop >= 0) {
        read_neon_element64(rn0_64, a->vn, 0, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 0, MO_32);
        widenfn(rn0_64, tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 0, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 0, MO_32);
        widenfn(rm_64, tmp);
    }

    opfn(rn0_64, rn0_64, rm_64);

    /*
     * Load second pass inputs before storing the first pass result, to
     * avoid incorrect results if a narrow input overlaps with the result.
     */
    if (src1_mop >= 0) {
        read_neon_element64(rn1_64, a->vn, 1, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 1, MO_32);
        widenfn(rn1_64, tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 1, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 1, MO_32);
        widenfn(rm_64, tmp);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);

    opfn(rn1_64, rn1_64, rm_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);

    return true;
}

#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenWidenFn * const widenfn[] = {                     \
            gen_helper_neon_widen_##S##8,                               \
            gen_helper_neon_widen_##S##16,                              \
            NULL, NULL,                                                 \
        };                                                              \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
                              SRC1WIDE ? MO_UQ : narrow_mop,            \
                              narrow_mop);                              \
    }

DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)

static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
{
    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
    TCGv_i64 rn_64, rm_64;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn || !narrowfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vn | a->vm) & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    read_neon_element64(rn_64, a->vn, 0, MO_64);
    read_neon_element64(rm_64, a->vm, 0, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd0, rn_64);

    read_neon_element64(rn_64, a->vn, 1, MO_64);
    read_neon_element64(rm_64, a->vm, 1, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd1, rn_64);

    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);

    return true;
}

#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        static NeonGenNarrowFn * const narrowfn[] = {                   \
            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
            EXTOP,                                                      \
            NULL,                                                       \
        };                                                              \
        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
    }

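/* Round by adding 1 << 31, then narrow to the high 32 bits of the input. */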
static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}

DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)

static bool do_long_3d(DisasContext *s, arg_3diff *a,
                       NeonGenTwoOpWidenFn *opfn,
                       NeonGenTwo64OpFn *accfn)
{
    /*
     * 3-regs different lengths, long operations.
     * These perform an operation on two inputs that returns a double-width
     * result, and then possibly perform an accumulation operation of
     * that result into the double-width destination.
     */
    TCGv_i64 rd0, rd1, tmp;
    TCGv_i32 rn, rm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd0 = tcg_temp_new_i64();
    rd1 = tcg_temp_new_i64();

    rn = tcg_temp_new_i32();
    rm = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    read_neon_element32(rm, a->vm, 0, MO_32);
    opfn(rd0, rn, rm);

    read_neon_element32(rn, a->vn, 1, MO_32);
    read_neon_element32(rm, a->vm, 1, MO_32);
    opfn(rd1, rn, rm);

    /* Don't store results until after all loads: they might overlap */
    if (accfn) {
        tmp = tcg_temp_new_i64();
        read_neon_element64(tmp, a->vd, 0, MO_64);
        accfn(rd0, tmp, rd0);
        read_neon_element64(tmp, a->vd, 1, MO_64);
        accfn(rd1, tmp, rd1);
    }

    write_neon_element64(rd0, a->vd, 0, MO_64);
    write_neon_element64(rd1, a->vd, 1, MO_64);

    return true;
}

static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}

static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}

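/* Widening 32 x 32 -> 64 multiplies: full product via muls2/mulu2, then concat. */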
static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_muls2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);
}

static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_mulu2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);
}

static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_s8,
        gen_helper_neon_mull_s16,
        gen_mull_s32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_u8,
        gen_helper_neon_mull_u16,
        gen_mull_u32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

#define DO_VMLAL(INSN, MULL, ACC)                                       \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            gen_helper_neon_##MULL##8,                                  \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            gen_helper_neon_##ACC##l_u16,                               \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
    }

DO_VMLAL(VMLAL_S, mull_s, add)
DO_VMLAL(VMLAL_U, mull_u, add)
DO_VMLAL(VMLSL_S, mull_s, sub)
DO_VMLAL(VMLSL_U, mull_u, sub)

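/* Saturating doubling multiplies: take the full product, then saturating-add it to itself. */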
static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_helper_neon_mull_s16(rd, rn, rm);
    gen_helper_neon_addl_saturate_s32(rd, tcg_env, rd, rd);
}

static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_mull_s32(rd, rn, rm);
    gen_helper_neon_addl_saturate_s64(rd, tcg_env, rd, rd);
}

static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
}

static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
}

static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}

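/*
 * VQDMLSL accumulation: subtract the doubled product by negating it
 * and reusing the saturating-add helpers.
 */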
static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_negl_u32(rm, rm);
    gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
}

static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_neg_i64(rm, rm);
    gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
}

static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
{
    gen_helper_gvec_3 *fn_gvec;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    switch (a->size) {
    case 0:
        fn_gvec = gen_helper_neon_pmull_h;
        break;
    case 2:
        if (!dc_isar_feature(aa32_pmull, s)) {
            return false;
        }
        fn_gvec = gen_helper_gvec_pmull_q;
        break;
    default:
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
                       neon_full_reg_offset(a->vn),
                       neon_full_reg_offset(a->vm),
                       16, 16, 0, fn_gvec);
    return true;
}

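/* Duplicate one 16-bit half of a 32-bit value into both halves. */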
static void gen_neon_dup_low16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_shli_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
}

static void gen_neon_dup_high16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_andi_i32(var, var, 0xffff0000);
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
}

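/*
 * Load the scalar operand for a two-reg-and-scalar op. For 32-bit
 * scalars, reg bits [3:0] select the D register and bit [4] the
 * element; for 16-bit scalars, bits [2:0] select the D register,
 * bits [4:3] the element, and the chosen half is duplicated across
 * the returned 32-bit value.
 */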
static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (size == MO_16) {
        read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
        if (reg & 8) {
            gen_neon_dup_high16(tmp);
        } else {
            gen_neon_dup_low16(tmp);
        }
    } else {
        read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
    }
    return tmp;
}

static bool do_2scalar(DisasContext *s, arg_2scalar *a,
                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
{
    /*
     * Two registers and a scalar: perform an operation between
     * the input elements and the scalar, and then possibly
     * perform an accumulation operation of that result into the
     * destination.
     */
    TCGv_i32 scalar, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vn, pass, MO_32);
        opfn(tmp, tmp, scalar);
        if (accfn) {
            TCGv_i32 rd = tcg_temp_new_i32();
            read_neon_element32(rd, a->vd, pass, MO_32);
            accfn(tmp, rd, tmp);
        }
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    return true;
}

static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_add_u16,
        tcg_gen_add_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_sub_u16,
        tcg_gen_sub_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}

static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
                              gen_helper_gvec_3_ptr *fn)
{
    /* Two registers and a scalar, using gvec */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs;
    int idx;
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* a->vm is M:Vm, which encodes both register and index */
    idx = extract32(a->vm, a->size + 2, 2);
    a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_full_reg_offset(a->vm);

    fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
                       vec_size, vec_size, idx, fn);
    return true;
}

#define DO_VMUL_F_2sc(NAME, FUNC)                                       \
    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
    {                                                                   \
        static gen_helper_gvec_3_ptr * const opfn[] = {                 \
            NULL,                                                       \
            gen_helper_##FUNC##_h,                                      \
            gen_helper_##FUNC##_s,                                      \
            NULL,                                                       \
        };                                                              \
        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
            return false;                                               \
        }                                                               \
        return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
    }

DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)

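/*
 * The qdmulh/qrdmulh helpers take a CPU env argument (used to set the
 * QC saturation flag); wrap them to fit the two-operand NeonGenTwoOpFn
 * signature expected by do_2scalar().
 */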
WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)

static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_VQDMULH_16,
        gen_VQDMULH_32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_VQRDMULH_16,
        gen_VQRDMULH_32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
     * performs a kind of fused op-then-accumulate using a helper
     * function that takes all of rd, rn and the scalar at once.
     */
    TCGv_i32 scalar, rn, rd;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (!dc_isar_feature(aa32_rdm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);
    rn = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(rn, a->vn, pass, MO_32);
        read_neon_element32(rd, a->vd, pass, MO_32);
        opfn(rd, tcg_env, rn, scalar, rd);
        write_neon_element32(rd, a->vd, pass, MO_32);
    }
    return true;
}

static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenThreeOpEnvFn *opfn[] = {
        NULL,
        gen_helper_neon_qrdmlah_s16,
        gen_helper_neon_qrdmlah_s32,
        NULL,
    };
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
}

static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenThreeOpEnvFn *opfn[] = {
        NULL,
        gen_helper_neon_qrdmlsh_s16,
        gen_helper_neon_qrdmlsh_s32,
        NULL,
    };
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
}

static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
                            NeonGenTwoOpWidenFn *opfn,
                            NeonGenTwo64OpFn *accfn)
{
    /*
     * Two registers and a scalar, long operations: perform an
     * operation on the input elements and the scalar which produces
     * a double-width result, and then possibly perform an accumulation
     * operation of that result into the destination.
     */
    TCGv_i32 scalar, rn;
    TCGv_i64 rn0_64, rn1_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);

    /* Load all inputs before writing any outputs, in case of overlap */
    rn = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    rn0_64 = tcg_temp_new_i64();
    opfn(rn0_64, rn, scalar);

    read_neon_element32(rn, a->vn, 1, MO_32);
    rn1_64 = tcg_temp_new_i64();
    opfn(rn1_64, rn, scalar);

    if (accfn) {
        TCGv_i64 t64 = tcg_temp_new_i64();
        read_neon_element64(t64, a->vd, 0, MO_64);
        accfn(rn0_64, t64, rn0_64);
        read_neon_element64(t64, a->vd, 1, MO_64);
        accfn(rn1_64, t64, rn1_64);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);
    return true;
}

static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_helper_neon_mull_s16,
        gen_mull_s32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_helper_neon_mull_u16,
        gen_mull_u32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

#define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            NULL,                                                       \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            NULL,                                                       \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
    }

DO_VMLAL_2SC(VMLAL_S, mull_s, add)
DO_VMLAL_2SC(VMLAL_U, mull_u, add)
DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
DO_VMLAL_2SC(VMLSL_U, mull_u, sub)

static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (a->imm > 7 && !a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

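    /*
     * VEXT extracts vec_size bytes starting at byte a->imm from the
     * concatenation <Vm:Vn>; e.g. for the 64-bit case with imm == 3
     * the result is bytes Vm[2:0]:Vn[7:3].
     */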
    if (!a->q) {
        /* Extract 64 bits from <Vm:Vn> */
        TCGv_i64 left, right, dest;

        left = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        read_neon_element64(right, a->vn, 0, MO_64);
        read_neon_element64(left, a->vm, 0, MO_64);
        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
        write_neon_element64(dest, a->vd, 0, MO_64);
    } else {
        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
        TCGv_i64 left, middle, right, destleft, destright;

        left = tcg_temp_new_i64();
        middle = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        destleft = tcg_temp_new_i64();
        destright = tcg_temp_new_i64();

        if (a->imm < 8) {
            read_neon_element64(right, a->vn, 0, MO_64);
            read_neon_element64(middle, a->vn, 1, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
            read_neon_element64(left, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
        } else {
            read_neon_element64(right, a->vn, 1, MO_64);
            read_neon_element64(middle, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
            read_neon_element64(left, a->vm, 1, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
        }

        write_neon_element64(destright, a->vd, 0, MO_64);
        write_neon_element64(destleft, a->vd, 1, MO_64);
    }
    return true;
}

static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
{
    TCGv_i64 val, def;
    TCGv_i32 desc;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn + a->len + 1) > 32) {
        /*
         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
         * helper function running off the end of the register file.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

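    /* The descriptor packs the table base register and table length. */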
    desc = tcg_constant_i32((a->vn << 2) | a->len);
    def = tcg_temp_new_i64();
    if (a->op) {
        read_neon_element64(def, a->vd, 0, MO_64);
    } else {
        tcg_gen_movi_i64(def, 0);
    }
    val = tcg_temp_new_i64();
    read_neon_element64(val, a->vm, 0, MO_64);

    gen_helper_neon_tbl(val, tcg_env, desc, val, def);
    write_neon_element64(val, a->vd, 0, MO_64);
    return true;
}

static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
                         neon_element_offset(a->vm, a->index, a->size),
                         a->q ? 16 : 8, a->q ? 16 : 8);
    return true;
}

static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
{
    int pass, half;
    TCGv_i32 tmp[2];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp[0] = tcg_temp_new_i32();
    tmp[1] = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        for (half = 0; half < 2; half++) {
            read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
            switch (a->size) {
            case 0:
                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
                break;
            case 1:
                gen_swap_half(tmp[half], tmp[half]);
                break;
            case 2:
                break;
            default:
                g_assert_not_reached();
            }
        }
        write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
        write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
    }
    return true;
}

static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
                              NeonGenWidenFn *widenfn,
                              NeonGenTwo64OpFn *opfn,
                              NeonGenTwo64OpFn *accfn)
{
    /*
     * Pairwise long operations: widen both halves of the pair,
     * combine the pairs with the opfn, and then possibly accumulate
     * into the destination with the accfn.
     */
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i32 tmp;
        TCGv_i64 rm0_64, rm1_64, rd_64;

        rm0_64 = tcg_temp_new_i64();
        rm1_64 = tcg_temp_new_i64();
        rd_64 = tcg_temp_new_i64();

        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
        widenfn(rm0_64, tmp);
        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
        widenfn(rm1_64, tmp);

        opfn(rd_64, rm0_64, rm1_64);

        if (accfn) {
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            read_neon_element64(tmp64, a->vd, pass, MO_64);
            accfn(rd_64, tmp64, rd_64);
        }
        write_neon_element64(rd_64, a->vd, pass, MO_64);
    }
    return true;
}

static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
                             accfn[a->size]);
}

static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
                             accfn[a->size]);
}

typedef void ZipFn(TCGv_ptr, TCGv_ptr);

static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
                       ZipFn *fn)
{
    TCGv_ptr pd, pm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!fn) {
        /* Bad size or size/q combination */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    pd = vfp_reg_ptr(true, a->vd);
    pm = vfp_reg_ptr(true, a->vm);
    fn(pd, pm);
    return true;
}

static bool trans_VUZP(DisasContext *s, arg_2misc *a)
{
    static ZipFn * const fn[2][4] = {
        {
            gen_helper_neon_unzip8,
            gen_helper_neon_unzip16,
            NULL,
            NULL,
        }, {
            gen_helper_neon_qunzip8,
            gen_helper_neon_qunzip16,
            gen_helper_neon_qunzip32,
            NULL,
        }
    };
    return do_zip_uzp(s, a, fn[a->q][a->size]);
}

static bool trans_VZIP(DisasContext *s, arg_2misc *a)
{
    static ZipFn * const fn[2][4] = {
        {
            gen_helper_neon_zip8,
            gen_helper_neon_zip16,
            NULL,
            NULL,
        }, {
            gen_helper_neon_qzip8,
            gen_helper_neon_qzip16,
            gen_helper_neon_qzip32,
            NULL,
        }
    };
    return do_zip_uzp(s, a, fn[a->q][a->size]);
}

static bool do_vmovn(DisasContext *s, arg_2misc *a,
                     NeonGenOne64OpEnvFn *narrowfn)
{
    TCGv_i64 rm, rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!narrowfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i64();
    rd1 = tcg_temp_new_i64();

    read_neon_element64(rm, a->vm, 0, MO_64);
    narrowfn(rd0, tcg_env, rm);
    read_neon_element64(rm, a->vm, 1, MO_64);
    narrowfn(rd1, tcg_env, rm);
    write_neon_element64(rd0, a->vd, 0, MO_32);
    write_neon_element64(rd1, a->vd, 1, MO_32);
    return true;
}

#define DO_VMOVN(INSN, FUNC)                                    \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        static NeonGenOne64OpEnvFn * const narrowfn[] = {       \
            FUNC##8,                                            \
            FUNC##16,                                           \
            FUNC##32,                                           \
            NULL,                                               \
        };                                                      \
        return do_vmovn(s, a, narrowfn[a->size]);               \
    }

DO_VMOVN(VMOVN, gen_neon_narrow_u)
DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)

static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 rm0, rm1;
    TCGv_i64 rd;
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd = tcg_temp_new_i64();
    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();

    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);

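    /*
     * This is the VSHLL "maximum shift" form: the shift amount is
     * always the element width, i.e. 8 << size bits.
     */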
    widenfn(rd, rm0);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 0, MO_64);
    widenfn(rd, rm1);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 1, MO_64);
    return true;
}

static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 dst0, dst1;

    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    tmp = tcg_temp_new_i64();
    dst0 = tcg_temp_new_i32();
    dst1 = tcg_temp_new_i32();

    read_neon_element64(tmp, a->vm, 0, MO_64);
    gen_helper_bfcvt_pair(dst0, tmp, fpst);

    read_neon_element64(tmp, a->vm, 1, MO_64);
    gen_helper_bfcvt_pair(dst1, tmp, fpst);

    write_neon_element32(dst0, a->vd, 0, MO_32);
    write_neon_element32(dst1, a->vd, 1, MO_32);
    return true;
}

static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vm, 0, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = tcg_temp_new_i32();
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp);
    read_neon_element32(tmp, a->vm, 2, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = tcg_temp_new_i32();
    read_neon_element32(tmp3, a->vm, 3, MO_32);
    write_neon_element32(tmp2, a->vd, 0, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    write_neon_element32(tmp3, a->vd, 1, MO_32);
    return true;
}

static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp3 = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 0, MO_32);
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    write_neon_element32(tmp, a->vd, 1, MO_32);
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 2, MO_32);
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    write_neon_element32(tmp2, a->vd, 3, MO_32);
    return true;
}

static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);

    return true;
}

#define DO_2MISC_VEC(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_2misc_vec(s, a, FN);                          \
    }

DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)

static bool trans_VMVN(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc_vec(s, a, tcg_gen_gvec_not);
}

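/*
 * Adapt the out-of-line crypto helpers to the GVecGen2Fn signature
 * used by do_2misc_vec(). The 3-operand variant passes rd as both
 * destination and first source, since AESE/AESD combine the state
 * in rd with the round key in rm.
 */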
#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
                           DATA, FUNC);                                 \
    }

#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
    }

WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aesd, 0)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesimc, 0)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)

#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                               \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {          \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }

DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)

static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
    TCGv_i32 tmp;
    int pass;

    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, tmp);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    return true;
}

static bool trans_VREV32(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        tcg_gen_bswap32_i32,
        gen_swap_half,
        NULL,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VREV16(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc(s, a, gen_rev16);
}

static bool trans_VCLS(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_cls_s8,
        gen_helper_neon_cls_s16,
        gen_helper_neon_cls_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
{
    tcg_gen_clzi_i32(rd, rm, 32);
}

static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_clz_u8,
        gen_helper_neon_clz_u16,
        do_VCLZ_32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VCNT(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc(s, a, gen_helper_neon_cnt_u8);
}

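/* Float VABS: clear the sign bit of each element. */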
static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x7fff : 0x7fffffff,
                      oprsz, maxsz);
}

static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
{
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    } else if (a->size != MO_32) {
        return false;
    }
    return do_2misc_vec(s, a, gen_VABS_F);
}

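/* Float VNEG: flip the sign bit of each element. */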
static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x8000 : 0x80000000,
                      oprsz, maxsz);
}

static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
{
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    } else if (a->size != MO_32) {
        return false;
    }
    return do_2misc_vec(s, a, gen_VNEG_F);
}

static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    return do_2misc(s, a, gen_helper_recpe_u32);
}

static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    return do_2misc(s, a, gen_helper_rsqrte_u32);
}

#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC)                 \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
    {                                                   \
        FUNC(d, tcg_env, m);                            \
    }

WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)

static bool trans_VQABS(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_VQABS_s8,
        gen_VQABS_s16,
        gen_VQABS_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_VQNEG_s8,
        gen_VQNEG_s16,
        gen_VQNEG_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL, HFUNC, SFUNC, NULL,                                   \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
                           fns[vece]);                                  \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }

DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)

DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)

static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }
    return trans_VRINTX_impl(s, a);
}

#define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL,                                                       \
            gen_helper_gvec_##OP##h,                                    \
            gen_helper_gvec_##OP##s,                                    \
            NULL,                                                       \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD);       \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
                           arm_rmode_to_sf(RMODE), fns[vece]);          \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
            return false;                                               \
        }                                                               \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }

DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)

DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)

static bool trans_VSWP(DisasContext *s, arg_2misc *a)
{
    TCGv_i64 rm, rd;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size != 0) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd = tcg_temp_new_i64();
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        read_neon_element64(rm, a->vm, pass, MO_64);
        read_neon_element64(rd, a->vd, pass, MO_64);
        write_neon_element64(rm, a->vd, pass, MO_64);
        write_neon_element64(rd, a->vm, pass, MO_64);
    }
    return true;
}

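/*
 * Transpose pairs of 8-bit lanes within a 32-bit word: on return
 * t0 holds lanes {t1[0], t0[0], t1[2], t0[2]} and t1 holds lanes
 * {t1[1], t0[1], t1[3], t0[3]}, lane 0 being the least significant
 * byte.
 */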
static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 8);
    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
    tcg_gen_or_i32(rd, rd, tmp);

    tcg_gen_shri_i32(t1, t1, 8);
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);
}

static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 16);
    tcg_gen_andi_i32(tmp, t1, 0xffff);
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shri_i32(t1, t1, 16);
    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);
}

static bool trans_VTRN(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 tmp, tmp2;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    if (a->size == MO_32) {
        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
            read_neon_element32(tmp, a->vm, pass, MO_32);
            read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
            write_neon_element32(tmp2, a->vm, pass, MO_32);
            write_neon_element32(tmp, a->vd, pass + 1, MO_32);
        }
    } else {
        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
            read_neon_element32(tmp, a->vm, pass, MO_32);
            read_neon_element32(tmp2, a->vd, pass, MO_32);
            if (a->size == MO_8) {
                gen_neon_trn_u8(tmp, tmp2);
            } else {
                gen_neon_trn_u16(tmp, tmp2);
            }
            write_neon_element32(tmp2, a->vm, pass, MO_32);
            write_neon_element32(tmp, a->vd, pass, MO_32);
        }
    }
    return true;
}

static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_smmla_b);
}

static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_ummla_b);
}

static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_usmmla_b);
}

static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda_env(s, 7, a->vd, a->vn, a->vm, 0,
                            gen_helper_gvec_bfmmla);
}

static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
                             gen_helper_gvec_bfmlal);
}

static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
                             (a->index << 1) | a->q, FPST_STD,
                             gen_helper_gvec_bfmlal_idx);
}