1 /* 2 * ARM translation: AArch32 Neon instructions 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2005-2007 CodeSourcery 6 * Copyright (c) 2007 OpenedHand, Ltd. 7 * Copyright (c) 2020 Linaro, Ltd. 8 * 9 * This library is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * This library is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 21 */ 22 23 #include "qemu/osdep.h" 24 #include "translate.h" 25 #include "translate-a32.h" 26 27 /* Include the generated Neon decoder */ 28 #include "decode-neon-dp.c.inc" 29 #include "decode-neon-ls.c.inc" 30 #include "decode-neon-shared.c.inc" 31 32 static TCGv_ptr vfp_reg_ptr(bool dp, int reg) 33 { 34 TCGv_ptr ret = tcg_temp_new_ptr(); 35 tcg_gen_addi_ptr(ret, tcg_env, vfp_reg_offset(dp, reg)); 36 return ret; 37 } 38 39 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop) 40 { 41 long offset = neon_element_offset(reg, ele, mop & MO_SIZE); 42 43 switch (mop) { 44 case MO_UB: 45 tcg_gen_ld8u_i32(var, tcg_env, offset); 46 break; 47 case MO_UW: 48 tcg_gen_ld16u_i32(var, tcg_env, offset); 49 break; 50 case MO_UL: 51 tcg_gen_ld_i32(var, tcg_env, offset); 52 break; 53 default: 54 g_assert_not_reached(); 55 } 56 } 57 58 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop) 59 { 60 long offset = neon_element_offset(reg, ele, mop & MO_SIZE); 61 62 switch (mop) { 63 case MO_UB: 64 tcg_gen_ld8u_i64(var, tcg_env, offset); 65 break; 66 case MO_UW: 67 tcg_gen_ld16u_i64(var, tcg_env, offset); 68 break; 69 case MO_UL: 70 tcg_gen_ld32u_i64(var, tcg_env, offset); 71 break; 72 case MO_UQ: 73 tcg_gen_ld_i64(var, tcg_env, offset); 74 break; 75 default: 76 g_assert_not_reached(); 77 } 78 } 79 80 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var) 81 { 82 long offset = neon_element_offset(reg, ele, size); 83 84 switch (size) { 85 case MO_8: 86 tcg_gen_st8_i32(var, tcg_env, offset); 87 break; 88 case MO_16: 89 tcg_gen_st16_i32(var, tcg_env, offset); 90 break; 91 case MO_32: 92 tcg_gen_st_i32(var, tcg_env, offset); 93 break; 94 default: 95 g_assert_not_reached(); 96 } 97 } 98 99 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var) 100 { 101 long offset = neon_element_offset(reg, ele, size); 102 103 switch (size) { 104 case MO_8: 105 tcg_gen_st8_i64(var, tcg_env, offset); 106 break; 107 case MO_16: 108 tcg_gen_st16_i64(var, tcg_env, offset); 109 break; 110 case MO_32: 111 tcg_gen_st32_i64(var, tcg_env, offset); 112 break; 113 case MO_64: 114 tcg_gen_st_i64(var, tcg_env, offset); 115 break; 116 default: 117 g_assert_not_reached(); 118 } 119 } 120 121 static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm, 122 int data, gen_helper_gvec_4 *fn_gvec) 123 { 124 /* UNDEF accesses to D16-D31 if they don't exist. */ 125 if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) { 126 return false; 127 } 128 129 /* 130 * UNDEF accesses to odd registers for each bit of Q. 131 * Q will be 0b111 for all Q-reg instructions, otherwise 132 * when we have mixed Q- and D-reg inputs. 133 */ 134 if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) { 135 return false; 136 } 137 138 if (!vfp_access_check(s)) { 139 return true; 140 } 141 142 int opr_sz = q ? 16 : 8; 143 tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd), 144 vfp_reg_offset(1, vn), 145 vfp_reg_offset(1, vm), 146 vfp_reg_offset(1, vd), 147 opr_sz, opr_sz, data, fn_gvec); 148 return true; 149 } 150 151 static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm, 152 int data, ARMFPStatusFlavour fp_flavour, 153 gen_helper_gvec_4_ptr *fn_gvec_ptr) 154 { 155 /* UNDEF accesses to D16-D31 if they don't exist. */ 156 if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) { 157 return false; 158 } 159 160 /* 161 * UNDEF accesses to odd registers for each bit of Q. 162 * Q will be 0b111 for all Q-reg instructions, otherwise 163 * when we have mixed Q- and D-reg inputs. 164 */ 165 if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) { 166 return false; 167 } 168 169 if (!vfp_access_check(s)) { 170 return true; 171 } 172 173 int opr_sz = q ? 16 : 8; 174 TCGv_ptr fpst = fpstatus_ptr(fp_flavour); 175 176 tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd), 177 vfp_reg_offset(1, vn), 178 vfp_reg_offset(1, vm), 179 vfp_reg_offset(1, vd), 180 fpst, opr_sz, opr_sz, data, fn_gvec_ptr); 181 return true; 182 } 183 184 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) 185 { 186 if (!dc_isar_feature(aa32_vcma, s)) { 187 return false; 188 } 189 if (a->size == MO_16) { 190 if (!dc_isar_feature(aa32_fp16_arith, s)) { 191 return false; 192 } 193 return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot, 194 FPST_STD_F16, gen_helper_gvec_fcmlah); 195 } 196 return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot, 197 FPST_STD, gen_helper_gvec_fcmlas); 198 } 199 200 static bool trans_VCADD(DisasContext *s, arg_VCADD *a) 201 { 202 int opr_sz; 203 TCGv_ptr fpst; 204 gen_helper_gvec_3_ptr *fn_gvec_ptr; 205 206 if (!dc_isar_feature(aa32_vcma, s) 207 || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) { 208 return false; 209 } 210 211 /* UNDEF accesses to D16-D31 if they don't exist. */ 212 if (!dc_isar_feature(aa32_simd_r32, s) && 213 ((a->vd | a->vn | a->vm) & 0x10)) { 214 return false; 215 } 216 217 if ((a->vn | a->vm | a->vd) & a->q) { 218 return false; 219 } 220 221 if (!vfp_access_check(s)) { 222 return true; 223 } 224 225 opr_sz = (1 + a->q) * 8; 226 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 227 fn_gvec_ptr = (a->size == MO_16) ? 228 gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds; 229 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 230 vfp_reg_offset(1, a->vn), 231 vfp_reg_offset(1, a->vm), 232 fpst, opr_sz, opr_sz, a->rot, 233 fn_gvec_ptr); 234 return true; 235 } 236 237 static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a) 238 { 239 if (!dc_isar_feature(aa32_dp, s)) { 240 return false; 241 } 242 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 243 gen_helper_gvec_sdot_b); 244 } 245 246 static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a) 247 { 248 if (!dc_isar_feature(aa32_dp, s)) { 249 return false; 250 } 251 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 252 gen_helper_gvec_udot_b); 253 } 254 255 static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a) 256 { 257 if (!dc_isar_feature(aa32_i8mm, s)) { 258 return false; 259 } 260 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 261 gen_helper_gvec_usdot_b); 262 } 263 264 static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a) 265 { 266 if (!dc_isar_feature(aa32_bf16, s)) { 267 return false; 268 } 269 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 270 gen_helper_gvec_bfdot); 271 } 272 273 static bool trans_VFML(DisasContext *s, arg_VFML *a) 274 { 275 int opr_sz; 276 277 if (!dc_isar_feature(aa32_fhm, s)) { 278 return false; 279 } 280 281 /* UNDEF accesses to D16-D31 if they don't exist. */ 282 if (!dc_isar_feature(aa32_simd_r32, s) && 283 (a->vd & 0x10)) { 284 return false; 285 } 286 287 if (a->vd & a->q) { 288 return false; 289 } 290 291 if (!vfp_access_check(s)) { 292 return true; 293 } 294 295 opr_sz = (1 + a->q) * 8; 296 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 297 vfp_reg_offset(a->q, a->vn), 298 vfp_reg_offset(a->q, a->vm), 299 tcg_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ 300 gen_helper_gvec_fmlal_a32); 301 return true; 302 } 303 304 static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) 305 { 306 int data = (a->index << 2) | a->rot; 307 308 if (!dc_isar_feature(aa32_vcma, s)) { 309 return false; 310 } 311 if (a->size == MO_16) { 312 if (!dc_isar_feature(aa32_fp16_arith, s)) { 313 return false; 314 } 315 return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data, 316 FPST_STD_F16, gen_helper_gvec_fcmlah_idx); 317 } 318 return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data, 319 FPST_STD, gen_helper_gvec_fcmlas_idx); 320 } 321 322 static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a) 323 { 324 if (!dc_isar_feature(aa32_dp, s)) { 325 return false; 326 } 327 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 328 gen_helper_gvec_sdot_idx_b); 329 } 330 331 static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a) 332 { 333 if (!dc_isar_feature(aa32_dp, s)) { 334 return false; 335 } 336 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 337 gen_helper_gvec_udot_idx_b); 338 } 339 340 static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a) 341 { 342 if (!dc_isar_feature(aa32_i8mm, s)) { 343 return false; 344 } 345 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 346 gen_helper_gvec_usdot_idx_b); 347 } 348 349 static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a) 350 { 351 if (!dc_isar_feature(aa32_i8mm, s)) { 352 return false; 353 } 354 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 355 gen_helper_gvec_sudot_idx_b); 356 } 357 358 static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a) 359 { 360 if (!dc_isar_feature(aa32_bf16, s)) { 361 return false; 362 } 363 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 364 gen_helper_gvec_bfdot_idx); 365 } 366 367 static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) 368 { 369 int opr_sz; 370 371 if (!dc_isar_feature(aa32_fhm, s)) { 372 return false; 373 } 374 375 /* UNDEF accesses to D16-D31 if they don't exist. */ 376 if (!dc_isar_feature(aa32_simd_r32, s) && 377 ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { 378 return false; 379 } 380 381 if (a->vd & a->q) { 382 return false; 383 } 384 385 if (!vfp_access_check(s)) { 386 return true; 387 } 388 389 opr_sz = (1 + a->q) * 8; 390 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 391 vfp_reg_offset(a->q, a->vn), 392 vfp_reg_offset(a->q, a->rm), 393 tcg_env, opr_sz, opr_sz, 394 (a->index << 2) | a->s, /* is_2 == 0 */ 395 gen_helper_gvec_fmlal_idx_a32); 396 return true; 397 } 398 399 static struct { 400 int nregs; 401 int interleave; 402 int spacing; 403 } const neon_ls_element_type[11] = { 404 {1, 4, 1}, 405 {1, 4, 2}, 406 {4, 1, 1}, 407 {2, 2, 2}, 408 {1, 3, 1}, 409 {1, 3, 2}, 410 {3, 1, 1}, 411 {1, 1, 1}, 412 {1, 2, 1}, 413 {1, 2, 2}, 414 {2, 1, 1} 415 }; 416 417 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn, 418 int stride) 419 { 420 if (rm != 15) { 421 TCGv_i32 base; 422 423 base = load_reg(s, rn); 424 if (rm == 13) { 425 tcg_gen_addi_i32(base, base, stride); 426 } else { 427 TCGv_i32 index; 428 index = load_reg(s, rm); 429 tcg_gen_add_i32(base, base, index); 430 } 431 store_reg(s, rn, base); 432 } 433 } 434 435 static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) 436 { 437 /* Neon load/store multiple structures */ 438 int nregs, interleave, spacing, reg, n; 439 MemOp mop, align, endian; 440 int mmu_idx = get_mem_index(s); 441 int size = a->size; 442 TCGv_i64 tmp64; 443 TCGv_i32 addr; 444 445 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 446 return false; 447 } 448 449 /* UNDEF accesses to D16-D31 if they don't exist */ 450 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 451 return false; 452 } 453 if (a->itype > 10) { 454 return false; 455 } 456 /* Catch UNDEF cases for bad values of align field */ 457 switch (a->itype & 0xc) { 458 case 4: 459 if (a->align >= 2) { 460 return false; 461 } 462 break; 463 case 8: 464 if (a->align == 3) { 465 return false; 466 } 467 break; 468 default: 469 break; 470 } 471 nregs = neon_ls_element_type[a->itype].nregs; 472 interleave = neon_ls_element_type[a->itype].interleave; 473 spacing = neon_ls_element_type[a->itype].spacing; 474 if (size == 3 && (interleave | spacing) != 1) { 475 return false; 476 } 477 478 if (!vfp_access_check(s)) { 479 return true; 480 } 481 482 /* For our purposes, bytes are always little-endian. */ 483 endian = s->be_data; 484 if (size == 0) { 485 endian = MO_LE; 486 } 487 488 /* Enforce alignment requested by the instruction */ 489 if (a->align) { 490 align = pow2_align(a->align + 2); /* 4 ** a->align */ 491 } else { 492 align = s->align_mem ? MO_ALIGN : 0; 493 } 494 495 /* 496 * Consecutive little-endian elements from a single register 497 * can be promoted to a larger little-endian operation. 498 */ 499 if (interleave == 1 && endian == MO_LE) { 500 /* Retain any natural alignment. */ 501 if (align == MO_ALIGN) { 502 align = pow2_align(size); 503 } 504 size = 3; 505 } 506 507 tmp64 = tcg_temp_new_i64(); 508 addr = tcg_temp_new_i32(); 509 load_reg_var(s, addr, a->rn); 510 511 mop = endian | size | align; 512 for (reg = 0; reg < nregs; reg++) { 513 for (n = 0; n < 8 >> size; n++) { 514 int xs; 515 for (xs = 0; xs < interleave; xs++) { 516 int tt = a->vd + reg + spacing * xs; 517 518 if (a->l) { 519 gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop); 520 neon_store_element64(tt, n, size, tmp64); 521 } else { 522 neon_load_element64(tmp64, tt, n, size); 523 gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop); 524 } 525 tcg_gen_addi_i32(addr, addr, 1 << size); 526 527 /* Subsequent memory operations inherit alignment */ 528 mop &= ~MO_AMASK; 529 } 530 } 531 } 532 533 gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8); 534 return true; 535 } 536 537 static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) 538 { 539 /* Neon load single structure to all lanes */ 540 int reg, stride, vec_size; 541 int vd = a->vd; 542 int size = a->size; 543 int nregs = a->n + 1; 544 TCGv_i32 addr, tmp; 545 MemOp mop, align; 546 547 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 548 return false; 549 } 550 551 /* UNDEF accesses to D16-D31 if they don't exist */ 552 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 553 return false; 554 } 555 556 align = 0; 557 if (size == 3) { 558 if (nregs != 4 || a->a == 0) { 559 return false; 560 } 561 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ 562 size = MO_32; 563 align = MO_ALIGN_16; 564 } else if (a->a) { 565 switch (nregs) { 566 case 1: 567 if (size == 0) { 568 return false; 569 } 570 align = MO_ALIGN; 571 break; 572 case 2: 573 align = pow2_align(size + 1); 574 break; 575 case 3: 576 return false; 577 case 4: 578 if (size == 2) { 579 align = pow2_align(3); 580 } else { 581 align = pow2_align(size + 2); 582 } 583 break; 584 default: 585 g_assert_not_reached(); 586 } 587 } 588 589 if (!vfp_access_check(s)) { 590 return true; 591 } 592 593 /* 594 * VLD1 to all lanes: T bit indicates how many Dregs to write. 595 * VLD2/3/4 to all lanes: T bit indicates register stride. 596 */ 597 stride = a->t ? 2 : 1; 598 vec_size = nregs == 1 ? stride * 8 : 8; 599 mop = size | align; 600 tmp = tcg_temp_new_i32(); 601 addr = tcg_temp_new_i32(); 602 load_reg_var(s, addr, a->rn); 603 for (reg = 0; reg < nregs; reg++) { 604 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop); 605 if ((vd & 1) && vec_size == 16) { 606 /* 607 * We cannot write 16 bytes at once because the 608 * destination is unaligned. 609 */ 610 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 611 8, 8, tmp); 612 tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1), 613 neon_full_reg_offset(vd), 8, 8); 614 } else { 615 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 616 vec_size, vec_size, tmp); 617 } 618 tcg_gen_addi_i32(addr, addr, 1 << size); 619 vd += stride; 620 621 /* Subsequent memory operations inherit alignment */ 622 mop &= ~MO_AMASK; 623 } 624 625 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs); 626 627 return true; 628 } 629 630 static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) 631 { 632 /* Neon load/store single structure to one lane */ 633 int reg; 634 int nregs = a->n + 1; 635 int vd = a->vd; 636 TCGv_i32 addr, tmp; 637 MemOp mop; 638 639 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 640 return false; 641 } 642 643 /* UNDEF accesses to D16-D31 if they don't exist */ 644 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 645 return false; 646 } 647 648 /* Catch the UNDEF cases. This is unavoidably a bit messy. */ 649 switch (nregs) { 650 case 1: 651 if (a->stride != 1) { 652 return false; 653 } 654 if (((a->align & (1 << a->size)) != 0) || 655 (a->size == 2 && (a->align == 1 || a->align == 2))) { 656 return false; 657 } 658 break; 659 case 2: 660 if (a->size == 2 && (a->align & 2) != 0) { 661 return false; 662 } 663 break; 664 case 3: 665 if (a->align != 0) { 666 return false; 667 } 668 break; 669 case 4: 670 if (a->size == 2 && a->align == 3) { 671 return false; 672 } 673 break; 674 default: 675 g_assert_not_reached(); 676 } 677 if ((vd + a->stride * (nregs - 1)) > 31) { 678 /* 679 * Attempts to write off the end of the register file are 680 * UNPREDICTABLE; we choose to UNDEF because otherwise we would 681 * access off the end of the array that holds the register data. 682 */ 683 return false; 684 } 685 686 if (!vfp_access_check(s)) { 687 return true; 688 } 689 690 /* Pick up SCTLR settings */ 691 mop = finalize_memop(s, a->size); 692 693 if (a->align) { 694 MemOp align_op; 695 696 switch (nregs) { 697 case 1: 698 /* For VLD1, use natural alignment. */ 699 align_op = MO_ALIGN; 700 break; 701 case 2: 702 /* For VLD2, use double alignment. */ 703 align_op = pow2_align(a->size + 1); 704 break; 705 case 4: 706 if (a->size == MO_32) { 707 /* 708 * For VLD4.32, align = 1 is double alignment, align = 2 is 709 * quad alignment; align = 3 is rejected above. 710 */ 711 align_op = pow2_align(a->size + a->align); 712 } else { 713 /* For VLD4.8 and VLD.16, we want quad alignment. */ 714 align_op = pow2_align(a->size + 2); 715 } 716 break; 717 default: 718 /* For VLD3, the alignment field is zero and rejected above. */ 719 g_assert_not_reached(); 720 } 721 722 mop = (mop & ~MO_AMASK) | align_op; 723 } 724 725 tmp = tcg_temp_new_i32(); 726 addr = tcg_temp_new_i32(); 727 load_reg_var(s, addr, a->rn); 728 729 for (reg = 0; reg < nregs; reg++) { 730 if (a->l) { 731 gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop); 732 neon_store_element(vd, a->reg_idx, a->size, tmp); 733 } else { /* Store */ 734 neon_load_element(tmp, vd, a->reg_idx, a->size); 735 gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop); 736 } 737 vd += a->stride; 738 tcg_gen_addi_i32(addr, addr, 1 << a->size); 739 740 /* Subsequent memory operations inherit alignment */ 741 mop &= ~MO_AMASK; 742 } 743 744 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs); 745 746 return true; 747 } 748 749 static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) 750 { 751 int vec_size = a->q ? 16 : 8; 752 int rd_ofs = neon_full_reg_offset(a->vd); 753 int rn_ofs = neon_full_reg_offset(a->vn); 754 int rm_ofs = neon_full_reg_offset(a->vm); 755 756 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 757 return false; 758 } 759 760 /* UNDEF accesses to D16-D31 if they don't exist. */ 761 if (!dc_isar_feature(aa32_simd_r32, s) && 762 ((a->vd | a->vn | a->vm) & 0x10)) { 763 return false; 764 } 765 766 if ((a->vn | a->vm | a->vd) & a->q) { 767 return false; 768 } 769 770 if (!vfp_access_check(s)) { 771 return true; 772 } 773 774 fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); 775 return true; 776 } 777 778 #define DO_3SAME(INSN, FUNC) \ 779 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 780 { \ 781 return do_3same(s, a, FUNC); \ 782 } 783 784 DO_3SAME(VADD, tcg_gen_gvec_add) 785 DO_3SAME(VSUB, tcg_gen_gvec_sub) 786 DO_3SAME(VAND, tcg_gen_gvec_and) 787 DO_3SAME(VBIC, tcg_gen_gvec_andc) 788 DO_3SAME(VORR, tcg_gen_gvec_or) 789 DO_3SAME(VORN, tcg_gen_gvec_orc) 790 DO_3SAME(VEOR, tcg_gen_gvec_xor) 791 DO_3SAME(VSHL_S, gen_gvec_sshl) 792 DO_3SAME(VSHL_U, gen_gvec_ushl) 793 DO_3SAME(VQADD_S, gen_gvec_sqadd_qc) 794 DO_3SAME(VQADD_U, gen_gvec_uqadd_qc) 795 DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc) 796 DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc) 797 798 /* These insns are all gvec_bitsel but with the inputs in various orders. */ 799 #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ 800 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 801 uint32_t rn_ofs, uint32_t rm_ofs, \ 802 uint32_t oprsz, uint32_t maxsz) \ 803 { \ 804 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \ 805 } \ 806 DO_3SAME(INSN, gen_##INSN##_3s) 807 808 DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs) 809 DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs) 810 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs) 811 812 #define DO_3SAME_NO_SZ_3(INSN, FUNC) \ 813 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 814 { \ 815 if (a->size == 3) { \ 816 return false; \ 817 } \ 818 return do_3same(s, a, FUNC); \ 819 } 820 821 DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax) 822 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax) 823 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin) 824 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin) 825 DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul) 826 DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla) 827 DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls) 828 DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst) 829 DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) 830 DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) 831 DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) 832 DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) 833 DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp) 834 DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp) 835 DO_3SAME_NO_SZ_3(VPMIN_S, gen_gvec_sminp) 836 DO_3SAME_NO_SZ_3(VPMAX_U, gen_gvec_umaxp) 837 DO_3SAME_NO_SZ_3(VPMIN_U, gen_gvec_uminp) 838 839 #define DO_3SAME_CMP(INSN, COND) \ 840 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 841 uint32_t rn_ofs, uint32_t rm_ofs, \ 842 uint32_t oprsz, uint32_t maxsz) \ 843 { \ 844 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \ 845 } \ 846 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) 847 848 DO_3SAME_CMP(VCGT_S, TCG_COND_GT) 849 DO_3SAME_CMP(VCGT_U, TCG_COND_GTU) 850 DO_3SAME_CMP(VCGE_S, TCG_COND_GE) 851 DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) 852 DO_3SAME_CMP(VCEQ, TCG_COND_EQ) 853 854 #define WRAP_OOL_FN(WRAPNAME, FUNC) \ 855 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ 856 uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ 857 { \ 858 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ 859 } 860 861 WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) 862 863 static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) 864 { 865 if (a->size != 0) { 866 return false; 867 } 868 return do_3same(s, a, gen_VMUL_p_3s); 869 } 870 871 #define DO_VQRDMLAH(INSN, FUNC) \ 872 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 873 { \ 874 if (!dc_isar_feature(aa32_rdm, s)) { \ 875 return false; \ 876 } \ 877 if (a->size != 1 && a->size != 2) { \ 878 return false; \ 879 } \ 880 return do_3same(s, a, FUNC); \ 881 } 882 883 DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) 884 DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) 885 886 #define DO_SHA1(NAME, FUNC) \ 887 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ 888 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ 889 { \ 890 if (!dc_isar_feature(aa32_sha1, s)) { \ 891 return false; \ 892 } \ 893 return do_3same(s, a, gen_##NAME##_3s); \ 894 } 895 896 DO_SHA1(SHA1C, gen_helper_crypto_sha1c) 897 DO_SHA1(SHA1P, gen_helper_crypto_sha1p) 898 DO_SHA1(SHA1M, gen_helper_crypto_sha1m) 899 DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) 900 901 #define DO_SHA2(NAME, FUNC) \ 902 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ 903 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ 904 { \ 905 if (!dc_isar_feature(aa32_sha2, s)) { \ 906 return false; \ 907 } \ 908 return do_3same(s, a, gen_##NAME##_3s); \ 909 } 910 911 DO_SHA2(SHA256H, gen_helper_crypto_sha256h) 912 DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) 913 DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) 914 915 #define DO_3SAME_64(INSN, FUNC) \ 916 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 917 uint32_t rn_ofs, uint32_t rm_ofs, \ 918 uint32_t oprsz, uint32_t maxsz) \ 919 { \ 920 static const GVecGen3 op = { .fni8 = FUNC }; \ 921 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ 922 } \ 923 DO_3SAME(INSN, gen_##INSN##_3s) 924 925 #define DO_3SAME_64_ENV(INSN, FUNC) \ 926 static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ 927 { \ 928 FUNC(d, tcg_env, n, m); \ 929 } \ 930 DO_3SAME_64(INSN, gen_##INSN##_elt) 931 932 DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) 933 DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) 934 DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) 935 DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) 936 DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) 937 DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) 938 939 #define DO_3SAME_32(INSN, FUNC) \ 940 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 941 uint32_t rn_ofs, uint32_t rm_ofs, \ 942 uint32_t oprsz, uint32_t maxsz) \ 943 { \ 944 static const GVecGen3 ops[4] = { \ 945 { .fni4 = gen_helper_neon_##FUNC##8 }, \ 946 { .fni4 = gen_helper_neon_##FUNC##16 }, \ 947 { .fni4 = gen_helper_neon_##FUNC##32 }, \ 948 { 0 }, \ 949 }; \ 950 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 951 } \ 952 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 953 { \ 954 if (a->size > 2) { \ 955 return false; \ 956 } \ 957 return do_3same(s, a, gen_##INSN##_3s); \ 958 } 959 960 /* 961 * Some helper functions need to be passed the tcg_env. In order 962 * to use those with the gvec APIs like tcg_gen_gvec_3() we need 963 * to create wrapper functions whose prototype is a NeonGenTwoOpFn() 964 * and which call a NeonGenTwoOpEnvFn(). 965 */ 966 #define WRAP_ENV_FN(WRAPNAME, FUNC) \ 967 static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ 968 { \ 969 FUNC(d, tcg_env, n, m); \ 970 } 971 972 #define DO_3SAME_32_ENV(INSN, FUNC) \ 973 WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ 974 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ 975 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ 976 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 977 uint32_t rn_ofs, uint32_t rm_ofs, \ 978 uint32_t oprsz, uint32_t maxsz) \ 979 { \ 980 static const GVecGen3 ops[4] = { \ 981 { .fni4 = gen_##INSN##_tramp8 }, \ 982 { .fni4 = gen_##INSN##_tramp16 }, \ 983 { .fni4 = gen_##INSN##_tramp32 }, \ 984 { 0 }, \ 985 }; \ 986 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 987 } \ 988 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 989 { \ 990 if (a->size > 2) { \ 991 return false; \ 992 } \ 993 return do_3same(s, a, gen_##INSN##_3s); \ 994 } 995 996 DO_3SAME_32(VHADD_S, hadd_s) 997 DO_3SAME_32(VHADD_U, hadd_u) 998 DO_3SAME_32(VHSUB_S, hsub_s) 999 DO_3SAME_32(VHSUB_U, hsub_u) 1000 DO_3SAME_32(VRHADD_S, rhadd_s) 1001 DO_3SAME_32(VRHADD_U, rhadd_u) 1002 DO_3SAME_32(VRSHL_S, rshl_s) 1003 DO_3SAME_32(VRSHL_U, rshl_u) 1004 1005 DO_3SAME_32_ENV(VQSHL_S, qshl_s) 1006 DO_3SAME_32_ENV(VQSHL_U, qshl_u) 1007 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) 1008 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) 1009 1010 #define DO_3SAME_VQDMULH(INSN, FUNC) \ 1011 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ 1012 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ 1013 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 1014 uint32_t rn_ofs, uint32_t rm_ofs, \ 1015 uint32_t oprsz, uint32_t maxsz) \ 1016 { \ 1017 static const GVecGen3 ops[2] = { \ 1018 { .fni4 = gen_##INSN##_tramp16 }, \ 1019 { .fni4 = gen_##INSN##_tramp32 }, \ 1020 }; \ 1021 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ 1022 } \ 1023 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 1024 { \ 1025 if (a->size != 1 && a->size != 2) { \ 1026 return false; \ 1027 } \ 1028 return do_3same(s, a, gen_##INSN##_3s); \ 1029 } 1030 1031 DO_3SAME_VQDMULH(VQDMULH, qdmulh) 1032 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) 1033 1034 #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \ 1035 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 1036 uint32_t rn_ofs, uint32_t rm_ofs, \ 1037 uint32_t oprsz, uint32_t maxsz) \ 1038 { \ 1039 TCGv_ptr fpst = fpstatus_ptr(FPST); \ 1040 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \ 1041 oprsz, maxsz, 0, FUNC); \ 1042 } 1043 1044 #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \ 1045 WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \ 1046 WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \ 1047 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ 1048 { \ 1049 if (a->size == MO_16) { \ 1050 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 1051 return false; \ 1052 } \ 1053 return do_3same(s, a, gen_##INSN##_fp16_3s); \ 1054 } \ 1055 return do_3same(s, a, gen_##INSN##_fp32_3s); \ 1056 } 1057 1058 1059 DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h) 1060 DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h) 1061 DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h) 1062 DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h) 1063 DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h) 1064 DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h) 1065 DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h) 1066 DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h) 1067 DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h) 1068 DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h) 1069 DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h) 1070 DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h) 1071 DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h) 1072 DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) 1073 DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) 1074 DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) 1075 DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h) 1076 DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h) 1077 DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h) 1078 DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h) 1079 1080 WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s) 1081 WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h) 1082 WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s) 1083 WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h) 1084 1085 static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a) 1086 { 1087 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 1088 return false; 1089 } 1090 1091 if (a->size == MO_16) { 1092 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1093 return false; 1094 } 1095 return do_3same(s, a, gen_VMAXNM_fp16_3s); 1096 } 1097 return do_3same(s, a, gen_VMAXNM_fp32_3s); 1098 } 1099 1100 static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) 1101 { 1102 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 1103 return false; 1104 } 1105 1106 if (a->size == MO_16) { 1107 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1108 return false; 1109 } 1110 return do_3same(s, a, gen_VMINNM_fp16_3s); 1111 } 1112 return do_3same(s, a, gen_VMINNM_fp32_3s); 1113 } 1114 1115 static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) 1116 { 1117 /* Handle a 2-reg-shift insn which can be vectorized. */ 1118 int vec_size = a->q ? 16 : 8; 1119 int rd_ofs = neon_full_reg_offset(a->vd); 1120 int rm_ofs = neon_full_reg_offset(a->vm); 1121 1122 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1123 return false; 1124 } 1125 1126 /* UNDEF accesses to D16-D31 if they don't exist. */ 1127 if (!dc_isar_feature(aa32_simd_r32, s) && 1128 ((a->vd | a->vm) & 0x10)) { 1129 return false; 1130 } 1131 1132 if ((a->vm | a->vd) & a->q) { 1133 return false; 1134 } 1135 1136 if (!vfp_access_check(s)) { 1137 return true; 1138 } 1139 1140 fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size); 1141 return true; 1142 } 1143 1144 #define DO_2SH(INSN, FUNC) \ 1145 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1146 { \ 1147 return do_vector_2sh(s, a, FUNC); \ 1148 } \ 1149 1150 DO_2SH(VSHL, tcg_gen_gvec_shli) 1151 DO_2SH(VSLI, gen_gvec_sli) 1152 DO_2SH(VSRI, gen_gvec_sri) 1153 DO_2SH(VSRA_S, gen_gvec_ssra) 1154 DO_2SH(VSRA_U, gen_gvec_usra) 1155 DO_2SH(VRSHR_S, gen_gvec_srshr) 1156 DO_2SH(VRSHR_U, gen_gvec_urshr) 1157 DO_2SH(VRSRA_S, gen_gvec_srsra) 1158 DO_2SH(VRSRA_U, gen_gvec_ursra) 1159 1160 static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) 1161 { 1162 /* Signed shift out of range results in all-sign-bits */ 1163 a->shift = MIN(a->shift, (8 << a->size) - 1); 1164 return do_vector_2sh(s, a, tcg_gen_gvec_sari); 1165 } 1166 1167 static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 1168 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1169 { 1170 tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0); 1171 } 1172 1173 static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) 1174 { 1175 /* Shift out of range is architecturally valid and results in zero. */ 1176 if (a->shift >= (8 << a->size)) { 1177 return do_vector_2sh(s, a, gen_zero_rd_2sh); 1178 } else { 1179 return do_vector_2sh(s, a, tcg_gen_gvec_shri); 1180 } 1181 } 1182 1183 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, 1184 NeonGenTwo64OpEnvFn *fn) 1185 { 1186 /* 1187 * 2-reg-and-shift operations, size == 3 case, where the 1188 * function needs to be passed tcg_env. 1189 */ 1190 TCGv_i64 constimm; 1191 int pass; 1192 1193 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1194 return false; 1195 } 1196 1197 /* UNDEF accesses to D16-D31 if they don't exist. */ 1198 if (!dc_isar_feature(aa32_simd_r32, s) && 1199 ((a->vd | a->vm) & 0x10)) { 1200 return false; 1201 } 1202 1203 if ((a->vm | a->vd) & a->q) { 1204 return false; 1205 } 1206 1207 if (!vfp_access_check(s)) { 1208 return true; 1209 } 1210 1211 /* 1212 * To avoid excessive duplication of ops we implement shift 1213 * by immediate using the variable shift operations. 1214 */ 1215 constimm = tcg_constant_i64(dup_const(a->size, a->shift)); 1216 1217 for (pass = 0; pass < a->q + 1; pass++) { 1218 TCGv_i64 tmp = tcg_temp_new_i64(); 1219 1220 read_neon_element64(tmp, a->vm, pass, MO_64); 1221 fn(tmp, tcg_env, tmp, constimm); 1222 write_neon_element64(tmp, a->vd, pass, MO_64); 1223 } 1224 return true; 1225 } 1226 1227 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, 1228 NeonGenTwoOpEnvFn *fn) 1229 { 1230 /* 1231 * 2-reg-and-shift operations, size < 3 case, where the 1232 * helper needs to be passed tcg_env. 1233 */ 1234 TCGv_i32 constimm, tmp; 1235 int pass; 1236 1237 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1238 return false; 1239 } 1240 1241 /* UNDEF accesses to D16-D31 if they don't exist. */ 1242 if (!dc_isar_feature(aa32_simd_r32, s) && 1243 ((a->vd | a->vm) & 0x10)) { 1244 return false; 1245 } 1246 1247 if ((a->vm | a->vd) & a->q) { 1248 return false; 1249 } 1250 1251 if (!vfp_access_check(s)) { 1252 return true; 1253 } 1254 1255 /* 1256 * To avoid excessive duplication of ops we implement shift 1257 * by immediate using the variable shift operations. 1258 */ 1259 constimm = tcg_constant_i32(dup_const(a->size, a->shift)); 1260 tmp = tcg_temp_new_i32(); 1261 1262 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 1263 read_neon_element32(tmp, a->vm, pass, MO_32); 1264 fn(tmp, tcg_env, tmp, constimm); 1265 write_neon_element32(tmp, a->vd, pass, MO_32); 1266 } 1267 return true; 1268 } 1269 1270 #define DO_2SHIFT_ENV(INSN, FUNC) \ 1271 static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ 1272 { \ 1273 return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ 1274 } \ 1275 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1276 { \ 1277 static NeonGenTwoOpEnvFn * const fns[] = { \ 1278 gen_helper_neon_##FUNC##8, \ 1279 gen_helper_neon_##FUNC##16, \ 1280 gen_helper_neon_##FUNC##32, \ 1281 }; \ 1282 assert(a->size < ARRAY_SIZE(fns)); \ 1283 return do_2shift_env_32(s, a, fns[a->size]); \ 1284 } 1285 1286 DO_2SHIFT_ENV(VQSHLU, qshlu_s) 1287 DO_2SHIFT_ENV(VQSHL_U, qshl_u) 1288 DO_2SHIFT_ENV(VQSHL_S, qshl_s) 1289 1290 static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, 1291 NeonGenTwo64OpFn *shiftfn, 1292 NeonGenNarrowEnvFn *narrowfn) 1293 { 1294 /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ 1295 TCGv_i64 constimm, rm1, rm2; 1296 TCGv_i32 rd; 1297 1298 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1299 return false; 1300 } 1301 1302 /* UNDEF accesses to D16-D31 if they don't exist. */ 1303 if (!dc_isar_feature(aa32_simd_r32, s) && 1304 ((a->vd | a->vm) & 0x10)) { 1305 return false; 1306 } 1307 1308 if (a->vm & 1) { 1309 return false; 1310 } 1311 1312 if (!vfp_access_check(s)) { 1313 return true; 1314 } 1315 1316 /* 1317 * This is always a right shift, and the shiftfn is always a 1318 * left-shift helper, which thus needs the negated shift count. 1319 */ 1320 constimm = tcg_constant_i64(-a->shift); 1321 rm1 = tcg_temp_new_i64(); 1322 rm2 = tcg_temp_new_i64(); 1323 rd = tcg_temp_new_i32(); 1324 1325 /* Load both inputs first to avoid potential overwrite if rm == rd */ 1326 read_neon_element64(rm1, a->vm, 0, MO_64); 1327 read_neon_element64(rm2, a->vm, 1, MO_64); 1328 1329 shiftfn(rm1, rm1, constimm); 1330 narrowfn(rd, tcg_env, rm1); 1331 write_neon_element32(rd, a->vd, 0, MO_32); 1332 1333 shiftfn(rm2, rm2, constimm); 1334 narrowfn(rd, tcg_env, rm2); 1335 write_neon_element32(rd, a->vd, 1, MO_32); 1336 1337 return true; 1338 } 1339 1340 static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, 1341 NeonGenTwoOpFn *shiftfn, 1342 NeonGenNarrowEnvFn *narrowfn) 1343 { 1344 /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ 1345 TCGv_i32 constimm, rm1, rm2, rm3, rm4; 1346 TCGv_i64 rtmp; 1347 uint32_t imm; 1348 1349 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1350 return false; 1351 } 1352 1353 /* UNDEF accesses to D16-D31 if they don't exist. */ 1354 if (!dc_isar_feature(aa32_simd_r32, s) && 1355 ((a->vd | a->vm) & 0x10)) { 1356 return false; 1357 } 1358 1359 if (a->vm & 1) { 1360 return false; 1361 } 1362 1363 if (!vfp_access_check(s)) { 1364 return true; 1365 } 1366 1367 /* 1368 * This is always a right shift, and the shiftfn is always a 1369 * left-shift helper, which thus needs the negated shift count 1370 * duplicated into each lane of the immediate value. 1371 */ 1372 if (a->size == 1) { 1373 imm = (uint16_t)(-a->shift); 1374 imm |= imm << 16; 1375 } else { 1376 /* size == 2 */ 1377 imm = -a->shift; 1378 } 1379 constimm = tcg_constant_i32(imm); 1380 1381 /* Load all inputs first to avoid potential overwrite */ 1382 rm1 = tcg_temp_new_i32(); 1383 rm2 = tcg_temp_new_i32(); 1384 rm3 = tcg_temp_new_i32(); 1385 rm4 = tcg_temp_new_i32(); 1386 read_neon_element32(rm1, a->vm, 0, MO_32); 1387 read_neon_element32(rm2, a->vm, 1, MO_32); 1388 read_neon_element32(rm3, a->vm, 2, MO_32); 1389 read_neon_element32(rm4, a->vm, 3, MO_32); 1390 rtmp = tcg_temp_new_i64(); 1391 1392 shiftfn(rm1, rm1, constimm); 1393 shiftfn(rm2, rm2, constimm); 1394 1395 tcg_gen_concat_i32_i64(rtmp, rm1, rm2); 1396 1397 narrowfn(rm1, tcg_env, rtmp); 1398 write_neon_element32(rm1, a->vd, 0, MO_32); 1399 1400 shiftfn(rm3, rm3, constimm); 1401 shiftfn(rm4, rm4, constimm); 1402 1403 tcg_gen_concat_i32_i64(rtmp, rm3, rm4); 1404 1405 narrowfn(rm3, tcg_env, rtmp); 1406 write_neon_element32(rm3, a->vd, 1, MO_32); 1407 return true; 1408 } 1409 1410 #define DO_2SN_64(INSN, FUNC, NARROWFUNC) \ 1411 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1412 { \ 1413 return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \ 1414 } 1415 #define DO_2SN_32(INSN, FUNC, NARROWFUNC) \ 1416 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1417 { \ 1418 return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ 1419 } 1420 1421 static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1422 { 1423 tcg_gen_extrl_i64_i32(dest, src); 1424 } 1425 1426 static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1427 { 1428 gen_helper_neon_narrow_u16(dest, src); 1429 } 1430 1431 static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1432 { 1433 gen_helper_neon_narrow_u8(dest, src); 1434 } 1435 1436 DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32) 1437 DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16) 1438 DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8) 1439 1440 DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32) 1441 DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16) 1442 DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8) 1443 1444 DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32) 1445 DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16) 1446 DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) 1447 1448 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) 1449 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) 1450 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) 1451 DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32) 1452 DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16) 1453 DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8) 1454 1455 DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32) 1456 DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16) 1457 DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8) 1458 1459 DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32) 1460 DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16) 1461 DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) 1462 1463 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32) 1464 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) 1465 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) 1466 1467 static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, 1468 NeonGenWidenFn *widenfn, bool u) 1469 { 1470 TCGv_i64 tmp; 1471 TCGv_i32 rm0, rm1; 1472 uint64_t widen_mask = 0; 1473 1474 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1475 return false; 1476 } 1477 1478 /* UNDEF accesses to D16-D31 if they don't exist. */ 1479 if (!dc_isar_feature(aa32_simd_r32, s) && 1480 ((a->vd | a->vm) & 0x10)) { 1481 return false; 1482 } 1483 1484 if (a->vd & 1) { 1485 return false; 1486 } 1487 1488 if (!vfp_access_check(s)) { 1489 return true; 1490 } 1491 1492 /* 1493 * This is a widen-and-shift operation. The shift is always less 1494 * than the width of the source type, so after widening the input 1495 * vector we can simply shift the whole 64-bit widened register, 1496 * and then clear the potential overflow bits resulting from left 1497 * bits of the narrow input appearing as right bits of the left 1498 * neighbour narrow input. Calculate a mask of bits to clear. 1499 */ 1500 if ((a->shift != 0) && (a->size < 2 || u)) { 1501 int esize = 8 << a->size; 1502 widen_mask = MAKE_64BIT_MASK(0, esize); 1503 widen_mask >>= esize - a->shift; 1504 widen_mask = dup_const(a->size + 1, widen_mask); 1505 } 1506 1507 rm0 = tcg_temp_new_i32(); 1508 rm1 = tcg_temp_new_i32(); 1509 read_neon_element32(rm0, a->vm, 0, MO_32); 1510 read_neon_element32(rm1, a->vm, 1, MO_32); 1511 tmp = tcg_temp_new_i64(); 1512 1513 widenfn(tmp, rm0); 1514 if (a->shift != 0) { 1515 tcg_gen_shli_i64(tmp, tmp, a->shift); 1516 tcg_gen_andi_i64(tmp, tmp, ~widen_mask); 1517 } 1518 write_neon_element64(tmp, a->vd, 0, MO_64); 1519 1520 widenfn(tmp, rm1); 1521 if (a->shift != 0) { 1522 tcg_gen_shli_i64(tmp, tmp, a->shift); 1523 tcg_gen_andi_i64(tmp, tmp, ~widen_mask); 1524 } 1525 write_neon_element64(tmp, a->vd, 1, MO_64); 1526 return true; 1527 } 1528 1529 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) 1530 { 1531 static NeonGenWidenFn * const widenfn[] = { 1532 gen_helper_neon_widen_s8, 1533 gen_helper_neon_widen_s16, 1534 tcg_gen_ext_i32_i64, 1535 }; 1536 return do_vshll_2sh(s, a, widenfn[a->size], false); 1537 } 1538 1539 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) 1540 { 1541 static NeonGenWidenFn * const widenfn[] = { 1542 gen_helper_neon_widen_u8, 1543 gen_helper_neon_widen_u16, 1544 tcg_gen_extu_i32_i64, 1545 }; 1546 return do_vshll_2sh(s, a, widenfn[a->size], true); 1547 } 1548 1549 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, 1550 gen_helper_gvec_2_ptr *fn) 1551 { 1552 /* FP operations in 2-reg-and-shift group */ 1553 int vec_size = a->q ? 16 : 8; 1554 int rd_ofs = neon_full_reg_offset(a->vd); 1555 int rm_ofs = neon_full_reg_offset(a->vm); 1556 TCGv_ptr fpst; 1557 1558 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1559 return false; 1560 } 1561 1562 if (a->size == MO_16) { 1563 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1564 return false; 1565 } 1566 } 1567 1568 /* UNDEF accesses to D16-D31 if they don't exist. */ 1569 if (!dc_isar_feature(aa32_simd_r32, s) && 1570 ((a->vd | a->vm) & 0x10)) { 1571 return false; 1572 } 1573 1574 if ((a->vm | a->vd) & a->q) { 1575 return false; 1576 } 1577 1578 if (!vfp_access_check(s)) { 1579 return true; 1580 } 1581 1582 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 1583 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn); 1584 return true; 1585 } 1586 1587 #define DO_FP_2SH(INSN, FUNC) \ 1588 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1589 { \ 1590 return do_fp_2sh(s, a, FUNC); \ 1591 } 1592 1593 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf) 1594 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf) 1595 DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs) 1596 DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu) 1597 1598 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh) 1599 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh) 1600 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs) 1601 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu) 1602 1603 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, 1604 GVecGen2iFn *fn) 1605 { 1606 uint64_t imm; 1607 int reg_ofs, vec_size; 1608 1609 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1610 return false; 1611 } 1612 1613 /* UNDEF accesses to D16-D31 if they don't exist. */ 1614 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 1615 return false; 1616 } 1617 1618 if (a->vd & a->q) { 1619 return false; 1620 } 1621 1622 if (!vfp_access_check(s)) { 1623 return true; 1624 } 1625 1626 reg_ofs = neon_full_reg_offset(a->vd); 1627 vec_size = a->q ? 16 : 8; 1628 imm = asimd_imm_const(a->imm, a->cmode, a->op); 1629 1630 fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size); 1631 return true; 1632 } 1633 1634 static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs, 1635 int64_t c, uint32_t oprsz, uint32_t maxsz) 1636 { 1637 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 1638 } 1639 1640 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) 1641 { 1642 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ 1643 GVecGen2iFn *fn; 1644 1645 if ((a->cmode & 1) && a->cmode < 12) { 1646 /* for op=1, the imm will be inverted, so BIC becomes AND. */ 1647 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; 1648 } else { 1649 /* There is one unallocated cmode/op combination in this space */ 1650 if (a->cmode == 15 && a->op == 1) { 1651 return false; 1652 } 1653 fn = gen_VMOV_1r; 1654 } 1655 return do_1reg_imm(s, a, fn); 1656 } 1657 1658 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, 1659 NeonGenWidenFn *widenfn, 1660 NeonGenTwo64OpFn *opfn, 1661 int src1_mop, int src2_mop) 1662 { 1663 /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ 1664 TCGv_i64 rn0_64, rn1_64, rm_64; 1665 1666 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1667 return false; 1668 } 1669 1670 /* UNDEF accesses to D16-D31 if they don't exist. */ 1671 if (!dc_isar_feature(aa32_simd_r32, s) && 1672 ((a->vd | a->vn | a->vm) & 0x10)) { 1673 return false; 1674 } 1675 1676 if (!opfn) { 1677 /* size == 3 case, which is an entirely different insn group */ 1678 return false; 1679 } 1680 1681 if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) { 1682 return false; 1683 } 1684 1685 if (!vfp_access_check(s)) { 1686 return true; 1687 } 1688 1689 rn0_64 = tcg_temp_new_i64(); 1690 rn1_64 = tcg_temp_new_i64(); 1691 rm_64 = tcg_temp_new_i64(); 1692 1693 if (src1_mop >= 0) { 1694 read_neon_element64(rn0_64, a->vn, 0, src1_mop); 1695 } else { 1696 TCGv_i32 tmp = tcg_temp_new_i32(); 1697 read_neon_element32(tmp, a->vn, 0, MO_32); 1698 widenfn(rn0_64, tmp); 1699 } 1700 if (src2_mop >= 0) { 1701 read_neon_element64(rm_64, a->vm, 0, src2_mop); 1702 } else { 1703 TCGv_i32 tmp = tcg_temp_new_i32(); 1704 read_neon_element32(tmp, a->vm, 0, MO_32); 1705 widenfn(rm_64, tmp); 1706 } 1707 1708 opfn(rn0_64, rn0_64, rm_64); 1709 1710 /* 1711 * Load second pass inputs before storing the first pass result, to 1712 * avoid incorrect results if a narrow input overlaps with the result. 1713 */ 1714 if (src1_mop >= 0) { 1715 read_neon_element64(rn1_64, a->vn, 1, src1_mop); 1716 } else { 1717 TCGv_i32 tmp = tcg_temp_new_i32(); 1718 read_neon_element32(tmp, a->vn, 1, MO_32); 1719 widenfn(rn1_64, tmp); 1720 } 1721 if (src2_mop >= 0) { 1722 read_neon_element64(rm_64, a->vm, 1, src2_mop); 1723 } else { 1724 TCGv_i32 tmp = tcg_temp_new_i32(); 1725 read_neon_element32(tmp, a->vm, 1, MO_32); 1726 widenfn(rm_64, tmp); 1727 } 1728 1729 write_neon_element64(rn0_64, a->vd, 0, MO_64); 1730 1731 opfn(rn1_64, rn1_64, rm_64); 1732 write_neon_element64(rn1_64, a->vd, 1, MO_64); 1733 1734 return true; 1735 } 1736 1737 #define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \ 1738 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 1739 { \ 1740 static NeonGenWidenFn * const widenfn[] = { \ 1741 gen_helper_neon_widen_##S##8, \ 1742 gen_helper_neon_widen_##S##16, \ 1743 NULL, NULL, \ 1744 }; \ 1745 static NeonGenTwo64OpFn * const addfn[] = { \ 1746 gen_helper_neon_##OP##l_u16, \ 1747 gen_helper_neon_##OP##l_u32, \ 1748 tcg_gen_##OP##_i64, \ 1749 NULL, \ 1750 }; \ 1751 int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \ 1752 return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \ 1753 SRC1WIDE ? MO_UQ : narrow_mop, \ 1754 narrow_mop); \ 1755 } 1756 1757 DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN) 1758 DO_PREWIDEN(VADDL_U, u, add, false, 0) 1759 DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN) 1760 DO_PREWIDEN(VSUBL_U, u, sub, false, 0) 1761 DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN) 1762 DO_PREWIDEN(VADDW_U, u, add, true, 0) 1763 DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN) 1764 DO_PREWIDEN(VSUBW_U, u, sub, true, 0) 1765 1766 static bool do_narrow_3d(DisasContext *s, arg_3diff *a, 1767 NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn) 1768 { 1769 /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */ 1770 TCGv_i64 rn_64, rm_64; 1771 TCGv_i32 rd0, rd1; 1772 1773 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1774 return false; 1775 } 1776 1777 /* UNDEF accesses to D16-D31 if they don't exist. */ 1778 if (!dc_isar_feature(aa32_simd_r32, s) && 1779 ((a->vd | a->vn | a->vm) & 0x10)) { 1780 return false; 1781 } 1782 1783 if (!opfn || !narrowfn) { 1784 /* size == 3 case, which is an entirely different insn group */ 1785 return false; 1786 } 1787 1788 if ((a->vn | a->vm) & 1) { 1789 return false; 1790 } 1791 1792 if (!vfp_access_check(s)) { 1793 return true; 1794 } 1795 1796 rn_64 = tcg_temp_new_i64(); 1797 rm_64 = tcg_temp_new_i64(); 1798 rd0 = tcg_temp_new_i32(); 1799 rd1 = tcg_temp_new_i32(); 1800 1801 read_neon_element64(rn_64, a->vn, 0, MO_64); 1802 read_neon_element64(rm_64, a->vm, 0, MO_64); 1803 1804 opfn(rn_64, rn_64, rm_64); 1805 1806 narrowfn(rd0, rn_64); 1807 1808 read_neon_element64(rn_64, a->vn, 1, MO_64); 1809 read_neon_element64(rm_64, a->vm, 1, MO_64); 1810 1811 opfn(rn_64, rn_64, rm_64); 1812 1813 narrowfn(rd1, rn_64); 1814 1815 write_neon_element32(rd0, a->vd, 0, MO_32); 1816 write_neon_element32(rd1, a->vd, 1, MO_32); 1817 1818 return true; 1819 } 1820 1821 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \ 1822 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 1823 { \ 1824 static NeonGenTwo64OpFn * const addfn[] = { \ 1825 gen_helper_neon_##OP##l_u16, \ 1826 gen_helper_neon_##OP##l_u32, \ 1827 tcg_gen_##OP##_i64, \ 1828 NULL, \ 1829 }; \ 1830 static NeonGenNarrowFn * const narrowfn[] = { \ 1831 gen_helper_neon_##NARROWTYPE##_high_u8, \ 1832 gen_helper_neon_##NARROWTYPE##_high_u16, \ 1833 EXTOP, \ 1834 NULL, \ 1835 }; \ 1836 return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \ 1837 } 1838 1839 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn) 1840 { 1841 tcg_gen_addi_i64(rn, rn, 1u << 31); 1842 tcg_gen_extrh_i64_i32(rd, rn); 1843 } 1844 1845 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32) 1846 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32) 1847 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32) 1848 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32) 1849 1850 static bool do_long_3d(DisasContext *s, arg_3diff *a, 1851 NeonGenTwoOpWidenFn *opfn, 1852 NeonGenTwo64OpFn *accfn) 1853 { 1854 /* 1855 * 3-regs different lengths, long operations. 1856 * These perform an operation on two inputs that returns a double-width 1857 * result, and then possibly perform an accumulation operation of 1858 * that result into the double-width destination. 1859 */ 1860 TCGv_i64 rd0, rd1, tmp; 1861 TCGv_i32 rn, rm; 1862 1863 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1864 return false; 1865 } 1866 1867 /* UNDEF accesses to D16-D31 if they don't exist. */ 1868 if (!dc_isar_feature(aa32_simd_r32, s) && 1869 ((a->vd | a->vn | a->vm) & 0x10)) { 1870 return false; 1871 } 1872 1873 if (!opfn) { 1874 /* size == 3 case, which is an entirely different insn group */ 1875 return false; 1876 } 1877 1878 if (a->vd & 1) { 1879 return false; 1880 } 1881 1882 if (!vfp_access_check(s)) { 1883 return true; 1884 } 1885 1886 rd0 = tcg_temp_new_i64(); 1887 rd1 = tcg_temp_new_i64(); 1888 1889 rn = tcg_temp_new_i32(); 1890 rm = tcg_temp_new_i32(); 1891 read_neon_element32(rn, a->vn, 0, MO_32); 1892 read_neon_element32(rm, a->vm, 0, MO_32); 1893 opfn(rd0, rn, rm); 1894 1895 read_neon_element32(rn, a->vn, 1, MO_32); 1896 read_neon_element32(rm, a->vm, 1, MO_32); 1897 opfn(rd1, rn, rm); 1898 1899 /* Don't store results until after all loads: they might overlap */ 1900 if (accfn) { 1901 tmp = tcg_temp_new_i64(); 1902 read_neon_element64(tmp, a->vd, 0, MO_64); 1903 accfn(rd0, tmp, rd0); 1904 read_neon_element64(tmp, a->vd, 1, MO_64); 1905 accfn(rd1, tmp, rd1); 1906 } 1907 1908 write_neon_element64(rd0, a->vd, 0, MO_64); 1909 write_neon_element64(rd1, a->vd, 1, MO_64); 1910 1911 return true; 1912 } 1913 1914 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) 1915 { 1916 static NeonGenTwoOpWidenFn * const opfn[] = { 1917 gen_helper_neon_abdl_s16, 1918 gen_helper_neon_abdl_s32, 1919 gen_helper_neon_abdl_s64, 1920 NULL, 1921 }; 1922 1923 return do_long_3d(s, a, opfn[a->size], NULL); 1924 } 1925 1926 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) 1927 { 1928 static NeonGenTwoOpWidenFn * const opfn[] = { 1929 gen_helper_neon_abdl_u16, 1930 gen_helper_neon_abdl_u32, 1931 gen_helper_neon_abdl_u64, 1932 NULL, 1933 }; 1934 1935 return do_long_3d(s, a, opfn[a->size], NULL); 1936 } 1937 1938 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) 1939 { 1940 static NeonGenTwoOpWidenFn * const opfn[] = { 1941 gen_helper_neon_abdl_s16, 1942 gen_helper_neon_abdl_s32, 1943 gen_helper_neon_abdl_s64, 1944 NULL, 1945 }; 1946 static NeonGenTwo64OpFn * const addfn[] = { 1947 gen_helper_neon_addl_u16, 1948 gen_helper_neon_addl_u32, 1949 tcg_gen_add_i64, 1950 NULL, 1951 }; 1952 1953 return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 1954 } 1955 1956 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) 1957 { 1958 static NeonGenTwoOpWidenFn * const opfn[] = { 1959 gen_helper_neon_abdl_u16, 1960 gen_helper_neon_abdl_u32, 1961 gen_helper_neon_abdl_u64, 1962 NULL, 1963 }; 1964 static NeonGenTwo64OpFn * const addfn[] = { 1965 gen_helper_neon_addl_u16, 1966 gen_helper_neon_addl_u32, 1967 tcg_gen_add_i64, 1968 NULL, 1969 }; 1970 1971 return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 1972 } 1973 1974 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 1975 { 1976 TCGv_i32 lo = tcg_temp_new_i32(); 1977 TCGv_i32 hi = tcg_temp_new_i32(); 1978 1979 tcg_gen_muls2_i32(lo, hi, rn, rm); 1980 tcg_gen_concat_i32_i64(rd, lo, hi); 1981 } 1982 1983 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 1984 { 1985 TCGv_i32 lo = tcg_temp_new_i32(); 1986 TCGv_i32 hi = tcg_temp_new_i32(); 1987 1988 tcg_gen_mulu2_i32(lo, hi, rn, rm); 1989 tcg_gen_concat_i32_i64(rd, lo, hi); 1990 } 1991 1992 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) 1993 { 1994 static NeonGenTwoOpWidenFn * const opfn[] = { 1995 gen_helper_neon_mull_s8, 1996 gen_helper_neon_mull_s16, 1997 gen_mull_s32, 1998 NULL, 1999 }; 2000 2001 return do_long_3d(s, a, opfn[a->size], NULL); 2002 } 2003 2004 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) 2005 { 2006 static NeonGenTwoOpWidenFn * const opfn[] = { 2007 gen_helper_neon_mull_u8, 2008 gen_helper_neon_mull_u16, 2009 gen_mull_u32, 2010 NULL, 2011 }; 2012 2013 return do_long_3d(s, a, opfn[a->size], NULL); 2014 } 2015 2016 #define DO_VMLAL(INSN,MULL,ACC) \ 2017 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 2018 { \ 2019 static NeonGenTwoOpWidenFn * const opfn[] = { \ 2020 gen_helper_neon_##MULL##8, \ 2021 gen_helper_neon_##MULL##16, \ 2022 gen_##MULL##32, \ 2023 NULL, \ 2024 }; \ 2025 static NeonGenTwo64OpFn * const accfn[] = { \ 2026 gen_helper_neon_##ACC##l_u16, \ 2027 gen_helper_neon_##ACC##l_u32, \ 2028 tcg_gen_##ACC##_i64, \ 2029 NULL, \ 2030 }; \ 2031 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ 2032 } 2033 2034 DO_VMLAL(VMLAL_S,mull_s,add) 2035 DO_VMLAL(VMLAL_U,mull_u,add) 2036 DO_VMLAL(VMLSL_S,mull_s,sub) 2037 DO_VMLAL(VMLSL_U,mull_u,sub) 2038 2039 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2040 { 2041 gen_helper_neon_mull_s16(rd, rn, rm); 2042 gen_helper_neon_addl_saturate_s32(rd, tcg_env, rd, rd); 2043 } 2044 2045 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2046 { 2047 gen_mull_s32(rd, rn, rm); 2048 gen_helper_neon_addl_saturate_s64(rd, tcg_env, rd, rd); 2049 } 2050 2051 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) 2052 { 2053 static NeonGenTwoOpWidenFn * const opfn[] = { 2054 NULL, 2055 gen_VQDMULL_16, 2056 gen_VQDMULL_32, 2057 NULL, 2058 }; 2059 2060 return do_long_3d(s, a, opfn[a->size], NULL); 2061 } 2062 2063 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2064 { 2065 gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm); 2066 } 2067 2068 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2069 { 2070 gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm); 2071 } 2072 2073 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) 2074 { 2075 static NeonGenTwoOpWidenFn * const opfn[] = { 2076 NULL, 2077 gen_VQDMULL_16, 2078 gen_VQDMULL_32, 2079 NULL, 2080 }; 2081 static NeonGenTwo64OpFn * const accfn[] = { 2082 NULL, 2083 gen_VQDMLAL_acc_16, 2084 gen_VQDMLAL_acc_32, 2085 NULL, 2086 }; 2087 2088 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2089 } 2090 2091 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2092 { 2093 gen_helper_neon_negl_u32(rm, rm); 2094 gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm); 2095 } 2096 2097 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2098 { 2099 tcg_gen_neg_i64(rm, rm); 2100 gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm); 2101 } 2102 2103 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) 2104 { 2105 static NeonGenTwoOpWidenFn * const opfn[] = { 2106 NULL, 2107 gen_VQDMULL_16, 2108 gen_VQDMULL_32, 2109 NULL, 2110 }; 2111 static NeonGenTwo64OpFn * const accfn[] = { 2112 NULL, 2113 gen_VQDMLSL_acc_16, 2114 gen_VQDMLSL_acc_32, 2115 NULL, 2116 }; 2117 2118 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2119 } 2120 2121 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) 2122 { 2123 gen_helper_gvec_3 *fn_gvec; 2124 2125 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2126 return false; 2127 } 2128 2129 /* UNDEF accesses to D16-D31 if they don't exist. */ 2130 if (!dc_isar_feature(aa32_simd_r32, s) && 2131 ((a->vd | a->vn | a->vm) & 0x10)) { 2132 return false; 2133 } 2134 2135 if (a->vd & 1) { 2136 return false; 2137 } 2138 2139 switch (a->size) { 2140 case 0: 2141 fn_gvec = gen_helper_neon_pmull_h; 2142 break; 2143 case 2: 2144 if (!dc_isar_feature(aa32_pmull, s)) { 2145 return false; 2146 } 2147 fn_gvec = gen_helper_gvec_pmull_q; 2148 break; 2149 default: 2150 return false; 2151 } 2152 2153 if (!vfp_access_check(s)) { 2154 return true; 2155 } 2156 2157 tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd), 2158 neon_full_reg_offset(a->vn), 2159 neon_full_reg_offset(a->vm), 2160 16, 16, 0, fn_gvec); 2161 return true; 2162 } 2163 2164 static void gen_neon_dup_low16(TCGv_i32 var) 2165 { 2166 TCGv_i32 tmp = tcg_temp_new_i32(); 2167 tcg_gen_ext16u_i32(var, var); 2168 tcg_gen_shli_i32(tmp, var, 16); 2169 tcg_gen_or_i32(var, var, tmp); 2170 } 2171 2172 static void gen_neon_dup_high16(TCGv_i32 var) 2173 { 2174 TCGv_i32 tmp = tcg_temp_new_i32(); 2175 tcg_gen_andi_i32(var, var, 0xffff0000); 2176 tcg_gen_shri_i32(tmp, var, 16); 2177 tcg_gen_or_i32(var, var, tmp); 2178 } 2179 2180 static inline TCGv_i32 neon_get_scalar(int size, int reg) 2181 { 2182 TCGv_i32 tmp = tcg_temp_new_i32(); 2183 if (size == MO_16) { 2184 read_neon_element32(tmp, reg & 7, reg >> 4, MO_32); 2185 if (reg & 8) { 2186 gen_neon_dup_high16(tmp); 2187 } else { 2188 gen_neon_dup_low16(tmp); 2189 } 2190 } else { 2191 read_neon_element32(tmp, reg & 15, reg >> 4, MO_32); 2192 } 2193 return tmp; 2194 } 2195 2196 static bool do_2scalar(DisasContext *s, arg_2scalar *a, 2197 NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) 2198 { 2199 /* 2200 * Two registers and a scalar: perform an operation between 2201 * the input elements and the scalar, and then possibly 2202 * perform an accumulation operation of that result into the 2203 * destination. 2204 */ 2205 TCGv_i32 scalar, tmp; 2206 int pass; 2207 2208 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2209 return false; 2210 } 2211 2212 /* UNDEF accesses to D16-D31 if they don't exist. */ 2213 if (!dc_isar_feature(aa32_simd_r32, s) && 2214 ((a->vd | a->vn | a->vm) & 0x10)) { 2215 return false; 2216 } 2217 2218 if (!opfn) { 2219 /* Bad size (including size == 3, which is a different insn group) */ 2220 return false; 2221 } 2222 2223 if (a->q && ((a->vd | a->vn) & 1)) { 2224 return false; 2225 } 2226 2227 if (!vfp_access_check(s)) { 2228 return true; 2229 } 2230 2231 scalar = neon_get_scalar(a->size, a->vm); 2232 tmp = tcg_temp_new_i32(); 2233 2234 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 2235 read_neon_element32(tmp, a->vn, pass, MO_32); 2236 opfn(tmp, tmp, scalar); 2237 if (accfn) { 2238 TCGv_i32 rd = tcg_temp_new_i32(); 2239 read_neon_element32(rd, a->vd, pass, MO_32); 2240 accfn(tmp, rd, tmp); 2241 } 2242 write_neon_element32(tmp, a->vd, pass, MO_32); 2243 } 2244 return true; 2245 } 2246 2247 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) 2248 { 2249 static NeonGenTwoOpFn * const opfn[] = { 2250 NULL, 2251 gen_helper_neon_mul_u16, 2252 tcg_gen_mul_i32, 2253 NULL, 2254 }; 2255 2256 return do_2scalar(s, a, opfn[a->size], NULL); 2257 } 2258 2259 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) 2260 { 2261 static NeonGenTwoOpFn * const opfn[] = { 2262 NULL, 2263 gen_helper_neon_mul_u16, 2264 tcg_gen_mul_i32, 2265 NULL, 2266 }; 2267 static NeonGenTwoOpFn * const accfn[] = { 2268 NULL, 2269 gen_helper_neon_add_u16, 2270 tcg_gen_add_i32, 2271 NULL, 2272 }; 2273 2274 return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2275 } 2276 2277 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) 2278 { 2279 static NeonGenTwoOpFn * const opfn[] = { 2280 NULL, 2281 gen_helper_neon_mul_u16, 2282 tcg_gen_mul_i32, 2283 NULL, 2284 }; 2285 static NeonGenTwoOpFn * const accfn[] = { 2286 NULL, 2287 gen_helper_neon_sub_u16, 2288 tcg_gen_sub_i32, 2289 NULL, 2290 }; 2291 2292 return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2293 } 2294 2295 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a, 2296 gen_helper_gvec_3_ptr *fn) 2297 { 2298 /* Two registers and a scalar, using gvec */ 2299 int vec_size = a->q ? 16 : 8; 2300 int rd_ofs = neon_full_reg_offset(a->vd); 2301 int rn_ofs = neon_full_reg_offset(a->vn); 2302 int rm_ofs; 2303 int idx; 2304 TCGv_ptr fpstatus; 2305 2306 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2307 return false; 2308 } 2309 2310 /* UNDEF accesses to D16-D31 if they don't exist. */ 2311 if (!dc_isar_feature(aa32_simd_r32, s) && 2312 ((a->vd | a->vn | a->vm) & 0x10)) { 2313 return false; 2314 } 2315 2316 if (!fn) { 2317 /* Bad size (including size == 3, which is a different insn group) */ 2318 return false; 2319 } 2320 2321 if (a->q && ((a->vd | a->vn) & 1)) { 2322 return false; 2323 } 2324 2325 if (!vfp_access_check(s)) { 2326 return true; 2327 } 2328 2329 /* a->vm is M:Vm, which encodes both register and index */ 2330 idx = extract32(a->vm, a->size + 2, 2); 2331 a->vm = extract32(a->vm, 0, a->size + 2); 2332 rm_ofs = neon_full_reg_offset(a->vm); 2333 2334 fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD); 2335 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus, 2336 vec_size, vec_size, idx, fn); 2337 return true; 2338 } 2339 2340 #define DO_VMUL_F_2sc(NAME, FUNC) \ 2341 static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \ 2342 { \ 2343 static gen_helper_gvec_3_ptr * const opfn[] = { \ 2344 NULL, \ 2345 gen_helper_##FUNC##_h, \ 2346 gen_helper_##FUNC##_s, \ 2347 NULL, \ 2348 }; \ 2349 if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \ 2350 return false; \ 2351 } \ 2352 return do_2scalar_fp_vec(s, a, opfn[a->size]); \ 2353 } 2354 2355 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx) 2356 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx) 2357 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx) 2358 2359 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) 2360 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) 2361 WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16) 2362 WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) 2363 2364 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) 2365 { 2366 static NeonGenTwoOpFn * const opfn[] = { 2367 NULL, 2368 gen_VQDMULH_16, 2369 gen_VQDMULH_32, 2370 NULL, 2371 }; 2372 2373 return do_2scalar(s, a, opfn[a->size], NULL); 2374 } 2375 2376 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) 2377 { 2378 static NeonGenTwoOpFn * const opfn[] = { 2379 NULL, 2380 gen_VQRDMULH_16, 2381 gen_VQRDMULH_32, 2382 NULL, 2383 }; 2384 2385 return do_2scalar(s, a, opfn[a->size], NULL); 2386 } 2387 2388 static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, 2389 NeonGenThreeOpEnvFn *opfn) 2390 { 2391 /* 2392 * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn 2393 * performs a kind of fused op-then-accumulate using a helper 2394 * function that takes all of rd, rn and the scalar at once. 2395 */ 2396 TCGv_i32 scalar, rn, rd; 2397 int pass; 2398 2399 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2400 return false; 2401 } 2402 2403 if (!dc_isar_feature(aa32_rdm, s)) { 2404 return false; 2405 } 2406 2407 /* UNDEF accesses to D16-D31 if they don't exist. */ 2408 if (!dc_isar_feature(aa32_simd_r32, s) && 2409 ((a->vd | a->vn | a->vm) & 0x10)) { 2410 return false; 2411 } 2412 2413 if (!opfn) { 2414 /* Bad size (including size == 3, which is a different insn group) */ 2415 return false; 2416 } 2417 2418 if (a->q && ((a->vd | a->vn) & 1)) { 2419 return false; 2420 } 2421 2422 if (!vfp_access_check(s)) { 2423 return true; 2424 } 2425 2426 scalar = neon_get_scalar(a->size, a->vm); 2427 rn = tcg_temp_new_i32(); 2428 rd = tcg_temp_new_i32(); 2429 2430 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 2431 read_neon_element32(rn, a->vn, pass, MO_32); 2432 read_neon_element32(rd, a->vd, pass, MO_32); 2433 opfn(rd, tcg_env, rn, scalar, rd); 2434 write_neon_element32(rd, a->vd, pass, MO_32); 2435 } 2436 return true; 2437 } 2438 2439 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) 2440 { 2441 static NeonGenThreeOpEnvFn *opfn[] = { 2442 NULL, 2443 gen_helper_neon_qrdmlah_s16, 2444 gen_helper_neon_qrdmlah_s32, 2445 NULL, 2446 }; 2447 return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2448 } 2449 2450 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) 2451 { 2452 static NeonGenThreeOpEnvFn *opfn[] = { 2453 NULL, 2454 gen_helper_neon_qrdmlsh_s16, 2455 gen_helper_neon_qrdmlsh_s32, 2456 NULL, 2457 }; 2458 return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2459 } 2460 2461 static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, 2462 NeonGenTwoOpWidenFn *opfn, 2463 NeonGenTwo64OpFn *accfn) 2464 { 2465 /* 2466 * Two registers and a scalar, long operations: perform an 2467 * operation on the input elements and the scalar which produces 2468 * a double-width result, and then possibly perform an accumulation 2469 * operation of that result into the destination. 2470 */ 2471 TCGv_i32 scalar, rn; 2472 TCGv_i64 rn0_64, rn1_64; 2473 2474 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2475 return false; 2476 } 2477 2478 /* UNDEF accesses to D16-D31 if they don't exist. */ 2479 if (!dc_isar_feature(aa32_simd_r32, s) && 2480 ((a->vd | a->vn | a->vm) & 0x10)) { 2481 return false; 2482 } 2483 2484 if (!opfn) { 2485 /* Bad size (including size == 3, which is a different insn group) */ 2486 return false; 2487 } 2488 2489 if (a->vd & 1) { 2490 return false; 2491 } 2492 2493 if (!vfp_access_check(s)) { 2494 return true; 2495 } 2496 2497 scalar = neon_get_scalar(a->size, a->vm); 2498 2499 /* Load all inputs before writing any outputs, in case of overlap */ 2500 rn = tcg_temp_new_i32(); 2501 read_neon_element32(rn, a->vn, 0, MO_32); 2502 rn0_64 = tcg_temp_new_i64(); 2503 opfn(rn0_64, rn, scalar); 2504 2505 read_neon_element32(rn, a->vn, 1, MO_32); 2506 rn1_64 = tcg_temp_new_i64(); 2507 opfn(rn1_64, rn, scalar); 2508 2509 if (accfn) { 2510 TCGv_i64 t64 = tcg_temp_new_i64(); 2511 read_neon_element64(t64, a->vd, 0, MO_64); 2512 accfn(rn0_64, t64, rn0_64); 2513 read_neon_element64(t64, a->vd, 1, MO_64); 2514 accfn(rn1_64, t64, rn1_64); 2515 } 2516 2517 write_neon_element64(rn0_64, a->vd, 0, MO_64); 2518 write_neon_element64(rn1_64, a->vd, 1, MO_64); 2519 return true; 2520 } 2521 2522 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) 2523 { 2524 static NeonGenTwoOpWidenFn * const opfn[] = { 2525 NULL, 2526 gen_helper_neon_mull_s16, 2527 gen_mull_s32, 2528 NULL, 2529 }; 2530 2531 return do_2scalar_long(s, a, opfn[a->size], NULL); 2532 } 2533 2534 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) 2535 { 2536 static NeonGenTwoOpWidenFn * const opfn[] = { 2537 NULL, 2538 gen_helper_neon_mull_u16, 2539 gen_mull_u32, 2540 NULL, 2541 }; 2542 2543 return do_2scalar_long(s, a, opfn[a->size], NULL); 2544 } 2545 2546 #define DO_VMLAL_2SC(INSN, MULL, ACC) \ 2547 static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ 2548 { \ 2549 static NeonGenTwoOpWidenFn * const opfn[] = { \ 2550 NULL, \ 2551 gen_helper_neon_##MULL##16, \ 2552 gen_##MULL##32, \ 2553 NULL, \ 2554 }; \ 2555 static NeonGenTwo64OpFn * const accfn[] = { \ 2556 NULL, \ 2557 gen_helper_neon_##ACC##l_u32, \ 2558 tcg_gen_##ACC##_i64, \ 2559 NULL, \ 2560 }; \ 2561 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \ 2562 } 2563 2564 DO_VMLAL_2SC(VMLAL_S, mull_s, add) 2565 DO_VMLAL_2SC(VMLAL_U, mull_u, add) 2566 DO_VMLAL_2SC(VMLSL_S, mull_s, sub) 2567 DO_VMLAL_2SC(VMLSL_U, mull_u, sub) 2568 2569 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) 2570 { 2571 static NeonGenTwoOpWidenFn * const opfn[] = { 2572 NULL, 2573 gen_VQDMULL_16, 2574 gen_VQDMULL_32, 2575 NULL, 2576 }; 2577 2578 return do_2scalar_long(s, a, opfn[a->size], NULL); 2579 } 2580 2581 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) 2582 { 2583 static NeonGenTwoOpWidenFn * const opfn[] = { 2584 NULL, 2585 gen_VQDMULL_16, 2586 gen_VQDMULL_32, 2587 NULL, 2588 }; 2589 static NeonGenTwo64OpFn * const accfn[] = { 2590 NULL, 2591 gen_VQDMLAL_acc_16, 2592 gen_VQDMLAL_acc_32, 2593 NULL, 2594 }; 2595 2596 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2597 } 2598 2599 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) 2600 { 2601 static NeonGenTwoOpWidenFn * const opfn[] = { 2602 NULL, 2603 gen_VQDMULL_16, 2604 gen_VQDMULL_32, 2605 NULL, 2606 }; 2607 static NeonGenTwo64OpFn * const accfn[] = { 2608 NULL, 2609 gen_VQDMLSL_acc_16, 2610 gen_VQDMLSL_acc_32, 2611 NULL, 2612 }; 2613 2614 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2615 } 2616 2617 static bool trans_VEXT(DisasContext *s, arg_VEXT *a) 2618 { 2619 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2620 return false; 2621 } 2622 2623 /* UNDEF accesses to D16-D31 if they don't exist. */ 2624 if (!dc_isar_feature(aa32_simd_r32, s) && 2625 ((a->vd | a->vn | a->vm) & 0x10)) { 2626 return false; 2627 } 2628 2629 if ((a->vn | a->vm | a->vd) & a->q) { 2630 return false; 2631 } 2632 2633 if (a->imm > 7 && !a->q) { 2634 return false; 2635 } 2636 2637 if (!vfp_access_check(s)) { 2638 return true; 2639 } 2640 2641 if (!a->q) { 2642 /* Extract 64 bits from <Vm:Vn> */ 2643 TCGv_i64 left, right, dest; 2644 2645 left = tcg_temp_new_i64(); 2646 right = tcg_temp_new_i64(); 2647 dest = tcg_temp_new_i64(); 2648 2649 read_neon_element64(right, a->vn, 0, MO_64); 2650 read_neon_element64(left, a->vm, 0, MO_64); 2651 tcg_gen_extract2_i64(dest, right, left, a->imm * 8); 2652 write_neon_element64(dest, a->vd, 0, MO_64); 2653 } else { 2654 /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */ 2655 TCGv_i64 left, middle, right, destleft, destright; 2656 2657 left = tcg_temp_new_i64(); 2658 middle = tcg_temp_new_i64(); 2659 right = tcg_temp_new_i64(); 2660 destleft = tcg_temp_new_i64(); 2661 destright = tcg_temp_new_i64(); 2662 2663 if (a->imm < 8) { 2664 read_neon_element64(right, a->vn, 0, MO_64); 2665 read_neon_element64(middle, a->vn, 1, MO_64); 2666 tcg_gen_extract2_i64(destright, right, middle, a->imm * 8); 2667 read_neon_element64(left, a->vm, 0, MO_64); 2668 tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8); 2669 } else { 2670 read_neon_element64(right, a->vn, 1, MO_64); 2671 read_neon_element64(middle, a->vm, 0, MO_64); 2672 tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8); 2673 read_neon_element64(left, a->vm, 1, MO_64); 2674 tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8); 2675 } 2676 2677 write_neon_element64(destright, a->vd, 0, MO_64); 2678 write_neon_element64(destleft, a->vd, 1, MO_64); 2679 } 2680 return true; 2681 } 2682 2683 static bool trans_VTBL(DisasContext *s, arg_VTBL *a) 2684 { 2685 TCGv_i64 val, def; 2686 TCGv_i32 desc; 2687 2688 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2689 return false; 2690 } 2691 2692 /* UNDEF accesses to D16-D31 if they don't exist. */ 2693 if (!dc_isar_feature(aa32_simd_r32, s) && 2694 ((a->vd | a->vn | a->vm) & 0x10)) { 2695 return false; 2696 } 2697 2698 if ((a->vn + a->len + 1) > 32) { 2699 /* 2700 * This is UNPREDICTABLE; we choose to UNDEF to avoid the 2701 * helper function running off the end of the register file. 2702 */ 2703 return false; 2704 } 2705 2706 if (!vfp_access_check(s)) { 2707 return true; 2708 } 2709 2710 desc = tcg_constant_i32((a->vn << 2) | a->len); 2711 def = tcg_temp_new_i64(); 2712 if (a->op) { 2713 read_neon_element64(def, a->vd, 0, MO_64); 2714 } else { 2715 tcg_gen_movi_i64(def, 0); 2716 } 2717 val = tcg_temp_new_i64(); 2718 read_neon_element64(val, a->vm, 0, MO_64); 2719 2720 gen_helper_neon_tbl(val, tcg_env, desc, val, def); 2721 write_neon_element64(val, a->vd, 0, MO_64); 2722 return true; 2723 } 2724 2725 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) 2726 { 2727 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2728 return false; 2729 } 2730 2731 /* UNDEF accesses to D16-D31 if they don't exist. */ 2732 if (!dc_isar_feature(aa32_simd_r32, s) && 2733 ((a->vd | a->vm) & 0x10)) { 2734 return false; 2735 } 2736 2737 if (a->vd & a->q) { 2738 return false; 2739 } 2740 2741 if (!vfp_access_check(s)) { 2742 return true; 2743 } 2744 2745 tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd), 2746 neon_element_offset(a->vm, a->index, a->size), 2747 a->q ? 16 : 8, a->q ? 16 : 8); 2748 return true; 2749 } 2750 2751 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) 2752 { 2753 int pass, half; 2754 TCGv_i32 tmp[2]; 2755 2756 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2757 return false; 2758 } 2759 2760 /* UNDEF accesses to D16-D31 if they don't exist. */ 2761 if (!dc_isar_feature(aa32_simd_r32, s) && 2762 ((a->vd | a->vm) & 0x10)) { 2763 return false; 2764 } 2765 2766 if ((a->vd | a->vm) & a->q) { 2767 return false; 2768 } 2769 2770 if (a->size == 3) { 2771 return false; 2772 } 2773 2774 if (!vfp_access_check(s)) { 2775 return true; 2776 } 2777 2778 tmp[0] = tcg_temp_new_i32(); 2779 tmp[1] = tcg_temp_new_i32(); 2780 2781 for (pass = 0; pass < (a->q ? 2 : 1); pass++) { 2782 for (half = 0; half < 2; half++) { 2783 read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32); 2784 switch (a->size) { 2785 case 0: 2786 tcg_gen_bswap32_i32(tmp[half], tmp[half]); 2787 break; 2788 case 1: 2789 gen_swap_half(tmp[half], tmp[half]); 2790 break; 2791 case 2: 2792 break; 2793 default: 2794 g_assert_not_reached(); 2795 } 2796 } 2797 write_neon_element32(tmp[1], a->vd, pass * 2, MO_32); 2798 write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32); 2799 } 2800 return true; 2801 } 2802 2803 static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, 2804 NeonGenWidenFn *widenfn, 2805 NeonGenTwo64OpFn *opfn, 2806 NeonGenTwo64OpFn *accfn) 2807 { 2808 /* 2809 * Pairwise long operations: widen both halves of the pair, 2810 * combine the pairs with the opfn, and then possibly accumulate 2811 * into the destination with the accfn. 2812 */ 2813 int pass; 2814 2815 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2816 return false; 2817 } 2818 2819 /* UNDEF accesses to D16-D31 if they don't exist. */ 2820 if (!dc_isar_feature(aa32_simd_r32, s) && 2821 ((a->vd | a->vm) & 0x10)) { 2822 return false; 2823 } 2824 2825 if ((a->vd | a->vm) & a->q) { 2826 return false; 2827 } 2828 2829 if (!widenfn) { 2830 return false; 2831 } 2832 2833 if (!vfp_access_check(s)) { 2834 return true; 2835 } 2836 2837 for (pass = 0; pass < a->q + 1; pass++) { 2838 TCGv_i32 tmp; 2839 TCGv_i64 rm0_64, rm1_64, rd_64; 2840 2841 rm0_64 = tcg_temp_new_i64(); 2842 rm1_64 = tcg_temp_new_i64(); 2843 rd_64 = tcg_temp_new_i64(); 2844 2845 tmp = tcg_temp_new_i32(); 2846 read_neon_element32(tmp, a->vm, pass * 2, MO_32); 2847 widenfn(rm0_64, tmp); 2848 read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32); 2849 widenfn(rm1_64, tmp); 2850 2851 opfn(rd_64, rm0_64, rm1_64); 2852 2853 if (accfn) { 2854 TCGv_i64 tmp64 = tcg_temp_new_i64(); 2855 read_neon_element64(tmp64, a->vd, pass, MO_64); 2856 accfn(rd_64, tmp64, rd_64); 2857 } 2858 write_neon_element64(rd_64, a->vd, pass, MO_64); 2859 } 2860 return true; 2861 } 2862 2863 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a) 2864 { 2865 static NeonGenWidenFn * const widenfn[] = { 2866 gen_helper_neon_widen_s8, 2867 gen_helper_neon_widen_s16, 2868 tcg_gen_ext_i32_i64, 2869 NULL, 2870 }; 2871 static NeonGenTwo64OpFn * const opfn[] = { 2872 gen_helper_neon_paddl_u16, 2873 gen_helper_neon_paddl_u32, 2874 tcg_gen_add_i64, 2875 NULL, 2876 }; 2877 2878 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); 2879 } 2880 2881 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a) 2882 { 2883 static NeonGenWidenFn * const widenfn[] = { 2884 gen_helper_neon_widen_u8, 2885 gen_helper_neon_widen_u16, 2886 tcg_gen_extu_i32_i64, 2887 NULL, 2888 }; 2889 static NeonGenTwo64OpFn * const opfn[] = { 2890 gen_helper_neon_paddl_u16, 2891 gen_helper_neon_paddl_u32, 2892 tcg_gen_add_i64, 2893 NULL, 2894 }; 2895 2896 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); 2897 } 2898 2899 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a) 2900 { 2901 static NeonGenWidenFn * const widenfn[] = { 2902 gen_helper_neon_widen_s8, 2903 gen_helper_neon_widen_s16, 2904 tcg_gen_ext_i32_i64, 2905 NULL, 2906 }; 2907 static NeonGenTwo64OpFn * const opfn[] = { 2908 gen_helper_neon_paddl_u16, 2909 gen_helper_neon_paddl_u32, 2910 tcg_gen_add_i64, 2911 NULL, 2912 }; 2913 static NeonGenTwo64OpFn * const accfn[] = { 2914 gen_helper_neon_addl_u16, 2915 gen_helper_neon_addl_u32, 2916 tcg_gen_add_i64, 2917 NULL, 2918 }; 2919 2920 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], 2921 accfn[a->size]); 2922 } 2923 2924 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a) 2925 { 2926 static NeonGenWidenFn * const widenfn[] = { 2927 gen_helper_neon_widen_u8, 2928 gen_helper_neon_widen_u16, 2929 tcg_gen_extu_i32_i64, 2930 NULL, 2931 }; 2932 static NeonGenTwo64OpFn * const opfn[] = { 2933 gen_helper_neon_paddl_u16, 2934 gen_helper_neon_paddl_u32, 2935 tcg_gen_add_i64, 2936 NULL, 2937 }; 2938 static NeonGenTwo64OpFn * const accfn[] = { 2939 gen_helper_neon_addl_u16, 2940 gen_helper_neon_addl_u32, 2941 tcg_gen_add_i64, 2942 NULL, 2943 }; 2944 2945 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], 2946 accfn[a->size]); 2947 } 2948 2949 typedef void ZipFn(TCGv_ptr, TCGv_ptr); 2950 2951 static bool do_zip_uzp(DisasContext *s, arg_2misc *a, 2952 ZipFn *fn) 2953 { 2954 TCGv_ptr pd, pm; 2955 2956 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2957 return false; 2958 } 2959 2960 /* UNDEF accesses to D16-D31 if they don't exist. */ 2961 if (!dc_isar_feature(aa32_simd_r32, s) && 2962 ((a->vd | a->vm) & 0x10)) { 2963 return false; 2964 } 2965 2966 if ((a->vd | a->vm) & a->q) { 2967 return false; 2968 } 2969 2970 if (!fn) { 2971 /* Bad size or size/q combination */ 2972 return false; 2973 } 2974 2975 if (!vfp_access_check(s)) { 2976 return true; 2977 } 2978 2979 pd = vfp_reg_ptr(true, a->vd); 2980 pm = vfp_reg_ptr(true, a->vm); 2981 fn(pd, pm); 2982 return true; 2983 } 2984 2985 static bool trans_VUZP(DisasContext *s, arg_2misc *a) 2986 { 2987 static ZipFn * const fn[2][4] = { 2988 { 2989 gen_helper_neon_unzip8, 2990 gen_helper_neon_unzip16, 2991 NULL, 2992 NULL, 2993 }, { 2994 gen_helper_neon_qunzip8, 2995 gen_helper_neon_qunzip16, 2996 gen_helper_neon_qunzip32, 2997 NULL, 2998 } 2999 }; 3000 return do_zip_uzp(s, a, fn[a->q][a->size]); 3001 } 3002 3003 static bool trans_VZIP(DisasContext *s, arg_2misc *a) 3004 { 3005 static ZipFn * const fn[2][4] = { 3006 { 3007 gen_helper_neon_zip8, 3008 gen_helper_neon_zip16, 3009 NULL, 3010 NULL, 3011 }, { 3012 gen_helper_neon_qzip8, 3013 gen_helper_neon_qzip16, 3014 gen_helper_neon_qzip32, 3015 NULL, 3016 } 3017 }; 3018 return do_zip_uzp(s, a, fn[a->q][a->size]); 3019 } 3020 3021 static bool do_vmovn(DisasContext *s, arg_2misc *a, 3022 NeonGenNarrowEnvFn *narrowfn) 3023 { 3024 TCGv_i64 rm; 3025 TCGv_i32 rd0, rd1; 3026 3027 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3028 return false; 3029 } 3030 3031 /* UNDEF accesses to D16-D31 if they don't exist. */ 3032 if (!dc_isar_feature(aa32_simd_r32, s) && 3033 ((a->vd | a->vm) & 0x10)) { 3034 return false; 3035 } 3036 3037 if (a->vm & 1) { 3038 return false; 3039 } 3040 3041 if (!narrowfn) { 3042 return false; 3043 } 3044 3045 if (!vfp_access_check(s)) { 3046 return true; 3047 } 3048 3049 rm = tcg_temp_new_i64(); 3050 rd0 = tcg_temp_new_i32(); 3051 rd1 = tcg_temp_new_i32(); 3052 3053 read_neon_element64(rm, a->vm, 0, MO_64); 3054 narrowfn(rd0, tcg_env, rm); 3055 read_neon_element64(rm, a->vm, 1, MO_64); 3056 narrowfn(rd1, tcg_env, rm); 3057 write_neon_element32(rd0, a->vd, 0, MO_32); 3058 write_neon_element32(rd1, a->vd, 1, MO_32); 3059 return true; 3060 } 3061 3062 #define DO_VMOVN(INSN, FUNC) \ 3063 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3064 { \ 3065 static NeonGenNarrowEnvFn * const narrowfn[] = { \ 3066 FUNC##8, \ 3067 FUNC##16, \ 3068 FUNC##32, \ 3069 NULL, \ 3070 }; \ 3071 return do_vmovn(s, a, narrowfn[a->size]); \ 3072 } 3073 3074 DO_VMOVN(VMOVN, gen_neon_narrow_u) 3075 DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat) 3076 DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s) 3077 DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u) 3078 3079 static bool trans_VSHLL(DisasContext *s, arg_2misc *a) 3080 { 3081 TCGv_i32 rm0, rm1; 3082 TCGv_i64 rd; 3083 static NeonGenWidenFn * const widenfns[] = { 3084 gen_helper_neon_widen_u8, 3085 gen_helper_neon_widen_u16, 3086 tcg_gen_extu_i32_i64, 3087 NULL, 3088 }; 3089 NeonGenWidenFn *widenfn = widenfns[a->size]; 3090 3091 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3092 return false; 3093 } 3094 3095 /* UNDEF accesses to D16-D31 if they don't exist. */ 3096 if (!dc_isar_feature(aa32_simd_r32, s) && 3097 ((a->vd | a->vm) & 0x10)) { 3098 return false; 3099 } 3100 3101 if (a->vd & 1) { 3102 return false; 3103 } 3104 3105 if (!widenfn) { 3106 return false; 3107 } 3108 3109 if (!vfp_access_check(s)) { 3110 return true; 3111 } 3112 3113 rd = tcg_temp_new_i64(); 3114 rm0 = tcg_temp_new_i32(); 3115 rm1 = tcg_temp_new_i32(); 3116 3117 read_neon_element32(rm0, a->vm, 0, MO_32); 3118 read_neon_element32(rm1, a->vm, 1, MO_32); 3119 3120 widenfn(rd, rm0); 3121 tcg_gen_shli_i64(rd, rd, 8 << a->size); 3122 write_neon_element64(rd, a->vd, 0, MO_64); 3123 widenfn(rd, rm1); 3124 tcg_gen_shli_i64(rd, rd, 8 << a->size); 3125 write_neon_element64(rd, a->vd, 1, MO_64); 3126 return true; 3127 } 3128 3129 static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a) 3130 { 3131 TCGv_ptr fpst; 3132 TCGv_i64 tmp; 3133 TCGv_i32 dst0, dst1; 3134 3135 if (!dc_isar_feature(aa32_bf16, s)) { 3136 return false; 3137 } 3138 3139 /* UNDEF accesses to D16-D31 if they don't exist. */ 3140 if (!dc_isar_feature(aa32_simd_r32, s) && 3141 ((a->vd | a->vm) & 0x10)) { 3142 return false; 3143 } 3144 3145 if ((a->vm & 1) || (a->size != 1)) { 3146 return false; 3147 } 3148 3149 if (!vfp_access_check(s)) { 3150 return true; 3151 } 3152 3153 fpst = fpstatus_ptr(FPST_STD); 3154 tmp = tcg_temp_new_i64(); 3155 dst0 = tcg_temp_new_i32(); 3156 dst1 = tcg_temp_new_i32(); 3157 3158 read_neon_element64(tmp, a->vm, 0, MO_64); 3159 gen_helper_bfcvt_pair(dst0, tmp, fpst); 3160 3161 read_neon_element64(tmp, a->vm, 1, MO_64); 3162 gen_helper_bfcvt_pair(dst1, tmp, fpst); 3163 3164 write_neon_element32(dst0, a->vd, 0, MO_32); 3165 write_neon_element32(dst1, a->vd, 1, MO_32); 3166 return true; 3167 } 3168 3169 static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) 3170 { 3171 TCGv_ptr fpst; 3172 TCGv_i32 ahp, tmp, tmp2, tmp3; 3173 3174 if (!arm_dc_feature(s, ARM_FEATURE_NEON) || 3175 !dc_isar_feature(aa32_fp16_spconv, s)) { 3176 return false; 3177 } 3178 3179 /* UNDEF accesses to D16-D31 if they don't exist. */ 3180 if (!dc_isar_feature(aa32_simd_r32, s) && 3181 ((a->vd | a->vm) & 0x10)) { 3182 return false; 3183 } 3184 3185 if ((a->vm & 1) || (a->size != 1)) { 3186 return false; 3187 } 3188 3189 if (!vfp_access_check(s)) { 3190 return true; 3191 } 3192 3193 fpst = fpstatus_ptr(FPST_STD); 3194 ahp = get_ahp_flag(); 3195 tmp = tcg_temp_new_i32(); 3196 read_neon_element32(tmp, a->vm, 0, MO_32); 3197 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 3198 tmp2 = tcg_temp_new_i32(); 3199 read_neon_element32(tmp2, a->vm, 1, MO_32); 3200 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp); 3201 tcg_gen_shli_i32(tmp2, tmp2, 16); 3202 tcg_gen_or_i32(tmp2, tmp2, tmp); 3203 read_neon_element32(tmp, a->vm, 2, MO_32); 3204 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 3205 tmp3 = tcg_temp_new_i32(); 3206 read_neon_element32(tmp3, a->vm, 3, MO_32); 3207 write_neon_element32(tmp2, a->vd, 0, MO_32); 3208 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp); 3209 tcg_gen_shli_i32(tmp3, tmp3, 16); 3210 tcg_gen_or_i32(tmp3, tmp3, tmp); 3211 write_neon_element32(tmp3, a->vd, 1, MO_32); 3212 return true; 3213 } 3214 3215 static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) 3216 { 3217 TCGv_ptr fpst; 3218 TCGv_i32 ahp, tmp, tmp2, tmp3; 3219 3220 if (!arm_dc_feature(s, ARM_FEATURE_NEON) || 3221 !dc_isar_feature(aa32_fp16_spconv, s)) { 3222 return false; 3223 } 3224 3225 /* UNDEF accesses to D16-D31 if they don't exist. */ 3226 if (!dc_isar_feature(aa32_simd_r32, s) && 3227 ((a->vd | a->vm) & 0x10)) { 3228 return false; 3229 } 3230 3231 if ((a->vd & 1) || (a->size != 1)) { 3232 return false; 3233 } 3234 3235 if (!vfp_access_check(s)) { 3236 return true; 3237 } 3238 3239 fpst = fpstatus_ptr(FPST_STD); 3240 ahp = get_ahp_flag(); 3241 tmp3 = tcg_temp_new_i32(); 3242 tmp2 = tcg_temp_new_i32(); 3243 tmp = tcg_temp_new_i32(); 3244 read_neon_element32(tmp, a->vm, 0, MO_32); 3245 read_neon_element32(tmp2, a->vm, 1, MO_32); 3246 tcg_gen_ext16u_i32(tmp3, tmp); 3247 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); 3248 write_neon_element32(tmp3, a->vd, 0, MO_32); 3249 tcg_gen_shri_i32(tmp, tmp, 16); 3250 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp); 3251 write_neon_element32(tmp, a->vd, 1, MO_32); 3252 tcg_gen_ext16u_i32(tmp3, tmp2); 3253 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); 3254 write_neon_element32(tmp3, a->vd, 2, MO_32); 3255 tcg_gen_shri_i32(tmp2, tmp2, 16); 3256 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp); 3257 write_neon_element32(tmp2, a->vd, 3, MO_32); 3258 return true; 3259 } 3260 3261 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn) 3262 { 3263 int vec_size = a->q ? 16 : 8; 3264 int rd_ofs = neon_full_reg_offset(a->vd); 3265 int rm_ofs = neon_full_reg_offset(a->vm); 3266 3267 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3268 return false; 3269 } 3270 3271 /* UNDEF accesses to D16-D31 if they don't exist. */ 3272 if (!dc_isar_feature(aa32_simd_r32, s) && 3273 ((a->vd | a->vm) & 0x10)) { 3274 return false; 3275 } 3276 3277 if (a->size == 3) { 3278 return false; 3279 } 3280 3281 if ((a->vd | a->vm) & a->q) { 3282 return false; 3283 } 3284 3285 if (!vfp_access_check(s)) { 3286 return true; 3287 } 3288 3289 fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size); 3290 3291 return true; 3292 } 3293 3294 #define DO_2MISC_VEC(INSN, FN) \ 3295 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3296 { \ 3297 return do_2misc_vec(s, a, FN); \ 3298 } 3299 3300 DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg) 3301 DO_2MISC_VEC(VABS, tcg_gen_gvec_abs) 3302 DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0) 3303 DO_2MISC_VEC(VCGT0, gen_gvec_cgt0) 3304 DO_2MISC_VEC(VCLE0, gen_gvec_cle0) 3305 DO_2MISC_VEC(VCGE0, gen_gvec_cge0) 3306 DO_2MISC_VEC(VCLT0, gen_gvec_clt0) 3307 3308 static bool trans_VMVN(DisasContext *s, arg_2misc *a) 3309 { 3310 if (a->size != 0) { 3311 return false; 3312 } 3313 return do_2misc_vec(s, a, tcg_gen_gvec_not); 3314 } 3315 3316 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \ 3317 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 3318 uint32_t rm_ofs, uint32_t oprsz, \ 3319 uint32_t maxsz) \ 3320 { \ 3321 tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \ 3322 DATA, FUNC); \ 3323 } 3324 3325 #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \ 3326 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 3327 uint32_t rm_ofs, uint32_t oprsz, \ 3328 uint32_t maxsz) \ 3329 { \ 3330 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \ 3331 } 3332 3333 WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0) 3334 WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aesd, 0) 3335 WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0) 3336 WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesimc, 0) 3337 WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0) 3338 WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0) 3339 WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0) 3340 3341 #define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \ 3342 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3343 { \ 3344 if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \ 3345 return false; \ 3346 } \ 3347 return do_2misc_vec(s, a, gen_##INSN); \ 3348 } 3349 3350 DO_2M_CRYPTO(AESE, aa32_aes, 0) 3351 DO_2M_CRYPTO(AESD, aa32_aes, 0) 3352 DO_2M_CRYPTO(AESMC, aa32_aes, 0) 3353 DO_2M_CRYPTO(AESIMC, aa32_aes, 0) 3354 DO_2M_CRYPTO(SHA1H, aa32_sha1, 2) 3355 DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2) 3356 DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) 3357 3358 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) 3359 { 3360 TCGv_i32 tmp; 3361 int pass; 3362 3363 /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ 3364 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3365 return false; 3366 } 3367 3368 /* UNDEF accesses to D16-D31 if they don't exist. */ 3369 if (!dc_isar_feature(aa32_simd_r32, s) && 3370 ((a->vd | a->vm) & 0x10)) { 3371 return false; 3372 } 3373 3374 if (!fn) { 3375 return false; 3376 } 3377 3378 if ((a->vd | a->vm) & a->q) { 3379 return false; 3380 } 3381 3382 if (!vfp_access_check(s)) { 3383 return true; 3384 } 3385 3386 tmp = tcg_temp_new_i32(); 3387 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 3388 read_neon_element32(tmp, a->vm, pass, MO_32); 3389 fn(tmp, tmp); 3390 write_neon_element32(tmp, a->vd, pass, MO_32); 3391 } 3392 return true; 3393 } 3394 3395 static bool trans_VREV32(DisasContext *s, arg_2misc *a) 3396 { 3397 static NeonGenOneOpFn * const fn[] = { 3398 tcg_gen_bswap32_i32, 3399 gen_swap_half, 3400 NULL, 3401 NULL, 3402 }; 3403 return do_2misc(s, a, fn[a->size]); 3404 } 3405 3406 static bool trans_VREV16(DisasContext *s, arg_2misc *a) 3407 { 3408 if (a->size != 0) { 3409 return false; 3410 } 3411 return do_2misc(s, a, gen_rev16); 3412 } 3413 3414 static bool trans_VCLS(DisasContext *s, arg_2misc *a) 3415 { 3416 static NeonGenOneOpFn * const fn[] = { 3417 gen_helper_neon_cls_s8, 3418 gen_helper_neon_cls_s16, 3419 gen_helper_neon_cls_s32, 3420 NULL, 3421 }; 3422 return do_2misc(s, a, fn[a->size]); 3423 } 3424 3425 static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm) 3426 { 3427 tcg_gen_clzi_i32(rd, rm, 32); 3428 } 3429 3430 static bool trans_VCLZ(DisasContext *s, arg_2misc *a) 3431 { 3432 static NeonGenOneOpFn * const fn[] = { 3433 gen_helper_neon_clz_u8, 3434 gen_helper_neon_clz_u16, 3435 do_VCLZ_32, 3436 NULL, 3437 }; 3438 return do_2misc(s, a, fn[a->size]); 3439 } 3440 3441 static bool trans_VCNT(DisasContext *s, arg_2misc *a) 3442 { 3443 if (a->size != 0) { 3444 return false; 3445 } 3446 return do_2misc(s, a, gen_helper_neon_cnt_u8); 3447 } 3448 3449 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3450 uint32_t oprsz, uint32_t maxsz) 3451 { 3452 tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs, 3453 vece == MO_16 ? 0x7fff : 0x7fffffff, 3454 oprsz, maxsz); 3455 } 3456 3457 static bool trans_VABS_F(DisasContext *s, arg_2misc *a) 3458 { 3459 if (a->size == MO_16) { 3460 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3461 return false; 3462 } 3463 } else if (a->size != MO_32) { 3464 return false; 3465 } 3466 return do_2misc_vec(s, a, gen_VABS_F); 3467 } 3468 3469 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3470 uint32_t oprsz, uint32_t maxsz) 3471 { 3472 tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs, 3473 vece == MO_16 ? 0x8000 : 0x80000000, 3474 oprsz, maxsz); 3475 } 3476 3477 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) 3478 { 3479 if (a->size == MO_16) { 3480 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3481 return false; 3482 } 3483 } else if (a->size != MO_32) { 3484 return false; 3485 } 3486 return do_2misc_vec(s, a, gen_VNEG_F); 3487 } 3488 3489 static bool trans_VRECPE(DisasContext *s, arg_2misc *a) 3490 { 3491 if (a->size != 2) { 3492 return false; 3493 } 3494 return do_2misc(s, a, gen_helper_recpe_u32); 3495 } 3496 3497 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) 3498 { 3499 if (a->size != 2) { 3500 return false; 3501 } 3502 return do_2misc(s, a, gen_helper_rsqrte_u32); 3503 } 3504 3505 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ 3506 static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \ 3507 { \ 3508 FUNC(d, tcg_env, m); \ 3509 } 3510 3511 WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) 3512 WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16) 3513 WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32) 3514 WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8) 3515 WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16) 3516 WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32) 3517 3518 static bool trans_VQABS(DisasContext *s, arg_2misc *a) 3519 { 3520 static NeonGenOneOpFn * const fn[] = { 3521 gen_VQABS_s8, 3522 gen_VQABS_s16, 3523 gen_VQABS_s32, 3524 NULL, 3525 }; 3526 return do_2misc(s, a, fn[a->size]); 3527 } 3528 3529 static bool trans_VQNEG(DisasContext *s, arg_2misc *a) 3530 { 3531 static NeonGenOneOpFn * const fn[] = { 3532 gen_VQNEG_s8, 3533 gen_VQNEG_s16, 3534 gen_VQNEG_s32, 3535 NULL, 3536 }; 3537 return do_2misc(s, a, fn[a->size]); 3538 } 3539 3540 #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \ 3541 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3542 uint32_t rm_ofs, \ 3543 uint32_t oprsz, uint32_t maxsz) \ 3544 { \ 3545 static gen_helper_gvec_2_ptr * const fns[4] = { \ 3546 NULL, HFUNC, SFUNC, NULL, \ 3547 }; \ 3548 TCGv_ptr fpst; \ 3549 fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \ 3550 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \ 3551 fns[vece]); \ 3552 } \ 3553 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3554 { \ 3555 if (a->size == MO_16) { \ 3556 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3557 return false; \ 3558 } \ 3559 } else if (a->size != MO_32) { \ 3560 return false; \ 3561 } \ 3562 return do_2misc_vec(s, a, gen_##INSN); \ 3563 } 3564 3565 DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s) 3566 DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s) 3567 DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s) 3568 DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s) 3569 DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s) 3570 DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s) 3571 DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s) 3572 DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos) 3573 DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos) 3574 DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs) 3575 DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs) 3576 3577 DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s) 3578 3579 static bool trans_VRINTX(DisasContext *s, arg_2misc *a) 3580 { 3581 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 3582 return false; 3583 } 3584 return trans_VRINTX_impl(s, a); 3585 } 3586 3587 #define DO_VEC_RMODE(INSN, RMODE, OP) \ 3588 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3589 uint32_t rm_ofs, \ 3590 uint32_t oprsz, uint32_t maxsz) \ 3591 { \ 3592 static gen_helper_gvec_2_ptr * const fns[4] = { \ 3593 NULL, \ 3594 gen_helper_gvec_##OP##h, \ 3595 gen_helper_gvec_##OP##s, \ 3596 NULL, \ 3597 }; \ 3598 TCGv_ptr fpst; \ 3599 fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \ 3600 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \ 3601 arm_rmode_to_sf(RMODE), fns[vece]); \ 3602 } \ 3603 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3604 { \ 3605 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \ 3606 return false; \ 3607 } \ 3608 if (a->size == MO_16) { \ 3609 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3610 return false; \ 3611 } \ 3612 } else if (a->size != MO_32) { \ 3613 return false; \ 3614 } \ 3615 return do_2misc_vec(s, a, gen_##INSN); \ 3616 } 3617 3618 DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u) 3619 DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s) 3620 DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u) 3621 DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s) 3622 DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u) 3623 DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s) 3624 DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u) 3625 DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s) 3626 3627 DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_) 3628 DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_) 3629 DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_) 3630 DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_) 3631 DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_) 3632 3633 static bool trans_VSWP(DisasContext *s, arg_2misc *a) 3634 { 3635 TCGv_i64 rm, rd; 3636 int pass; 3637 3638 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3639 return false; 3640 } 3641 3642 /* UNDEF accesses to D16-D31 if they don't exist. */ 3643 if (!dc_isar_feature(aa32_simd_r32, s) && 3644 ((a->vd | a->vm) & 0x10)) { 3645 return false; 3646 } 3647 3648 if (a->size != 0) { 3649 return false; 3650 } 3651 3652 if ((a->vd | a->vm) & a->q) { 3653 return false; 3654 } 3655 3656 if (!vfp_access_check(s)) { 3657 return true; 3658 } 3659 3660 rm = tcg_temp_new_i64(); 3661 rd = tcg_temp_new_i64(); 3662 for (pass = 0; pass < (a->q ? 2 : 1); pass++) { 3663 read_neon_element64(rm, a->vm, pass, MO_64); 3664 read_neon_element64(rd, a->vd, pass, MO_64); 3665 write_neon_element64(rm, a->vd, pass, MO_64); 3666 write_neon_element64(rd, a->vm, pass, MO_64); 3667 } 3668 return true; 3669 } 3670 3671 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1) 3672 { 3673 TCGv_i32 rd, tmp; 3674 3675 rd = tcg_temp_new_i32(); 3676 tmp = tcg_temp_new_i32(); 3677 3678 tcg_gen_shli_i32(rd, t0, 8); 3679 tcg_gen_andi_i32(rd, rd, 0xff00ff00); 3680 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff); 3681 tcg_gen_or_i32(rd, rd, tmp); 3682 3683 tcg_gen_shri_i32(t1, t1, 8); 3684 tcg_gen_andi_i32(t1, t1, 0x00ff00ff); 3685 tcg_gen_andi_i32(tmp, t0, 0xff00ff00); 3686 tcg_gen_or_i32(t1, t1, tmp); 3687 tcg_gen_mov_i32(t0, rd); 3688 } 3689 3690 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) 3691 { 3692 TCGv_i32 rd, tmp; 3693 3694 rd = tcg_temp_new_i32(); 3695 tmp = tcg_temp_new_i32(); 3696 3697 tcg_gen_shli_i32(rd, t0, 16); 3698 tcg_gen_andi_i32(tmp, t1, 0xffff); 3699 tcg_gen_or_i32(rd, rd, tmp); 3700 tcg_gen_shri_i32(t1, t1, 16); 3701 tcg_gen_andi_i32(tmp, t0, 0xffff0000); 3702 tcg_gen_or_i32(t1, t1, tmp); 3703 tcg_gen_mov_i32(t0, rd); 3704 } 3705 3706 static bool trans_VTRN(DisasContext *s, arg_2misc *a) 3707 { 3708 TCGv_i32 tmp, tmp2; 3709 int pass; 3710 3711 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3712 return false; 3713 } 3714 3715 /* UNDEF accesses to D16-D31 if they don't exist. */ 3716 if (!dc_isar_feature(aa32_simd_r32, s) && 3717 ((a->vd | a->vm) & 0x10)) { 3718 return false; 3719 } 3720 3721 if ((a->vd | a->vm) & a->q) { 3722 return false; 3723 } 3724 3725 if (a->size == 3) { 3726 return false; 3727 } 3728 3729 if (!vfp_access_check(s)) { 3730 return true; 3731 } 3732 3733 tmp = tcg_temp_new_i32(); 3734 tmp2 = tcg_temp_new_i32(); 3735 if (a->size == MO_32) { 3736 for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) { 3737 read_neon_element32(tmp, a->vm, pass, MO_32); 3738 read_neon_element32(tmp2, a->vd, pass + 1, MO_32); 3739 write_neon_element32(tmp2, a->vm, pass, MO_32); 3740 write_neon_element32(tmp, a->vd, pass + 1, MO_32); 3741 } 3742 } else { 3743 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 3744 read_neon_element32(tmp, a->vm, pass, MO_32); 3745 read_neon_element32(tmp2, a->vd, pass, MO_32); 3746 if (a->size == MO_8) { 3747 gen_neon_trn_u8(tmp, tmp2); 3748 } else { 3749 gen_neon_trn_u16(tmp, tmp2); 3750 } 3751 write_neon_element32(tmp2, a->vm, pass, MO_32); 3752 write_neon_element32(tmp, a->vd, pass, MO_32); 3753 } 3754 } 3755 return true; 3756 } 3757 3758 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a) 3759 { 3760 if (!dc_isar_feature(aa32_i8mm, s)) { 3761 return false; 3762 } 3763 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 3764 gen_helper_gvec_smmla_b); 3765 } 3766 3767 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a) 3768 { 3769 if (!dc_isar_feature(aa32_i8mm, s)) { 3770 return false; 3771 } 3772 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 3773 gen_helper_gvec_ummla_b); 3774 } 3775 3776 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a) 3777 { 3778 if (!dc_isar_feature(aa32_i8mm, s)) { 3779 return false; 3780 } 3781 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 3782 gen_helper_gvec_usmmla_b); 3783 } 3784 3785 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a) 3786 { 3787 if (!dc_isar_feature(aa32_bf16, s)) { 3788 return false; 3789 } 3790 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 3791 gen_helper_gvec_bfmmla); 3792 } 3793 3794 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a) 3795 { 3796 if (!dc_isar_feature(aa32_bf16, s)) { 3797 return false; 3798 } 3799 return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD, 3800 gen_helper_gvec_bfmlal); 3801 } 3802 3803 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a) 3804 { 3805 if (!dc_isar_feature(aa32_bf16, s)) { 3806 return false; 3807 } 3808 return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm, 3809 (a->index << 1) | a->q, FPST_STD, 3810 gen_helper_gvec_bfmlal_idx); 3811 } 3812