/*
 * ARM translation: AArch32 Neon instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"

static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
    return ret;
}

static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, cpu_env, offset);
        break;
    case MO_UQ:
        tcg_gen_ld_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i32(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i32(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, cpu_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
                         int data, gen_helper_gvec_4 *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist.
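     * (i.e. when the aa32_simd_r32 feature is absent and the CPU
     * implements only the first 16 D registers.)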
*/ 128 if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) { 129 return false; 130 } 131 132 /* 133 * UNDEF accesses to odd registers for each bit of Q. 134 * Q will be 0b111 for all Q-reg instructions, otherwise 135 * when we have mixed Q- and D-reg inputs. 136 */ 137 if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) { 138 return false; 139 } 140 141 if (!vfp_access_check(s)) { 142 return true; 143 } 144 145 int opr_sz = q ? 16 : 8; 146 tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd), 147 vfp_reg_offset(1, vn), 148 vfp_reg_offset(1, vm), 149 vfp_reg_offset(1, vd), 150 opr_sz, opr_sz, data, fn_gvec); 151 return true; 152 } 153 154 static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm, 155 int data, ARMFPStatusFlavour fp_flavour, 156 gen_helper_gvec_4_ptr *fn_gvec_ptr) 157 { 158 /* UNDEF accesses to D16-D31 if they don't exist. */ 159 if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) { 160 return false; 161 } 162 163 /* 164 * UNDEF accesses to odd registers for each bit of Q. 165 * Q will be 0b111 for all Q-reg instructions, otherwise 166 * when we have mixed Q- and D-reg inputs. 167 */ 168 if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) { 169 return false; 170 } 171 172 if (!vfp_access_check(s)) { 173 return true; 174 } 175 176 int opr_sz = q ? 16 : 8; 177 TCGv_ptr fpst = fpstatus_ptr(fp_flavour); 178 179 tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd), 180 vfp_reg_offset(1, vn), 181 vfp_reg_offset(1, vm), 182 vfp_reg_offset(1, vd), 183 fpst, opr_sz, opr_sz, data, fn_gvec_ptr); 184 return true; 185 } 186 187 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) 188 { 189 if (!dc_isar_feature(aa32_vcma, s)) { 190 return false; 191 } 192 if (a->size == MO_16) { 193 if (!dc_isar_feature(aa32_fp16_arith, s)) { 194 return false; 195 } 196 return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot, 197 FPST_STD_F16, gen_helper_gvec_fcmlah); 198 } 199 return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot, 200 FPST_STD, gen_helper_gvec_fcmlas); 201 } 202 203 static bool trans_VCADD(DisasContext *s, arg_VCADD *a) 204 { 205 int opr_sz; 206 TCGv_ptr fpst; 207 gen_helper_gvec_3_ptr *fn_gvec_ptr; 208 209 if (!dc_isar_feature(aa32_vcma, s) 210 || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) { 211 return false; 212 } 213 214 /* UNDEF accesses to D16-D31 if they don't exist. */ 215 if (!dc_isar_feature(aa32_simd_r32, s) && 216 ((a->vd | a->vn | a->vm) & 0x10)) { 217 return false; 218 } 219 220 if ((a->vn | a->vm | a->vd) & a->q) { 221 return false; 222 } 223 224 if (!vfp_access_check(s)) { 225 return true; 226 } 227 228 opr_sz = (1 + a->q) * 8; 229 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 230 fn_gvec_ptr = (a->size == MO_16) ? 
231 gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds; 232 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 233 vfp_reg_offset(1, a->vn), 234 vfp_reg_offset(1, a->vm), 235 fpst, opr_sz, opr_sz, a->rot, 236 fn_gvec_ptr); 237 return true; 238 } 239 240 static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a) 241 { 242 if (!dc_isar_feature(aa32_dp, s)) { 243 return false; 244 } 245 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 246 gen_helper_gvec_sdot_b); 247 } 248 249 static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a) 250 { 251 if (!dc_isar_feature(aa32_dp, s)) { 252 return false; 253 } 254 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 255 gen_helper_gvec_udot_b); 256 } 257 258 static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a) 259 { 260 if (!dc_isar_feature(aa32_i8mm, s)) { 261 return false; 262 } 263 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 264 gen_helper_gvec_usdot_b); 265 } 266 267 static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a) 268 { 269 if (!dc_isar_feature(aa32_bf16, s)) { 270 return false; 271 } 272 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, 273 gen_helper_gvec_bfdot); 274 } 275 276 static bool trans_VFML(DisasContext *s, arg_VFML *a) 277 { 278 int opr_sz; 279 280 if (!dc_isar_feature(aa32_fhm, s)) { 281 return false; 282 } 283 284 /* UNDEF accesses to D16-D31 if they don't exist. */ 285 if (!dc_isar_feature(aa32_simd_r32, s) && 286 (a->vd & 0x10)) { 287 return false; 288 } 289 290 if (a->vd & a->q) { 291 return false; 292 } 293 294 if (!vfp_access_check(s)) { 295 return true; 296 } 297 298 opr_sz = (1 + a->q) * 8; 299 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 300 vfp_reg_offset(a->q, a->vn), 301 vfp_reg_offset(a->q, a->vm), 302 cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ 303 gen_helper_gvec_fmlal_a32); 304 return true; 305 } 306 307 static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) 308 { 309 int data = (a->index << 2) | a->rot; 310 311 if (!dc_isar_feature(aa32_vcma, s)) { 312 return false; 313 } 314 if (a->size == MO_16) { 315 if (!dc_isar_feature(aa32_fp16_arith, s)) { 316 return false; 317 } 318 return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data, 319 FPST_STD_F16, gen_helper_gvec_fcmlah_idx); 320 } 321 return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data, 322 FPST_STD, gen_helper_gvec_fcmlas_idx); 323 } 324 325 static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a) 326 { 327 if (!dc_isar_feature(aa32_dp, s)) { 328 return false; 329 } 330 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 331 gen_helper_gvec_sdot_idx_b); 332 } 333 334 static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a) 335 { 336 if (!dc_isar_feature(aa32_dp, s)) { 337 return false; 338 } 339 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 340 gen_helper_gvec_udot_idx_b); 341 } 342 343 static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a) 344 { 345 if (!dc_isar_feature(aa32_i8mm, s)) { 346 return false; 347 } 348 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 349 gen_helper_gvec_usdot_idx_b); 350 } 351 352 static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a) 353 { 354 if (!dc_isar_feature(aa32_i8mm, s)) { 355 return false; 356 } 357 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 358 gen_helper_gvec_sudot_idx_b); 359 } 360 361 static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a) 362 { 363 if (!dc_isar_feature(aa32_bf16, s)) { 364 return false; 365 } 366 
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, 367 gen_helper_gvec_bfdot_idx); 368 } 369 370 static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) 371 { 372 int opr_sz; 373 374 if (!dc_isar_feature(aa32_fhm, s)) { 375 return false; 376 } 377 378 /* UNDEF accesses to D16-D31 if they don't exist. */ 379 if (!dc_isar_feature(aa32_simd_r32, s) && 380 ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { 381 return false; 382 } 383 384 if (a->vd & a->q) { 385 return false; 386 } 387 388 if (!vfp_access_check(s)) { 389 return true; 390 } 391 392 opr_sz = (1 + a->q) * 8; 393 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 394 vfp_reg_offset(a->q, a->vn), 395 vfp_reg_offset(a->q, a->rm), 396 cpu_env, opr_sz, opr_sz, 397 (a->index << 2) | a->s, /* is_2 == 0 */ 398 gen_helper_gvec_fmlal_idx_a32); 399 return true; 400 } 401 402 static struct { 403 int nregs; 404 int interleave; 405 int spacing; 406 } const neon_ls_element_type[11] = { 407 {1, 4, 1}, 408 {1, 4, 2}, 409 {4, 1, 1}, 410 {2, 2, 2}, 411 {1, 3, 1}, 412 {1, 3, 2}, 413 {3, 1, 1}, 414 {1, 1, 1}, 415 {1, 2, 1}, 416 {1, 2, 2}, 417 {2, 1, 1} 418 }; 419 420 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn, 421 int stride) 422 { 423 if (rm != 15) { 424 TCGv_i32 base; 425 426 base = load_reg(s, rn); 427 if (rm == 13) { 428 tcg_gen_addi_i32(base, base, stride); 429 } else { 430 TCGv_i32 index; 431 index = load_reg(s, rm); 432 tcg_gen_add_i32(base, base, index); 433 } 434 store_reg(s, rn, base); 435 } 436 } 437 438 static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) 439 { 440 /* Neon load/store multiple structures */ 441 int nregs, interleave, spacing, reg, n; 442 MemOp mop, align, endian; 443 int mmu_idx = get_mem_index(s); 444 int size = a->size; 445 TCGv_i64 tmp64; 446 TCGv_i32 addr; 447 448 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 449 return false; 450 } 451 452 /* UNDEF accesses to D16-D31 if they don't exist */ 453 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 454 return false; 455 } 456 if (a->itype > 10) { 457 return false; 458 } 459 /* Catch UNDEF cases for bad values of align field */ 460 switch (a->itype & 0xc) { 461 case 4: 462 if (a->align >= 2) { 463 return false; 464 } 465 break; 466 case 8: 467 if (a->align == 3) { 468 return false; 469 } 470 break; 471 default: 472 break; 473 } 474 nregs = neon_ls_element_type[a->itype].nregs; 475 interleave = neon_ls_element_type[a->itype].interleave; 476 spacing = neon_ls_element_type[a->itype].spacing; 477 if (size == 3 && (interleave | spacing) != 1) { 478 return false; 479 } 480 481 if (!vfp_access_check(s)) { 482 return true; 483 } 484 485 /* For our purposes, bytes are always little-endian. */ 486 endian = s->be_data; 487 if (size == 0) { 488 endian = MO_LE; 489 } 490 491 /* Enforce alignment requested by the instruction */ 492 if (a->align) { 493 align = pow2_align(a->align + 2); /* 4 ** a->align */ 494 } else { 495 align = s->align_mem ? MO_ALIGN : 0; 496 } 497 498 /* 499 * Consecutive little-endian elements from a single register 500 * can be promoted to a larger little-endian operation. 501 */ 502 if (interleave == 1 && endian == MO_LE) { 503 /* Retain any natural alignment. 
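         * i.e. if only natural element alignment was required, keep that
         * requirement rather than demanding 8-byte alignment for the
         * promoted 64-bit accesses generated below.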
*/ 504 if (align == MO_ALIGN) { 505 align = pow2_align(size); 506 } 507 size = 3; 508 } 509 510 tmp64 = tcg_temp_new_i64(); 511 addr = tcg_temp_new_i32(); 512 load_reg_var(s, addr, a->rn); 513 514 mop = endian | size | align; 515 for (reg = 0; reg < nregs; reg++) { 516 for (n = 0; n < 8 >> size; n++) { 517 int xs; 518 for (xs = 0; xs < interleave; xs++) { 519 int tt = a->vd + reg + spacing * xs; 520 521 if (a->l) { 522 gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop); 523 neon_store_element64(tt, n, size, tmp64); 524 } else { 525 neon_load_element64(tmp64, tt, n, size); 526 gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop); 527 } 528 tcg_gen_addi_i32(addr, addr, 1 << size); 529 530 /* Subsequent memory operations inherit alignment */ 531 mop &= ~MO_AMASK; 532 } 533 } 534 } 535 536 gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8); 537 return true; 538 } 539 540 static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) 541 { 542 /* Neon load single structure to all lanes */ 543 int reg, stride, vec_size; 544 int vd = a->vd; 545 int size = a->size; 546 int nregs = a->n + 1; 547 TCGv_i32 addr, tmp; 548 MemOp mop, align; 549 550 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 551 return false; 552 } 553 554 /* UNDEF accesses to D16-D31 if they don't exist */ 555 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 556 return false; 557 } 558 559 align = 0; 560 if (size == 3) { 561 if (nregs != 4 || a->a == 0) { 562 return false; 563 } 564 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ 565 size = MO_32; 566 align = MO_ALIGN_16; 567 } else if (a->a) { 568 switch (nregs) { 569 case 1: 570 if (size == 0) { 571 return false; 572 } 573 align = MO_ALIGN; 574 break; 575 case 2: 576 align = pow2_align(size + 1); 577 break; 578 case 3: 579 return false; 580 case 4: 581 if (size == 2) { 582 align = pow2_align(3); 583 } else { 584 align = pow2_align(size + 2); 585 } 586 break; 587 default: 588 g_assert_not_reached(); 589 } 590 } 591 592 if (!vfp_access_check(s)) { 593 return true; 594 } 595 596 /* 597 * VLD1 to all lanes: T bit indicates how many Dregs to write. 598 * VLD2/3/4 to all lanes: T bit indicates register stride. 599 */ 600 stride = a->t ? 2 : 1; 601 vec_size = nregs == 1 ? stride * 8 : 8; 602 mop = size | align; 603 tmp = tcg_temp_new_i32(); 604 addr = tcg_temp_new_i32(); 605 load_reg_var(s, addr, a->rn); 606 for (reg = 0; reg < nregs; reg++) { 607 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop); 608 if ((vd & 1) && vec_size == 16) { 609 /* 610 * We cannot write 16 bytes at once because the 611 * destination is unaligned. 
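             * Instead, dup the loaded value into the odd-numbered D
             * register and then copy those 8 bytes into the following
             * register.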
612 */ 613 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 614 8, 8, tmp); 615 tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1), 616 neon_full_reg_offset(vd), 8, 8); 617 } else { 618 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 619 vec_size, vec_size, tmp); 620 } 621 tcg_gen_addi_i32(addr, addr, 1 << size); 622 vd += stride; 623 624 /* Subsequent memory operations inherit alignment */ 625 mop &= ~MO_AMASK; 626 } 627 628 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs); 629 630 return true; 631 } 632 633 static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) 634 { 635 /* Neon load/store single structure to one lane */ 636 int reg; 637 int nregs = a->n + 1; 638 int vd = a->vd; 639 TCGv_i32 addr, tmp; 640 MemOp mop; 641 642 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 643 return false; 644 } 645 646 /* UNDEF accesses to D16-D31 if they don't exist */ 647 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 648 return false; 649 } 650 651 /* Catch the UNDEF cases. This is unavoidably a bit messy. */ 652 switch (nregs) { 653 case 1: 654 if (a->stride != 1) { 655 return false; 656 } 657 if (((a->align & (1 << a->size)) != 0) || 658 (a->size == 2 && (a->align == 1 || a->align == 2))) { 659 return false; 660 } 661 break; 662 case 2: 663 if (a->size == 2 && (a->align & 2) != 0) { 664 return false; 665 } 666 break; 667 case 3: 668 if (a->align != 0) { 669 return false; 670 } 671 break; 672 case 4: 673 if (a->size == 2 && a->align == 3) { 674 return false; 675 } 676 break; 677 default: 678 g_assert_not_reached(); 679 } 680 if ((vd + a->stride * (nregs - 1)) > 31) { 681 /* 682 * Attempts to write off the end of the register file are 683 * UNPREDICTABLE; we choose to UNDEF because otherwise we would 684 * access off the end of the array that holds the register data. 685 */ 686 return false; 687 } 688 689 if (!vfp_access_check(s)) { 690 return true; 691 } 692 693 /* Pick up SCTLR settings */ 694 mop = finalize_memop(s, a->size); 695 696 if (a->align) { 697 MemOp align_op; 698 699 switch (nregs) { 700 case 1: 701 /* For VLD1, use natural alignment. */ 702 align_op = MO_ALIGN; 703 break; 704 case 2: 705 /* For VLD2, use double alignment. */ 706 align_op = pow2_align(a->size + 1); 707 break; 708 case 4: 709 if (a->size == MO_32) { 710 /* 711 * For VLD4.32, align = 1 is double alignment, align = 2 is 712 * quad alignment; align = 3 is rejected above. 713 */ 714 align_op = pow2_align(a->size + a->align); 715 } else { 716 /* For VLD4.8 and VLD.16, we want quad alignment. */ 717 align_op = pow2_align(a->size + 2); 718 } 719 break; 720 default: 721 /* For VLD3, the alignment field is zero and rejected above. 
            */
            g_assert_not_reached();
        }

        mop = (mop & ~MO_AMASK) | align_op;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}

static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)       \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)

/* These insns are all gvec_bitsel but with the inputs in various orders.
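 * tcg_gen_gvec_bitsel() computes d = (a & b) | (~a & c), with its first
 * source operand acting as the selector: that is Vd for VBSL, and Vm for
 * VBIT and VBIF, which swap the remaining two inputs.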
*/ 802 #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ 803 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 804 uint32_t rn_ofs, uint32_t rm_ofs, \ 805 uint32_t oprsz, uint32_t maxsz) \ 806 { \ 807 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \ 808 } \ 809 DO_3SAME(INSN, gen_##INSN##_3s) 810 811 DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs) 812 DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs) 813 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs) 814 815 #define DO_3SAME_NO_SZ_3(INSN, FUNC) \ 816 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 817 { \ 818 if (a->size == 3) { \ 819 return false; \ 820 } \ 821 return do_3same(s, a, FUNC); \ 822 } 823 824 DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax) 825 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax) 826 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin) 827 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin) 828 DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul) 829 DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla) 830 DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls) 831 DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst) 832 DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) 833 DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) 834 DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) 835 DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) 836 837 #define DO_3SAME_CMP(INSN, COND) \ 838 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 839 uint32_t rn_ofs, uint32_t rm_ofs, \ 840 uint32_t oprsz, uint32_t maxsz) \ 841 { \ 842 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \ 843 } \ 844 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) 845 846 DO_3SAME_CMP(VCGT_S, TCG_COND_GT) 847 DO_3SAME_CMP(VCGT_U, TCG_COND_GTU) 848 DO_3SAME_CMP(VCGE_S, TCG_COND_GE) 849 DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) 850 DO_3SAME_CMP(VCEQ, TCG_COND_EQ) 851 852 #define WRAP_OOL_FN(WRAPNAME, FUNC) \ 853 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ 854 uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ 855 { \ 856 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ 857 } 858 859 WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) 860 861 static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) 862 { 863 if (a->size != 0) { 864 return false; 865 } 866 return do_3same(s, a, gen_VMUL_p_3s); 867 } 868 869 #define DO_VQRDMLAH(INSN, FUNC) \ 870 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 871 { \ 872 if (!dc_isar_feature(aa32_rdm, s)) { \ 873 return false; \ 874 } \ 875 if (a->size != 1 && a->size != 2) { \ 876 return false; \ 877 } \ 878 return do_3same(s, a, FUNC); \ 879 } 880 881 DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) 882 DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) 883 884 #define DO_SHA1(NAME, FUNC) \ 885 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ 886 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ 887 { \ 888 if (!dc_isar_feature(aa32_sha1, s)) { \ 889 return false; \ 890 } \ 891 return do_3same(s, a, gen_##NAME##_3s); \ 892 } 893 894 DO_SHA1(SHA1C, gen_helper_crypto_sha1c) 895 DO_SHA1(SHA1P, gen_helper_crypto_sha1p) 896 DO_SHA1(SHA1M, gen_helper_crypto_sha1m) 897 DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) 898 899 #define DO_SHA2(NAME, FUNC) \ 900 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ 901 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ 902 { \ 903 if (!dc_isar_feature(aa32_sha2, s)) { \ 904 return false; \ 905 } \ 906 return do_3same(s, a, gen_##NAME##_3s); \ 907 } 908 909 DO_SHA2(SHA256H, gen_helper_crypto_sha256h) 910 DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) 911 DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) 912 913 #define 
DO_3SAME_64(INSN, FUNC) \ 914 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 915 uint32_t rn_ofs, uint32_t rm_ofs, \ 916 uint32_t oprsz, uint32_t maxsz) \ 917 { \ 918 static const GVecGen3 op = { .fni8 = FUNC }; \ 919 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ 920 } \ 921 DO_3SAME(INSN, gen_##INSN##_3s) 922 923 #define DO_3SAME_64_ENV(INSN, FUNC) \ 924 static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ 925 { \ 926 FUNC(d, cpu_env, n, m); \ 927 } \ 928 DO_3SAME_64(INSN, gen_##INSN##_elt) 929 930 DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) 931 DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) 932 DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) 933 DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) 934 DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) 935 DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) 936 937 #define DO_3SAME_32(INSN, FUNC) \ 938 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 939 uint32_t rn_ofs, uint32_t rm_ofs, \ 940 uint32_t oprsz, uint32_t maxsz) \ 941 { \ 942 static const GVecGen3 ops[4] = { \ 943 { .fni4 = gen_helper_neon_##FUNC##8 }, \ 944 { .fni4 = gen_helper_neon_##FUNC##16 }, \ 945 { .fni4 = gen_helper_neon_##FUNC##32 }, \ 946 { 0 }, \ 947 }; \ 948 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 949 } \ 950 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 951 { \ 952 if (a->size > 2) { \ 953 return false; \ 954 } \ 955 return do_3same(s, a, gen_##INSN##_3s); \ 956 } 957 958 /* 959 * Some helper functions need to be passed the cpu_env. In order 960 * to use those with the gvec APIs like tcg_gen_gvec_3() we need 961 * to create wrapper functions whose prototype is a NeonGenTwoOpFn() 962 * and which call a NeonGenTwoOpEnvFn(). 963 */ 964 #define WRAP_ENV_FN(WRAPNAME, FUNC) \ 965 static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ 966 { \ 967 FUNC(d, cpu_env, n, m); \ 968 } 969 970 #define DO_3SAME_32_ENV(INSN, FUNC) \ 971 WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ 972 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ 973 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ 974 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 975 uint32_t rn_ofs, uint32_t rm_ofs, \ 976 uint32_t oprsz, uint32_t maxsz) \ 977 { \ 978 static const GVecGen3 ops[4] = { \ 979 { .fni4 = gen_##INSN##_tramp8 }, \ 980 { .fni4 = gen_##INSN##_tramp16 }, \ 981 { .fni4 = gen_##INSN##_tramp32 }, \ 982 { 0 }, \ 983 }; \ 984 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 985 } \ 986 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 987 { \ 988 if (a->size > 2) { \ 989 return false; \ 990 } \ 991 return do_3same(s, a, gen_##INSN##_3s); \ 992 } 993 994 DO_3SAME_32(VHADD_S, hadd_s) 995 DO_3SAME_32(VHADD_U, hadd_u) 996 DO_3SAME_32(VHSUB_S, hsub_s) 997 DO_3SAME_32(VHSUB_U, hsub_u) 998 DO_3SAME_32(VRHADD_S, rhadd_s) 999 DO_3SAME_32(VRHADD_U, rhadd_u) 1000 DO_3SAME_32(VRSHL_S, rshl_s) 1001 DO_3SAME_32(VRSHL_U, rshl_u) 1002 1003 DO_3SAME_32_ENV(VQSHL_S, qshl_s) 1004 DO_3SAME_32_ENV(VQSHL_U, qshl_u) 1005 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) 1006 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) 1007 1008 static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) 1009 { 1010 /* Operations handled pairwise 32 bits at a time */ 1011 TCGv_i32 tmp, tmp2, tmp3; 1012 1013 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1014 return false; 1015 } 1016 1017 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1018 if (!dc_isar_feature(aa32_simd_r32, s) && 1019 ((a->vd | a->vn | a->vm) & 0x10)) { 1020 return false; 1021 } 1022 1023 if (a->size == 3) { 1024 return false; 1025 } 1026 1027 if (!vfp_access_check(s)) { 1028 return true; 1029 } 1030 1031 assert(a->q == 0); /* enforced by decode patterns */ 1032 1033 /* 1034 * Note that we have to be careful not to clobber the source operands 1035 * in the "vm == vd" case by storing the result of the first pass too 1036 * early. Since Q is 0 there are always just two passes, so instead 1037 * of a complicated loop over each pass we just unroll. 1038 */ 1039 tmp = tcg_temp_new_i32(); 1040 tmp2 = tcg_temp_new_i32(); 1041 tmp3 = tcg_temp_new_i32(); 1042 1043 read_neon_element32(tmp, a->vn, 0, MO_32); 1044 read_neon_element32(tmp2, a->vn, 1, MO_32); 1045 fn(tmp, tmp, tmp2); 1046 1047 read_neon_element32(tmp3, a->vm, 0, MO_32); 1048 read_neon_element32(tmp2, a->vm, 1, MO_32); 1049 fn(tmp3, tmp3, tmp2); 1050 1051 write_neon_element32(tmp, a->vd, 0, MO_32); 1052 write_neon_element32(tmp3, a->vd, 1, MO_32); 1053 1054 return true; 1055 } 1056 1057 #define DO_3SAME_PAIR(INSN, func) \ 1058 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 1059 { \ 1060 static NeonGenTwoOpFn * const fns[] = { \ 1061 gen_helper_neon_##func##8, \ 1062 gen_helper_neon_##func##16, \ 1063 gen_helper_neon_##func##32, \ 1064 }; \ 1065 if (a->size > 2) { \ 1066 return false; \ 1067 } \ 1068 return do_3same_pair(s, a, fns[a->size]); \ 1069 } 1070 1071 /* 32-bit pairwise ops end up the same as the elementwise versions. */ 1072 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 1073 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 1074 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 1075 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 1076 #define gen_helper_neon_padd_u32 tcg_gen_add_i32 1077 1078 DO_3SAME_PAIR(VPMAX_S, pmax_s) 1079 DO_3SAME_PAIR(VPMIN_S, pmin_s) 1080 DO_3SAME_PAIR(VPMAX_U, pmax_u) 1081 DO_3SAME_PAIR(VPMIN_U, pmin_u) 1082 DO_3SAME_PAIR(VPADD, padd_u) 1083 1084 #define DO_3SAME_VQDMULH(INSN, FUNC) \ 1085 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ 1086 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ 1087 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 1088 uint32_t rn_ofs, uint32_t rm_ofs, \ 1089 uint32_t oprsz, uint32_t maxsz) \ 1090 { \ 1091 static const GVecGen3 ops[2] = { \ 1092 { .fni4 = gen_##INSN##_tramp16 }, \ 1093 { .fni4 = gen_##INSN##_tramp32 }, \ 1094 }; \ 1095 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ 1096 } \ 1097 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 1098 { \ 1099 if (a->size != 1 && a->size != 2) { \ 1100 return false; \ 1101 } \ 1102 return do_3same(s, a, gen_##INSN##_3s); \ 1103 } 1104 1105 DO_3SAME_VQDMULH(VQDMULH, qdmulh) 1106 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) 1107 1108 #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \ 1109 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 1110 uint32_t rn_ofs, uint32_t rm_ofs, \ 1111 uint32_t oprsz, uint32_t maxsz) \ 1112 { \ 1113 TCGv_ptr fpst = fpstatus_ptr(FPST); \ 1114 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \ 1115 oprsz, maxsz, 0, FUNC); \ 1116 } 1117 1118 #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \ 1119 WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \ 1120 WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \ 1121 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ 1122 { \ 1123 if (a->size == MO_16) { \ 1124 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 
1125 return false; \ 1126 } \ 1127 return do_3same(s, a, gen_##INSN##_fp16_3s); \ 1128 } \ 1129 return do_3same(s, a, gen_##INSN##_fp32_3s); \ 1130 } 1131 1132 1133 DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h) 1134 DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h) 1135 DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h) 1136 DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h) 1137 DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h) 1138 DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h) 1139 DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h) 1140 DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h) 1141 DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h) 1142 DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h) 1143 DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h) 1144 DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h) 1145 DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h) 1146 DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) 1147 DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) 1148 DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) 1149 DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h) 1150 1151 WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s) 1152 WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h) 1153 WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s) 1154 WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h) 1155 1156 static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a) 1157 { 1158 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 1159 return false; 1160 } 1161 1162 if (a->size == MO_16) { 1163 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1164 return false; 1165 } 1166 return do_3same(s, a, gen_VMAXNM_fp16_3s); 1167 } 1168 return do_3same(s, a, gen_VMAXNM_fp32_3s); 1169 } 1170 1171 static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) 1172 { 1173 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 1174 return false; 1175 } 1176 1177 if (a->size == MO_16) { 1178 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1179 return false; 1180 } 1181 return do_3same(s, a, gen_VMINNM_fp16_3s); 1182 } 1183 return do_3same(s, a, gen_VMINNM_fp32_3s); 1184 } 1185 1186 static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, 1187 gen_helper_gvec_3_ptr *fn) 1188 { 1189 /* FP pairwise operations */ 1190 TCGv_ptr fpstatus; 1191 1192 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1193 return false; 1194 } 1195 1196 /* UNDEF accesses to D16-D31 if they don't exist. */ 1197 if (!dc_isar_feature(aa32_simd_r32, s) && 1198 ((a->vd | a->vn | a->vm) & 0x10)) { 1199 return false; 1200 } 1201 1202 if (!vfp_access_check(s)) { 1203 return true; 1204 } 1205 1206 assert(a->q == 0); /* enforced by decode patterns */ 1207 1208 1209 fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 1210 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 1211 vfp_reg_offset(1, a->vn), 1212 vfp_reg_offset(1, a->vm), 1213 fpstatus, 8, 8, 0, fn); 1214 1215 return true; 1216 } 1217 1218 /* 1219 * For all the functions using this macro, size == 1 means fp16, 1220 * which is an architecture extension we don't implement yet. 
1221 */ 1222 #define DO_3S_FP_PAIR(INSN,FUNC) \ 1223 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ 1224 { \ 1225 if (a->size == MO_16) { \ 1226 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 1227 return false; \ 1228 } \ 1229 return do_3same_fp_pair(s, a, FUNC##h); \ 1230 } \ 1231 return do_3same_fp_pair(s, a, FUNC##s); \ 1232 } 1233 1234 DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd) 1235 DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax) 1236 DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin) 1237 1238 static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) 1239 { 1240 /* Handle a 2-reg-shift insn which can be vectorized. */ 1241 int vec_size = a->q ? 16 : 8; 1242 int rd_ofs = neon_full_reg_offset(a->vd); 1243 int rm_ofs = neon_full_reg_offset(a->vm); 1244 1245 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1246 return false; 1247 } 1248 1249 /* UNDEF accesses to D16-D31 if they don't exist. */ 1250 if (!dc_isar_feature(aa32_simd_r32, s) && 1251 ((a->vd | a->vm) & 0x10)) { 1252 return false; 1253 } 1254 1255 if ((a->vm | a->vd) & a->q) { 1256 return false; 1257 } 1258 1259 if (!vfp_access_check(s)) { 1260 return true; 1261 } 1262 1263 fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size); 1264 return true; 1265 } 1266 1267 #define DO_2SH(INSN, FUNC) \ 1268 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1269 { \ 1270 return do_vector_2sh(s, a, FUNC); \ 1271 } \ 1272 1273 DO_2SH(VSHL, tcg_gen_gvec_shli) 1274 DO_2SH(VSLI, gen_gvec_sli) 1275 DO_2SH(VSRI, gen_gvec_sri) 1276 DO_2SH(VSRA_S, gen_gvec_ssra) 1277 DO_2SH(VSRA_U, gen_gvec_usra) 1278 DO_2SH(VRSHR_S, gen_gvec_srshr) 1279 DO_2SH(VRSHR_U, gen_gvec_urshr) 1280 DO_2SH(VRSRA_S, gen_gvec_srsra) 1281 DO_2SH(VRSRA_U, gen_gvec_ursra) 1282 1283 static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) 1284 { 1285 /* Signed shift out of range results in all-sign-bits */ 1286 a->shift = MIN(a->shift, (8 << a->size) - 1); 1287 return do_vector_2sh(s, a, tcg_gen_gvec_sari); 1288 } 1289 1290 static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 1291 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1292 { 1293 tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0); 1294 } 1295 1296 static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) 1297 { 1298 /* Shift out of range is architecturally valid and results in zero. */ 1299 if (a->shift >= (8 << a->size)) { 1300 return do_vector_2sh(s, a, gen_zero_rd_2sh); 1301 } else { 1302 return do_vector_2sh(s, a, tcg_gen_gvec_shri); 1303 } 1304 } 1305 1306 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, 1307 NeonGenTwo64OpEnvFn *fn) 1308 { 1309 /* 1310 * 2-reg-and-shift operations, size == 3 case, where the 1311 * function needs to be passed cpu_env. 1312 */ 1313 TCGv_i64 constimm; 1314 int pass; 1315 1316 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1317 return false; 1318 } 1319 1320 /* UNDEF accesses to D16-D31 if they don't exist. */ 1321 if (!dc_isar_feature(aa32_simd_r32, s) && 1322 ((a->vd | a->vm) & 0x10)) { 1323 return false; 1324 } 1325 1326 if ((a->vm | a->vd) & a->q) { 1327 return false; 1328 } 1329 1330 if (!vfp_access_check(s)) { 1331 return true; 1332 } 1333 1334 /* 1335 * To avoid excessive duplication of ops we implement shift 1336 * by immediate using the variable shift operations. 
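     * The shift count is materialised as a constant and handed to the
     * same helpers that handle a shift count taken from a register.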
1337 */ 1338 constimm = tcg_constant_i64(dup_const(a->size, a->shift)); 1339 1340 for (pass = 0; pass < a->q + 1; pass++) { 1341 TCGv_i64 tmp = tcg_temp_new_i64(); 1342 1343 read_neon_element64(tmp, a->vm, pass, MO_64); 1344 fn(tmp, cpu_env, tmp, constimm); 1345 write_neon_element64(tmp, a->vd, pass, MO_64); 1346 } 1347 return true; 1348 } 1349 1350 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, 1351 NeonGenTwoOpEnvFn *fn) 1352 { 1353 /* 1354 * 2-reg-and-shift operations, size < 3 case, where the 1355 * helper needs to be passed cpu_env. 1356 */ 1357 TCGv_i32 constimm, tmp; 1358 int pass; 1359 1360 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1361 return false; 1362 } 1363 1364 /* UNDEF accesses to D16-D31 if they don't exist. */ 1365 if (!dc_isar_feature(aa32_simd_r32, s) && 1366 ((a->vd | a->vm) & 0x10)) { 1367 return false; 1368 } 1369 1370 if ((a->vm | a->vd) & a->q) { 1371 return false; 1372 } 1373 1374 if (!vfp_access_check(s)) { 1375 return true; 1376 } 1377 1378 /* 1379 * To avoid excessive duplication of ops we implement shift 1380 * by immediate using the variable shift operations. 1381 */ 1382 constimm = tcg_constant_i32(dup_const(a->size, a->shift)); 1383 tmp = tcg_temp_new_i32(); 1384 1385 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 1386 read_neon_element32(tmp, a->vm, pass, MO_32); 1387 fn(tmp, cpu_env, tmp, constimm); 1388 write_neon_element32(tmp, a->vd, pass, MO_32); 1389 } 1390 return true; 1391 } 1392 1393 #define DO_2SHIFT_ENV(INSN, FUNC) \ 1394 static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ 1395 { \ 1396 return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ 1397 } \ 1398 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1399 { \ 1400 static NeonGenTwoOpEnvFn * const fns[] = { \ 1401 gen_helper_neon_##FUNC##8, \ 1402 gen_helper_neon_##FUNC##16, \ 1403 gen_helper_neon_##FUNC##32, \ 1404 }; \ 1405 assert(a->size < ARRAY_SIZE(fns)); \ 1406 return do_2shift_env_32(s, a, fns[a->size]); \ 1407 } 1408 1409 DO_2SHIFT_ENV(VQSHLU, qshlu_s) 1410 DO_2SHIFT_ENV(VQSHL_U, qshl_u) 1411 DO_2SHIFT_ENV(VQSHL_S, qshl_s) 1412 1413 static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, 1414 NeonGenTwo64OpFn *shiftfn, 1415 NeonGenNarrowEnvFn *narrowfn) 1416 { 1417 /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ 1418 TCGv_i64 constimm, rm1, rm2; 1419 TCGv_i32 rd; 1420 1421 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1422 return false; 1423 } 1424 1425 /* UNDEF accesses to D16-D31 if they don't exist. */ 1426 if (!dc_isar_feature(aa32_simd_r32, s) && 1427 ((a->vd | a->vm) & 0x10)) { 1428 return false; 1429 } 1430 1431 if (a->vm & 1) { 1432 return false; 1433 } 1434 1435 if (!vfp_access_check(s)) { 1436 return true; 1437 } 1438 1439 /* 1440 * This is always a right shift, and the shiftfn is always a 1441 * left-shift helper, which thus needs the negated shift count. 
1442 */ 1443 constimm = tcg_constant_i64(-a->shift); 1444 rm1 = tcg_temp_new_i64(); 1445 rm2 = tcg_temp_new_i64(); 1446 rd = tcg_temp_new_i32(); 1447 1448 /* Load both inputs first to avoid potential overwrite if rm == rd */ 1449 read_neon_element64(rm1, a->vm, 0, MO_64); 1450 read_neon_element64(rm2, a->vm, 1, MO_64); 1451 1452 shiftfn(rm1, rm1, constimm); 1453 narrowfn(rd, cpu_env, rm1); 1454 write_neon_element32(rd, a->vd, 0, MO_32); 1455 1456 shiftfn(rm2, rm2, constimm); 1457 narrowfn(rd, cpu_env, rm2); 1458 write_neon_element32(rd, a->vd, 1, MO_32); 1459 1460 return true; 1461 } 1462 1463 static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, 1464 NeonGenTwoOpFn *shiftfn, 1465 NeonGenNarrowEnvFn *narrowfn) 1466 { 1467 /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ 1468 TCGv_i32 constimm, rm1, rm2, rm3, rm4; 1469 TCGv_i64 rtmp; 1470 uint32_t imm; 1471 1472 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1473 return false; 1474 } 1475 1476 /* UNDEF accesses to D16-D31 if they don't exist. */ 1477 if (!dc_isar_feature(aa32_simd_r32, s) && 1478 ((a->vd | a->vm) & 0x10)) { 1479 return false; 1480 } 1481 1482 if (a->vm & 1) { 1483 return false; 1484 } 1485 1486 if (!vfp_access_check(s)) { 1487 return true; 1488 } 1489 1490 /* 1491 * This is always a right shift, and the shiftfn is always a 1492 * left-shift helper, which thus needs the negated shift count 1493 * duplicated into each lane of the immediate value. 1494 */ 1495 if (a->size == 1) { 1496 imm = (uint16_t)(-a->shift); 1497 imm |= imm << 16; 1498 } else { 1499 /* size == 2 */ 1500 imm = -a->shift; 1501 } 1502 constimm = tcg_constant_i32(imm); 1503 1504 /* Load all inputs first to avoid potential overwrite */ 1505 rm1 = tcg_temp_new_i32(); 1506 rm2 = tcg_temp_new_i32(); 1507 rm3 = tcg_temp_new_i32(); 1508 rm4 = tcg_temp_new_i32(); 1509 read_neon_element32(rm1, a->vm, 0, MO_32); 1510 read_neon_element32(rm2, a->vm, 1, MO_32); 1511 read_neon_element32(rm3, a->vm, 2, MO_32); 1512 read_neon_element32(rm4, a->vm, 3, MO_32); 1513 rtmp = tcg_temp_new_i64(); 1514 1515 shiftfn(rm1, rm1, constimm); 1516 shiftfn(rm2, rm2, constimm); 1517 1518 tcg_gen_concat_i32_i64(rtmp, rm1, rm2); 1519 1520 narrowfn(rm1, cpu_env, rtmp); 1521 write_neon_element32(rm1, a->vd, 0, MO_32); 1522 1523 shiftfn(rm3, rm3, constimm); 1524 shiftfn(rm4, rm4, constimm); 1525 1526 tcg_gen_concat_i32_i64(rtmp, rm3, rm4); 1527 1528 narrowfn(rm3, cpu_env, rtmp); 1529 write_neon_element32(rm3, a->vd, 1, MO_32); 1530 return true; 1531 } 1532 1533 #define DO_2SN_64(INSN, FUNC, NARROWFUNC) \ 1534 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1535 { \ 1536 return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \ 1537 } 1538 #define DO_2SN_32(INSN, FUNC, NARROWFUNC) \ 1539 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1540 { \ 1541 return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ 1542 } 1543 1544 static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1545 { 1546 tcg_gen_extrl_i64_i32(dest, src); 1547 } 1548 1549 static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1550 { 1551 gen_helper_neon_narrow_u16(dest, src); 1552 } 1553 1554 static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) 1555 { 1556 gen_helper_neon_narrow_u8(dest, src); 1557 } 1558 1559 DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32) 1560 DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16) 1561 DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8) 1562 1563 
DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32) 1564 DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16) 1565 DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8) 1566 1567 DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32) 1568 DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16) 1569 DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) 1570 1571 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) 1572 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) 1573 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) 1574 DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32) 1575 DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16) 1576 DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8) 1577 1578 DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32) 1579 DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16) 1580 DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8) 1581 1582 DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32) 1583 DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16) 1584 DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) 1585 1586 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32) 1587 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) 1588 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) 1589 1590 static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, 1591 NeonGenWidenFn *widenfn, bool u) 1592 { 1593 TCGv_i64 tmp; 1594 TCGv_i32 rm0, rm1; 1595 uint64_t widen_mask = 0; 1596 1597 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1598 return false; 1599 } 1600 1601 /* UNDEF accesses to D16-D31 if they don't exist. */ 1602 if (!dc_isar_feature(aa32_simd_r32, s) && 1603 ((a->vd | a->vm) & 0x10)) { 1604 return false; 1605 } 1606 1607 if (a->vd & 1) { 1608 return false; 1609 } 1610 1611 if (!vfp_access_check(s)) { 1612 return true; 1613 } 1614 1615 /* 1616 * This is a widen-and-shift operation. The shift is always less 1617 * than the width of the source type, so after widening the input 1618 * vector we can simply shift the whole 64-bit widened register, 1619 * and then clear the potential overflow bits resulting from left 1620 * bits of the narrow input appearing as right bits of the left 1621 * neighbour narrow input. Calculate a mask of bits to clear. 
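     * For example, VSHLL.S8 with shift 3 has esize 8, so widen_mask
     * works out to 0x0007000700070007: after the 64-bit left shift the
     * bottom bits of each widened lane hold sign bits spilled from the
     * lane below (zeros for the lowest lane) and must be cleared.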
1622 */ 1623 if ((a->shift != 0) && (a->size < 2 || u)) { 1624 int esize = 8 << a->size; 1625 widen_mask = MAKE_64BIT_MASK(0, esize); 1626 widen_mask >>= esize - a->shift; 1627 widen_mask = dup_const(a->size + 1, widen_mask); 1628 } 1629 1630 rm0 = tcg_temp_new_i32(); 1631 rm1 = tcg_temp_new_i32(); 1632 read_neon_element32(rm0, a->vm, 0, MO_32); 1633 read_neon_element32(rm1, a->vm, 1, MO_32); 1634 tmp = tcg_temp_new_i64(); 1635 1636 widenfn(tmp, rm0); 1637 if (a->shift != 0) { 1638 tcg_gen_shli_i64(tmp, tmp, a->shift); 1639 tcg_gen_andi_i64(tmp, tmp, ~widen_mask); 1640 } 1641 write_neon_element64(tmp, a->vd, 0, MO_64); 1642 1643 widenfn(tmp, rm1); 1644 if (a->shift != 0) { 1645 tcg_gen_shli_i64(tmp, tmp, a->shift); 1646 tcg_gen_andi_i64(tmp, tmp, ~widen_mask); 1647 } 1648 write_neon_element64(tmp, a->vd, 1, MO_64); 1649 return true; 1650 } 1651 1652 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) 1653 { 1654 static NeonGenWidenFn * const widenfn[] = { 1655 gen_helper_neon_widen_s8, 1656 gen_helper_neon_widen_s16, 1657 tcg_gen_ext_i32_i64, 1658 }; 1659 return do_vshll_2sh(s, a, widenfn[a->size], false); 1660 } 1661 1662 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) 1663 { 1664 static NeonGenWidenFn * const widenfn[] = { 1665 gen_helper_neon_widen_u8, 1666 gen_helper_neon_widen_u16, 1667 tcg_gen_extu_i32_i64, 1668 }; 1669 return do_vshll_2sh(s, a, widenfn[a->size], true); 1670 } 1671 1672 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, 1673 gen_helper_gvec_2_ptr *fn) 1674 { 1675 /* FP operations in 2-reg-and-shift group */ 1676 int vec_size = a->q ? 16 : 8; 1677 int rd_ofs = neon_full_reg_offset(a->vd); 1678 int rm_ofs = neon_full_reg_offset(a->vm); 1679 TCGv_ptr fpst; 1680 1681 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1682 return false; 1683 } 1684 1685 if (a->size == MO_16) { 1686 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1687 return false; 1688 } 1689 } 1690 1691 /* UNDEF accesses to D16-D31 if they don't exist. */ 1692 if (!dc_isar_feature(aa32_simd_r32, s) && 1693 ((a->vd | a->vm) & 0x10)) { 1694 return false; 1695 } 1696 1697 if ((a->vm | a->vd) & a->q) { 1698 return false; 1699 } 1700 1701 if (!vfp_access_check(s)) { 1702 return true; 1703 } 1704 1705 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 1706 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn); 1707 return true; 1708 } 1709 1710 #define DO_FP_2SH(INSN, FUNC) \ 1711 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ 1712 { \ 1713 return do_fp_2sh(s, a, FUNC); \ 1714 } 1715 1716 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf) 1717 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf) 1718 DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs) 1719 DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu) 1720 1721 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh) 1722 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh) 1723 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs) 1724 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu) 1725 1726 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, 1727 GVecGen2iFn *fn) 1728 { 1729 uint64_t imm; 1730 int reg_ofs, vec_size; 1731 1732 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1733 return false; 1734 } 1735 1736 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1737 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 1738 return false; 1739 } 1740 1741 if (a->vd & a->q) { 1742 return false; 1743 } 1744 1745 if (!vfp_access_check(s)) { 1746 return true; 1747 } 1748 1749 reg_ofs = neon_full_reg_offset(a->vd); 1750 vec_size = a->q ? 16 : 8; 1751 imm = asimd_imm_const(a->imm, a->cmode, a->op); 1752 1753 fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size); 1754 return true; 1755 } 1756 1757 static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs, 1758 int64_t c, uint32_t oprsz, uint32_t maxsz) 1759 { 1760 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 1761 } 1762 1763 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) 1764 { 1765 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ 1766 GVecGen2iFn *fn; 1767 1768 if ((a->cmode & 1) && a->cmode < 12) { 1769 /* for op=1, the imm will be inverted, so BIC becomes AND. */ 1770 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; 1771 } else { 1772 /* There is one unallocated cmode/op combination in this space */ 1773 if (a->cmode == 15 && a->op == 1) { 1774 return false; 1775 } 1776 fn = gen_VMOV_1r; 1777 } 1778 return do_1reg_imm(s, a, fn); 1779 } 1780 1781 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, 1782 NeonGenWidenFn *widenfn, 1783 NeonGenTwo64OpFn *opfn, 1784 int src1_mop, int src2_mop) 1785 { 1786 /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ 1787 TCGv_i64 rn0_64, rn1_64, rm_64; 1788 1789 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1790 return false; 1791 } 1792 1793 /* UNDEF accesses to D16-D31 if they don't exist. */ 1794 if (!dc_isar_feature(aa32_simd_r32, s) && 1795 ((a->vd | a->vn | a->vm) & 0x10)) { 1796 return false; 1797 } 1798 1799 if (!opfn) { 1800 /* size == 3 case, which is an entirely different insn group */ 1801 return false; 1802 } 1803 1804 if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) { 1805 return false; 1806 } 1807 1808 if (!vfp_access_check(s)) { 1809 return true; 1810 } 1811 1812 rn0_64 = tcg_temp_new_i64(); 1813 rn1_64 = tcg_temp_new_i64(); 1814 rm_64 = tcg_temp_new_i64(); 1815 1816 if (src1_mop >= 0) { 1817 read_neon_element64(rn0_64, a->vn, 0, src1_mop); 1818 } else { 1819 TCGv_i32 tmp = tcg_temp_new_i32(); 1820 read_neon_element32(tmp, a->vn, 0, MO_32); 1821 widenfn(rn0_64, tmp); 1822 } 1823 if (src2_mop >= 0) { 1824 read_neon_element64(rm_64, a->vm, 0, src2_mop); 1825 } else { 1826 TCGv_i32 tmp = tcg_temp_new_i32(); 1827 read_neon_element32(tmp, a->vm, 0, MO_32); 1828 widenfn(rm_64, tmp); 1829 } 1830 1831 opfn(rn0_64, rn0_64, rm_64); 1832 1833 /* 1834 * Load second pass inputs before storing the first pass result, to 1835 * avoid incorrect results if a narrow input overlaps with the result. 
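     * An example of such an overlap is VADDW.S16 q0, q0, d0, where the
     * narrow Dm input is the low half of the Qd destination.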
1836 */ 1837 if (src1_mop >= 0) { 1838 read_neon_element64(rn1_64, a->vn, 1, src1_mop); 1839 } else { 1840 TCGv_i32 tmp = tcg_temp_new_i32(); 1841 read_neon_element32(tmp, a->vn, 1, MO_32); 1842 widenfn(rn1_64, tmp); 1843 } 1844 if (src2_mop >= 0) { 1845 read_neon_element64(rm_64, a->vm, 1, src2_mop); 1846 } else { 1847 TCGv_i32 tmp = tcg_temp_new_i32(); 1848 read_neon_element32(tmp, a->vm, 1, MO_32); 1849 widenfn(rm_64, tmp); 1850 } 1851 1852 write_neon_element64(rn0_64, a->vd, 0, MO_64); 1853 1854 opfn(rn1_64, rn1_64, rm_64); 1855 write_neon_element64(rn1_64, a->vd, 1, MO_64); 1856 1857 return true; 1858 } 1859 1860 #define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \ 1861 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 1862 { \ 1863 static NeonGenWidenFn * const widenfn[] = { \ 1864 gen_helper_neon_widen_##S##8, \ 1865 gen_helper_neon_widen_##S##16, \ 1866 NULL, NULL, \ 1867 }; \ 1868 static NeonGenTwo64OpFn * const addfn[] = { \ 1869 gen_helper_neon_##OP##l_u16, \ 1870 gen_helper_neon_##OP##l_u32, \ 1871 tcg_gen_##OP##_i64, \ 1872 NULL, \ 1873 }; \ 1874 int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \ 1875 return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \ 1876 SRC1WIDE ? MO_UQ : narrow_mop, \ 1877 narrow_mop); \ 1878 } 1879 1880 DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN) 1881 DO_PREWIDEN(VADDL_U, u, add, false, 0) 1882 DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN) 1883 DO_PREWIDEN(VSUBL_U, u, sub, false, 0) 1884 DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN) 1885 DO_PREWIDEN(VADDW_U, u, add, true, 0) 1886 DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN) 1887 DO_PREWIDEN(VSUBW_U, u, sub, true, 0) 1888 1889 static bool do_narrow_3d(DisasContext *s, arg_3diff *a, 1890 NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn) 1891 { 1892 /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */ 1893 TCGv_i64 rn_64, rm_64; 1894 TCGv_i32 rd0, rd1; 1895 1896 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1897 return false; 1898 } 1899 1900 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1901 if (!dc_isar_feature(aa32_simd_r32, s) && 1902 ((a->vd | a->vn | a->vm) & 0x10)) { 1903 return false; 1904 } 1905 1906 if (!opfn || !narrowfn) { 1907 /* size == 3 case, which is an entirely different insn group */ 1908 return false; 1909 } 1910 1911 if ((a->vn | a->vm) & 1) { 1912 return false; 1913 } 1914 1915 if (!vfp_access_check(s)) { 1916 return true; 1917 } 1918 1919 rn_64 = tcg_temp_new_i64(); 1920 rm_64 = tcg_temp_new_i64(); 1921 rd0 = tcg_temp_new_i32(); 1922 rd1 = tcg_temp_new_i32(); 1923 1924 read_neon_element64(rn_64, a->vn, 0, MO_64); 1925 read_neon_element64(rm_64, a->vm, 0, MO_64); 1926 1927 opfn(rn_64, rn_64, rm_64); 1928 1929 narrowfn(rd0, rn_64); 1930 1931 read_neon_element64(rn_64, a->vn, 1, MO_64); 1932 read_neon_element64(rm_64, a->vm, 1, MO_64); 1933 1934 opfn(rn_64, rn_64, rm_64); 1935 1936 narrowfn(rd1, rn_64); 1937 1938 write_neon_element32(rd0, a->vd, 0, MO_32); 1939 write_neon_element32(rd1, a->vd, 1, MO_32); 1940 1941 return true; 1942 } 1943 1944 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \ 1945 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 1946 { \ 1947 static NeonGenTwo64OpFn * const addfn[] = { \ 1948 gen_helper_neon_##OP##l_u16, \ 1949 gen_helper_neon_##OP##l_u32, \ 1950 tcg_gen_##OP##_i64, \ 1951 NULL, \ 1952 }; \ 1953 static NeonGenNarrowFn * const narrowfn[] = { \ 1954 gen_helper_neon_##NARROWTYPE##_high_u8, \ 1955 gen_helper_neon_##NARROWTYPE##_high_u16, \ 1956 EXTOP, \ 1957 NULL, \ 1958 }; \ 1959 return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \ 1960 } 1961 1962 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn) 1963 { 1964 tcg_gen_addi_i64(rn, rn, 1u << 31); 1965 tcg_gen_extrh_i64_i32(rd, rn); 1966 } 1967 1968 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32) 1969 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32) 1970 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32) 1971 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32) 1972 1973 static bool do_long_3d(DisasContext *s, arg_3diff *a, 1974 NeonGenTwoOpWidenFn *opfn, 1975 NeonGenTwo64OpFn *accfn) 1976 { 1977 /* 1978 * 3-regs different lengths, long operations. 1979 * These perform an operation on two inputs that returns a double-width 1980 * result, and then possibly perform an accumulation operation of 1981 * that result into the double-width destination. 1982 */ 1983 TCGv_i64 rd0, rd1, tmp; 1984 TCGv_i32 rn, rm; 1985 1986 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1987 return false; 1988 } 1989 1990 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 1991 if (!dc_isar_feature(aa32_simd_r32, s) && 1992 ((a->vd | a->vn | a->vm) & 0x10)) { 1993 return false; 1994 } 1995 1996 if (!opfn) { 1997 /* size == 3 case, which is an entirely different insn group */ 1998 return false; 1999 } 2000 2001 if (a->vd & 1) { 2002 return false; 2003 } 2004 2005 if (!vfp_access_check(s)) { 2006 return true; 2007 } 2008 2009 rd0 = tcg_temp_new_i64(); 2010 rd1 = tcg_temp_new_i64(); 2011 2012 rn = tcg_temp_new_i32(); 2013 rm = tcg_temp_new_i32(); 2014 read_neon_element32(rn, a->vn, 0, MO_32); 2015 read_neon_element32(rm, a->vm, 0, MO_32); 2016 opfn(rd0, rn, rm); 2017 2018 read_neon_element32(rn, a->vn, 1, MO_32); 2019 read_neon_element32(rm, a->vm, 1, MO_32); 2020 opfn(rd1, rn, rm); 2021 2022 /* Don't store results until after all loads: they might overlap */ 2023 if (accfn) { 2024 tmp = tcg_temp_new_i64(); 2025 read_neon_element64(tmp, a->vd, 0, MO_64); 2026 accfn(rd0, tmp, rd0); 2027 read_neon_element64(tmp, a->vd, 1, MO_64); 2028 accfn(rd1, tmp, rd1); 2029 } 2030 2031 write_neon_element64(rd0, a->vd, 0, MO_64); 2032 write_neon_element64(rd1, a->vd, 1, MO_64); 2033 2034 return true; 2035 } 2036 2037 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) 2038 { 2039 static NeonGenTwoOpWidenFn * const opfn[] = { 2040 gen_helper_neon_abdl_s16, 2041 gen_helper_neon_abdl_s32, 2042 gen_helper_neon_abdl_s64, 2043 NULL, 2044 }; 2045 2046 return do_long_3d(s, a, opfn[a->size], NULL); 2047 } 2048 2049 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) 2050 { 2051 static NeonGenTwoOpWidenFn * const opfn[] = { 2052 gen_helper_neon_abdl_u16, 2053 gen_helper_neon_abdl_u32, 2054 gen_helper_neon_abdl_u64, 2055 NULL, 2056 }; 2057 2058 return do_long_3d(s, a, opfn[a->size], NULL); 2059 } 2060 2061 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) 2062 { 2063 static NeonGenTwoOpWidenFn * const opfn[] = { 2064 gen_helper_neon_abdl_s16, 2065 gen_helper_neon_abdl_s32, 2066 gen_helper_neon_abdl_s64, 2067 NULL, 2068 }; 2069 static NeonGenTwo64OpFn * const addfn[] = { 2070 gen_helper_neon_addl_u16, 2071 gen_helper_neon_addl_u32, 2072 tcg_gen_add_i64, 2073 NULL, 2074 }; 2075 2076 return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 2077 } 2078 2079 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) 2080 { 2081 static NeonGenTwoOpWidenFn * const opfn[] = { 2082 gen_helper_neon_abdl_u16, 2083 gen_helper_neon_abdl_u32, 2084 gen_helper_neon_abdl_u64, 2085 NULL, 2086 }; 2087 static NeonGenTwo64OpFn * const addfn[] = { 2088 gen_helper_neon_addl_u16, 2089 gen_helper_neon_addl_u32, 2090 tcg_gen_add_i64, 2091 NULL, 2092 }; 2093 2094 return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 2095 } 2096 2097 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2098 { 2099 TCGv_i32 lo = tcg_temp_new_i32(); 2100 TCGv_i32 hi = tcg_temp_new_i32(); 2101 2102 tcg_gen_muls2_i32(lo, hi, rn, rm); 2103 tcg_gen_concat_i32_i64(rd, lo, hi); 2104 } 2105 2106 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2107 { 2108 TCGv_i32 lo = tcg_temp_new_i32(); 2109 TCGv_i32 hi = tcg_temp_new_i32(); 2110 2111 tcg_gen_mulu2_i32(lo, hi, rn, rm); 2112 tcg_gen_concat_i32_i64(rd, lo, hi); 2113 } 2114 2115 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) 2116 { 2117 static NeonGenTwoOpWidenFn * const opfn[] = { 2118 gen_helper_neon_mull_s8, 2119 gen_helper_neon_mull_s16, 2120 gen_mull_s32, 2121 NULL, 2122 }; 2123 2124 return do_long_3d(s, a, opfn[a->size], NULL); 2125 } 2126 2127 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) 2128 { 2129 
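    /*
     * VMULL.U8/U16/U32: unsigned widening multiply.  As with the other
     * long ops, the table below is indexed by a->size; the NULL entry
     * for size == 3 makes do_long_3d() reject that encoding, which
     * belongs to an entirely different insn group.
     */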
static NeonGenTwoOpWidenFn * const opfn[] = { 2130 gen_helper_neon_mull_u8, 2131 gen_helper_neon_mull_u16, 2132 gen_mull_u32, 2133 NULL, 2134 }; 2135 2136 return do_long_3d(s, a, opfn[a->size], NULL); 2137 } 2138 2139 #define DO_VMLAL(INSN,MULL,ACC) \ 2140 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 2141 { \ 2142 static NeonGenTwoOpWidenFn * const opfn[] = { \ 2143 gen_helper_neon_##MULL##8, \ 2144 gen_helper_neon_##MULL##16, \ 2145 gen_##MULL##32, \ 2146 NULL, \ 2147 }; \ 2148 static NeonGenTwo64OpFn * const accfn[] = { \ 2149 gen_helper_neon_##ACC##l_u16, \ 2150 gen_helper_neon_##ACC##l_u32, \ 2151 tcg_gen_##ACC##_i64, \ 2152 NULL, \ 2153 }; \ 2154 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ 2155 } 2156 2157 DO_VMLAL(VMLAL_S,mull_s,add) 2158 DO_VMLAL(VMLAL_U,mull_u,add) 2159 DO_VMLAL(VMLSL_S,mull_s,sub) 2160 DO_VMLAL(VMLSL_U,mull_u,sub) 2161 2162 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2163 { 2164 gen_helper_neon_mull_s16(rd, rn, rm); 2165 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd); 2166 } 2167 2168 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2169 { 2170 gen_mull_s32(rd, rn, rm); 2171 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd); 2172 } 2173 2174 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) 2175 { 2176 static NeonGenTwoOpWidenFn * const opfn[] = { 2177 NULL, 2178 gen_VQDMULL_16, 2179 gen_VQDMULL_32, 2180 NULL, 2181 }; 2182 2183 return do_long_3d(s, a, opfn[a->size], NULL); 2184 } 2185 2186 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2187 { 2188 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); 2189 } 2190 2191 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2192 { 2193 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); 2194 } 2195 2196 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) 2197 { 2198 static NeonGenTwoOpWidenFn * const opfn[] = { 2199 NULL, 2200 gen_VQDMULL_16, 2201 gen_VQDMULL_32, 2202 NULL, 2203 }; 2204 static NeonGenTwo64OpFn * const accfn[] = { 2205 NULL, 2206 gen_VQDMLAL_acc_16, 2207 gen_VQDMLAL_acc_32, 2208 NULL, 2209 }; 2210 2211 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2212 } 2213 2214 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2215 { 2216 gen_helper_neon_negl_u32(rm, rm); 2217 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); 2218 } 2219 2220 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2221 { 2222 tcg_gen_neg_i64(rm, rm); 2223 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); 2224 } 2225 2226 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) 2227 { 2228 static NeonGenTwoOpWidenFn * const opfn[] = { 2229 NULL, 2230 gen_VQDMULL_16, 2231 gen_VQDMULL_32, 2232 NULL, 2233 }; 2234 static NeonGenTwo64OpFn * const accfn[] = { 2235 NULL, 2236 gen_VQDMLSL_acc_16, 2237 gen_VQDMLSL_acc_32, 2238 NULL, 2239 }; 2240 2241 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2242 } 2243 2244 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) 2245 { 2246 gen_helper_gvec_3 *fn_gvec; 2247 2248 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2249 return false; 2250 } 2251 2252 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2253 if (!dc_isar_feature(aa32_simd_r32, s) && 2254 ((a->vd | a->vn | a->vm) & 0x10)) { 2255 return false; 2256 } 2257 2258 if (a->vd & 1) { 2259 return false; 2260 } 2261 2262 switch (a->size) { 2263 case 0: 2264 fn_gvec = gen_helper_neon_pmull_h; 2265 break; 2266 case 2: 2267 if (!dc_isar_feature(aa32_pmull, s)) { 2268 return false; 2269 } 2270 fn_gvec = gen_helper_gvec_pmull_q; 2271 break; 2272 default: 2273 return false; 2274 } 2275 2276 if (!vfp_access_check(s)) { 2277 return true; 2278 } 2279 2280 tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd), 2281 neon_full_reg_offset(a->vn), 2282 neon_full_reg_offset(a->vm), 2283 16, 16, 0, fn_gvec); 2284 return true; 2285 } 2286 2287 static void gen_neon_dup_low16(TCGv_i32 var) 2288 { 2289 TCGv_i32 tmp = tcg_temp_new_i32(); 2290 tcg_gen_ext16u_i32(var, var); 2291 tcg_gen_shli_i32(tmp, var, 16); 2292 tcg_gen_or_i32(var, var, tmp); 2293 } 2294 2295 static void gen_neon_dup_high16(TCGv_i32 var) 2296 { 2297 TCGv_i32 tmp = tcg_temp_new_i32(); 2298 tcg_gen_andi_i32(var, var, 0xffff0000); 2299 tcg_gen_shri_i32(tmp, var, 16); 2300 tcg_gen_or_i32(var, var, tmp); 2301 } 2302 2303 static inline TCGv_i32 neon_get_scalar(int size, int reg) 2304 { 2305 TCGv_i32 tmp = tcg_temp_new_i32(); 2306 if (size == MO_16) { 2307 read_neon_element32(tmp, reg & 7, reg >> 4, MO_32); 2308 if (reg & 8) { 2309 gen_neon_dup_high16(tmp); 2310 } else { 2311 gen_neon_dup_low16(tmp); 2312 } 2313 } else { 2314 read_neon_element32(tmp, reg & 15, reg >> 4, MO_32); 2315 } 2316 return tmp; 2317 } 2318 2319 static bool do_2scalar(DisasContext *s, arg_2scalar *a, 2320 NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) 2321 { 2322 /* 2323 * Two registers and a scalar: perform an operation between 2324 * the input elements and the scalar, and then possibly 2325 * perform an accumulation operation of that result into the 2326 * destination. 2327 */ 2328 TCGv_i32 scalar, tmp; 2329 int pass; 2330 2331 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2332 return false; 2333 } 2334 2335 /* UNDEF accesses to D16-D31 if they don't exist. */ 2336 if (!dc_isar_feature(aa32_simd_r32, s) && 2337 ((a->vd | a->vn | a->vm) & 0x10)) { 2338 return false; 2339 } 2340 2341 if (!opfn) { 2342 /* Bad size (including size == 3, which is a different insn group) */ 2343 return false; 2344 } 2345 2346 if (a->q && ((a->vd | a->vn) & 1)) { 2347 return false; 2348 } 2349 2350 if (!vfp_access_check(s)) { 2351 return true; 2352 } 2353 2354 scalar = neon_get_scalar(a->size, a->vm); 2355 tmp = tcg_temp_new_i32(); 2356 2357 for (pass = 0; pass < (a->q ? 
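    /* Four 32-bit passes for a Q-sized operation, two for a D-sized one. */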
4 : 2); pass++) { 2358 read_neon_element32(tmp, a->vn, pass, MO_32); 2359 opfn(tmp, tmp, scalar); 2360 if (accfn) { 2361 TCGv_i32 rd = tcg_temp_new_i32(); 2362 read_neon_element32(rd, a->vd, pass, MO_32); 2363 accfn(tmp, rd, tmp); 2364 } 2365 write_neon_element32(tmp, a->vd, pass, MO_32); 2366 } 2367 return true; 2368 } 2369 2370 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) 2371 { 2372 static NeonGenTwoOpFn * const opfn[] = { 2373 NULL, 2374 gen_helper_neon_mul_u16, 2375 tcg_gen_mul_i32, 2376 NULL, 2377 }; 2378 2379 return do_2scalar(s, a, opfn[a->size], NULL); 2380 } 2381 2382 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) 2383 { 2384 static NeonGenTwoOpFn * const opfn[] = { 2385 NULL, 2386 gen_helper_neon_mul_u16, 2387 tcg_gen_mul_i32, 2388 NULL, 2389 }; 2390 static NeonGenTwoOpFn * const accfn[] = { 2391 NULL, 2392 gen_helper_neon_add_u16, 2393 tcg_gen_add_i32, 2394 NULL, 2395 }; 2396 2397 return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2398 } 2399 2400 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) 2401 { 2402 static NeonGenTwoOpFn * const opfn[] = { 2403 NULL, 2404 gen_helper_neon_mul_u16, 2405 tcg_gen_mul_i32, 2406 NULL, 2407 }; 2408 static NeonGenTwoOpFn * const accfn[] = { 2409 NULL, 2410 gen_helper_neon_sub_u16, 2411 tcg_gen_sub_i32, 2412 NULL, 2413 }; 2414 2415 return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2416 } 2417 2418 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a, 2419 gen_helper_gvec_3_ptr *fn) 2420 { 2421 /* Two registers and a scalar, using gvec */ 2422 int vec_size = a->q ? 16 : 8; 2423 int rd_ofs = neon_full_reg_offset(a->vd); 2424 int rn_ofs = neon_full_reg_offset(a->vn); 2425 int rm_ofs; 2426 int idx; 2427 TCGv_ptr fpstatus; 2428 2429 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2430 return false; 2431 } 2432 2433 /* UNDEF accesses to D16-D31 if they don't exist. */ 2434 if (!dc_isar_feature(aa32_simd_r32, s) && 2435 ((a->vd | a->vn | a->vm) & 0x10)) { 2436 return false; 2437 } 2438 2439 if (!fn) { 2440 /* Bad size (including size == 3, which is a different insn group) */ 2441 return false; 2442 } 2443 2444 if (a->q && ((a->vd | a->vn) & 1)) { 2445 return false; 2446 } 2447 2448 if (!vfp_access_check(s)) { 2449 return true; 2450 } 2451 2452 /* a->vm is M:Vm, which encodes both register and index */ 2453 idx = extract32(a->vm, a->size + 2, 2); 2454 a->vm = extract32(a->vm, 0, a->size + 2); 2455 rm_ofs = neon_full_reg_offset(a->vm); 2456 2457 fpstatus = fpstatus_ptr(a->size == 1 ? 
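    /*
     * size == 1 selects the fp16 helpers here; Neon arithmetic uses the
     * "standard FPSCR" value, with a separate status flavour for fp16.
     */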
FPST_STD_F16 : FPST_STD); 2458 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus, 2459 vec_size, vec_size, idx, fn); 2460 return true; 2461 } 2462 2463 #define DO_VMUL_F_2sc(NAME, FUNC) \ 2464 static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \ 2465 { \ 2466 static gen_helper_gvec_3_ptr * const opfn[] = { \ 2467 NULL, \ 2468 gen_helper_##FUNC##_h, \ 2469 gen_helper_##FUNC##_s, \ 2470 NULL, \ 2471 }; \ 2472 if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \ 2473 return false; \ 2474 } \ 2475 return do_2scalar_fp_vec(s, a, opfn[a->size]); \ 2476 } 2477 2478 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx) 2479 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx) 2480 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx) 2481 2482 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) 2483 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) 2484 WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16) 2485 WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) 2486 2487 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) 2488 { 2489 static NeonGenTwoOpFn * const opfn[] = { 2490 NULL, 2491 gen_VQDMULH_16, 2492 gen_VQDMULH_32, 2493 NULL, 2494 }; 2495 2496 return do_2scalar(s, a, opfn[a->size], NULL); 2497 } 2498 2499 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) 2500 { 2501 static NeonGenTwoOpFn * const opfn[] = { 2502 NULL, 2503 gen_VQRDMULH_16, 2504 gen_VQRDMULH_32, 2505 NULL, 2506 }; 2507 2508 return do_2scalar(s, a, opfn[a->size], NULL); 2509 } 2510 2511 static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, 2512 NeonGenThreeOpEnvFn *opfn) 2513 { 2514 /* 2515 * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn 2516 * performs a kind of fused op-then-accumulate using a helper 2517 * function that takes all of rd, rn and the scalar at once. 2518 */ 2519 TCGv_i32 scalar, rn, rd; 2520 int pass; 2521 2522 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2523 return false; 2524 } 2525 2526 if (!dc_isar_feature(aa32_rdm, s)) { 2527 return false; 2528 } 2529 2530 /* UNDEF accesses to D16-D31 if they don't exist. */ 2531 if (!dc_isar_feature(aa32_simd_r32, s) && 2532 ((a->vd | a->vn | a->vm) & 0x10)) { 2533 return false; 2534 } 2535 2536 if (!opfn) { 2537 /* Bad size (including size == 3, which is a different insn group) */ 2538 return false; 2539 } 2540 2541 if (a->q && ((a->vd | a->vn) & 1)) { 2542 return false; 2543 } 2544 2545 if (!vfp_access_check(s)) { 2546 return true; 2547 } 2548 2549 scalar = neon_get_scalar(a->size, a->vm); 2550 rn = tcg_temp_new_i32(); 2551 rd = tcg_temp_new_i32(); 2552 2553 for (pass = 0; pass < (a->q ? 
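    /*
     * Four 32-bit passes for Q, two for D.  The qrdmlah/qrdmlsh helpers
     * take cpu_env so that saturation can be reported in FPSCR.QC.
     */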
4 : 2); pass++) { 2554 read_neon_element32(rn, a->vn, pass, MO_32); 2555 read_neon_element32(rd, a->vd, pass, MO_32); 2556 opfn(rd, cpu_env, rn, scalar, rd); 2557 write_neon_element32(rd, a->vd, pass, MO_32); 2558 } 2559 return true; 2560 } 2561 2562 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) 2563 { 2564 static NeonGenThreeOpEnvFn *opfn[] = { 2565 NULL, 2566 gen_helper_neon_qrdmlah_s16, 2567 gen_helper_neon_qrdmlah_s32, 2568 NULL, 2569 }; 2570 return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2571 } 2572 2573 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) 2574 { 2575 static NeonGenThreeOpEnvFn *opfn[] = { 2576 NULL, 2577 gen_helper_neon_qrdmlsh_s16, 2578 gen_helper_neon_qrdmlsh_s32, 2579 NULL, 2580 }; 2581 return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2582 } 2583 2584 static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, 2585 NeonGenTwoOpWidenFn *opfn, 2586 NeonGenTwo64OpFn *accfn) 2587 { 2588 /* 2589 * Two registers and a scalar, long operations: perform an 2590 * operation on the input elements and the scalar which produces 2591 * a double-width result, and then possibly perform an accumulation 2592 * operation of that result into the destination. 2593 */ 2594 TCGv_i32 scalar, rn; 2595 TCGv_i64 rn0_64, rn1_64; 2596 2597 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2598 return false; 2599 } 2600 2601 /* UNDEF accesses to D16-D31 if they don't exist. */ 2602 if (!dc_isar_feature(aa32_simd_r32, s) && 2603 ((a->vd | a->vn | a->vm) & 0x10)) { 2604 return false; 2605 } 2606 2607 if (!opfn) { 2608 /* Bad size (including size == 3, which is a different insn group) */ 2609 return false; 2610 } 2611 2612 if (a->vd & 1) { 2613 return false; 2614 } 2615 2616 if (!vfp_access_check(s)) { 2617 return true; 2618 } 2619 2620 scalar = neon_get_scalar(a->size, a->vm); 2621 2622 /* Load all inputs before writing any outputs, in case of overlap */ 2623 rn = tcg_temp_new_i32(); 2624 read_neon_element32(rn, a->vn, 0, MO_32); 2625 rn0_64 = tcg_temp_new_i64(); 2626 opfn(rn0_64, rn, scalar); 2627 2628 read_neon_element32(rn, a->vn, 1, MO_32); 2629 rn1_64 = tcg_temp_new_i64(); 2630 opfn(rn1_64, rn, scalar); 2631 2632 if (accfn) { 2633 TCGv_i64 t64 = tcg_temp_new_i64(); 2634 read_neon_element64(t64, a->vd, 0, MO_64); 2635 accfn(rn0_64, t64, rn0_64); 2636 read_neon_element64(t64, a->vd, 1, MO_64); 2637 accfn(rn1_64, t64, rn1_64); 2638 } 2639 2640 write_neon_element64(rn0_64, a->vd, 0, MO_64); 2641 write_neon_element64(rn1_64, a->vd, 1, MO_64); 2642 return true; 2643 } 2644 2645 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) 2646 { 2647 static NeonGenTwoOpWidenFn * const opfn[] = { 2648 NULL, 2649 gen_helper_neon_mull_s16, 2650 gen_mull_s32, 2651 NULL, 2652 }; 2653 2654 return do_2scalar_long(s, a, opfn[a->size], NULL); 2655 } 2656 2657 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) 2658 { 2659 static NeonGenTwoOpWidenFn * const opfn[] = { 2660 NULL, 2661 gen_helper_neon_mull_u16, 2662 gen_mull_u32, 2663 NULL, 2664 }; 2665 2666 return do_2scalar_long(s, a, opfn[a->size], NULL); 2667 } 2668 2669 #define DO_VMLAL_2SC(INSN, MULL, ACC) \ 2670 static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ 2671 { \ 2672 static NeonGenTwoOpWidenFn * const opfn[] = { \ 2673 NULL, \ 2674 gen_helper_neon_##MULL##16, \ 2675 gen_##MULL##32, \ 2676 NULL, \ 2677 }; \ 2678 static NeonGenTwo64OpFn * const accfn[] = { \ 2679 NULL, \ 2680 gen_helper_neon_##ACC##l_u32, \ 2681 tcg_gen_##ACC##_i64, \ 2682 NULL, \ 2683 }; \ 2684 return do_2scalar_long(s, a, 
opfn[a->size], accfn[a->size]); \ 2685 } 2686 2687 DO_VMLAL_2SC(VMLAL_S, mull_s, add) 2688 DO_VMLAL_2SC(VMLAL_U, mull_u, add) 2689 DO_VMLAL_2SC(VMLSL_S, mull_s, sub) 2690 DO_VMLAL_2SC(VMLSL_U, mull_u, sub) 2691 2692 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) 2693 { 2694 static NeonGenTwoOpWidenFn * const opfn[] = { 2695 NULL, 2696 gen_VQDMULL_16, 2697 gen_VQDMULL_32, 2698 NULL, 2699 }; 2700 2701 return do_2scalar_long(s, a, opfn[a->size], NULL); 2702 } 2703 2704 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) 2705 { 2706 static NeonGenTwoOpWidenFn * const opfn[] = { 2707 NULL, 2708 gen_VQDMULL_16, 2709 gen_VQDMULL_32, 2710 NULL, 2711 }; 2712 static NeonGenTwo64OpFn * const accfn[] = { 2713 NULL, 2714 gen_VQDMLAL_acc_16, 2715 gen_VQDMLAL_acc_32, 2716 NULL, 2717 }; 2718 2719 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2720 } 2721 2722 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) 2723 { 2724 static NeonGenTwoOpWidenFn * const opfn[] = { 2725 NULL, 2726 gen_VQDMULL_16, 2727 gen_VQDMULL_32, 2728 NULL, 2729 }; 2730 static NeonGenTwo64OpFn * const accfn[] = { 2731 NULL, 2732 gen_VQDMLSL_acc_16, 2733 gen_VQDMLSL_acc_32, 2734 NULL, 2735 }; 2736 2737 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2738 } 2739 2740 static bool trans_VEXT(DisasContext *s, arg_VEXT *a) 2741 { 2742 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2743 return false; 2744 } 2745 2746 /* UNDEF accesses to D16-D31 if they don't exist. */ 2747 if (!dc_isar_feature(aa32_simd_r32, s) && 2748 ((a->vd | a->vn | a->vm) & 0x10)) { 2749 return false; 2750 } 2751 2752 if ((a->vn | a->vm | a->vd) & a->q) { 2753 return false; 2754 } 2755 2756 if (a->imm > 7 && !a->q) { 2757 return false; 2758 } 2759 2760 if (!vfp_access_check(s)) { 2761 return true; 2762 } 2763 2764 if (!a->q) { 2765 /* Extract 64 bits from <Vm:Vn> */ 2766 TCGv_i64 left, right, dest; 2767 2768 left = tcg_temp_new_i64(); 2769 right = tcg_temp_new_i64(); 2770 dest = tcg_temp_new_i64(); 2771 2772 read_neon_element64(right, a->vn, 0, MO_64); 2773 read_neon_element64(left, a->vm, 0, MO_64); 2774 tcg_gen_extract2_i64(dest, right, left, a->imm * 8); 2775 write_neon_element64(dest, a->vd, 0, MO_64); 2776 } else { 2777 /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */ 2778 TCGv_i64 left, middle, right, destleft, destright; 2779 2780 left = tcg_temp_new_i64(); 2781 middle = tcg_temp_new_i64(); 2782 right = tcg_temp_new_i64(); 2783 destleft = tcg_temp_new_i64(); 2784 destright = tcg_temp_new_i64(); 2785 2786 if (a->imm < 8) { 2787 read_neon_element64(right, a->vn, 0, MO_64); 2788 read_neon_element64(middle, a->vn, 1, MO_64); 2789 tcg_gen_extract2_i64(destright, right, middle, a->imm * 8); 2790 read_neon_element64(left, a->vm, 0, MO_64); 2791 tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8); 2792 } else { 2793 read_neon_element64(right, a->vn, 1, MO_64); 2794 read_neon_element64(middle, a->vm, 0, MO_64); 2795 tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8); 2796 read_neon_element64(left, a->vm, 1, MO_64); 2797 tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8); 2798 } 2799 2800 write_neon_element64(destright, a->vd, 0, MO_64); 2801 write_neon_element64(destleft, a->vd, 1, MO_64); 2802 } 2803 return true; 2804 } 2805 2806 static bool trans_VTBL(DisasContext *s, arg_VTBL *a) 2807 { 2808 TCGv_i64 val, def; 2809 TCGv_i32 desc; 2810 2811 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2812 return false; 2813 } 2814 2815 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2816 if (!dc_isar_feature(aa32_simd_r32, s) && 2817 ((a->vd | a->vn | a->vm) & 0x10)) { 2818 return false; 2819 } 2820 2821 if ((a->vn + a->len + 1) > 32) { 2822 /* 2823 * This is UNPREDICTABLE; we choose to UNDEF to avoid the 2824 * helper function running off the end of the register file. 2825 */ 2826 return false; 2827 } 2828 2829 if (!vfp_access_check(s)) { 2830 return true; 2831 } 2832 2833 desc = tcg_constant_i32((a->vn << 2) | a->len); 2834 def = tcg_temp_new_i64(); 2835 if (a->op) { 2836 read_neon_element64(def, a->vd, 0, MO_64); 2837 } else { 2838 tcg_gen_movi_i64(def, 0); 2839 } 2840 val = tcg_temp_new_i64(); 2841 read_neon_element64(val, a->vm, 0, MO_64); 2842 2843 gen_helper_neon_tbl(val, cpu_env, desc, val, def); 2844 write_neon_element64(val, a->vd, 0, MO_64); 2845 return true; 2846 } 2847 2848 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) 2849 { 2850 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2851 return false; 2852 } 2853 2854 /* UNDEF accesses to D16-D31 if they don't exist. */ 2855 if (!dc_isar_feature(aa32_simd_r32, s) && 2856 ((a->vd | a->vm) & 0x10)) { 2857 return false; 2858 } 2859 2860 if (a->vd & a->q) { 2861 return false; 2862 } 2863 2864 if (!vfp_access_check(s)) { 2865 return true; 2866 } 2867 2868 tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd), 2869 neon_element_offset(a->vm, a->index, a->size), 2870 a->q ? 16 : 8, a->q ? 16 : 8); 2871 return true; 2872 } 2873 2874 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) 2875 { 2876 int pass, half; 2877 TCGv_i32 tmp[2]; 2878 2879 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2880 return false; 2881 } 2882 2883 /* UNDEF accesses to D16-D31 if they don't exist. */ 2884 if (!dc_isar_feature(aa32_simd_r32, s) && 2885 ((a->vd | a->vm) & 0x10)) { 2886 return false; 2887 } 2888 2889 if ((a->vd | a->vm) & a->q) { 2890 return false; 2891 } 2892 2893 if (a->size == 3) { 2894 return false; 2895 } 2896 2897 if (!vfp_access_check(s)) { 2898 return true; 2899 } 2900 2901 tmp[0] = tcg_temp_new_i32(); 2902 tmp[1] = tcg_temp_new_i32(); 2903 2904 for (pass = 0; pass < (a->q ? 2 : 1); pass++) { 2905 for (half = 0; half < 2; half++) { 2906 read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32); 2907 switch (a->size) { 2908 case 0: 2909 tcg_gen_bswap32_i32(tmp[half], tmp[half]); 2910 break; 2911 case 1: 2912 gen_swap_half(tmp[half], tmp[half]); 2913 break; 2914 case 2: 2915 break; 2916 default: 2917 g_assert_not_reached(); 2918 } 2919 } 2920 write_neon_element32(tmp[1], a->vd, pass * 2, MO_32); 2921 write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32); 2922 } 2923 return true; 2924 } 2925 2926 static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, 2927 NeonGenWidenFn *widenfn, 2928 NeonGenTwo64OpFn *opfn, 2929 NeonGenTwo64OpFn *accfn) 2930 { 2931 /* 2932 * Pairwise long operations: widen both halves of the pair, 2933 * combine the pairs with the opfn, and then possibly accumulate 2934 * into the destination with the accfn. 2935 */ 2936 int pass; 2937 2938 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2939 return false; 2940 } 2941 2942 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2943 if (!dc_isar_feature(aa32_simd_r32, s) && 2944 ((a->vd | a->vm) & 0x10)) { 2945 return false; 2946 } 2947 2948 if ((a->vd | a->vm) & a->q) { 2949 return false; 2950 } 2951 2952 if (!widenfn) { 2953 return false; 2954 } 2955 2956 if (!vfp_access_check(s)) { 2957 return true; 2958 } 2959 2960 for (pass = 0; pass < a->q + 1; pass++) { 2961 TCGv_i32 tmp; 2962 TCGv_i64 rm0_64, rm1_64, rd_64; 2963 2964 rm0_64 = tcg_temp_new_i64(); 2965 rm1_64 = tcg_temp_new_i64(); 2966 rd_64 = tcg_temp_new_i64(); 2967 2968 tmp = tcg_temp_new_i32(); 2969 read_neon_element32(tmp, a->vm, pass * 2, MO_32); 2970 widenfn(rm0_64, tmp); 2971 read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32); 2972 widenfn(rm1_64, tmp); 2973 2974 opfn(rd_64, rm0_64, rm1_64); 2975 2976 if (accfn) { 2977 TCGv_i64 tmp64 = tcg_temp_new_i64(); 2978 read_neon_element64(tmp64, a->vd, pass, MO_64); 2979 accfn(rd_64, tmp64, rd_64); 2980 } 2981 write_neon_element64(rd_64, a->vd, pass, MO_64); 2982 } 2983 return true; 2984 } 2985 2986 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a) 2987 { 2988 static NeonGenWidenFn * const widenfn[] = { 2989 gen_helper_neon_widen_s8, 2990 gen_helper_neon_widen_s16, 2991 tcg_gen_ext_i32_i64, 2992 NULL, 2993 }; 2994 static NeonGenTwo64OpFn * const opfn[] = { 2995 gen_helper_neon_paddl_u16, 2996 gen_helper_neon_paddl_u32, 2997 tcg_gen_add_i64, 2998 NULL, 2999 }; 3000 3001 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); 3002 } 3003 3004 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a) 3005 { 3006 static NeonGenWidenFn * const widenfn[] = { 3007 gen_helper_neon_widen_u8, 3008 gen_helper_neon_widen_u16, 3009 tcg_gen_extu_i32_i64, 3010 NULL, 3011 }; 3012 static NeonGenTwo64OpFn * const opfn[] = { 3013 gen_helper_neon_paddl_u16, 3014 gen_helper_neon_paddl_u32, 3015 tcg_gen_add_i64, 3016 NULL, 3017 }; 3018 3019 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); 3020 } 3021 3022 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a) 3023 { 3024 static NeonGenWidenFn * const widenfn[] = { 3025 gen_helper_neon_widen_s8, 3026 gen_helper_neon_widen_s16, 3027 tcg_gen_ext_i32_i64, 3028 NULL, 3029 }; 3030 static NeonGenTwo64OpFn * const opfn[] = { 3031 gen_helper_neon_paddl_u16, 3032 gen_helper_neon_paddl_u32, 3033 tcg_gen_add_i64, 3034 NULL, 3035 }; 3036 static NeonGenTwo64OpFn * const accfn[] = { 3037 gen_helper_neon_addl_u16, 3038 gen_helper_neon_addl_u32, 3039 tcg_gen_add_i64, 3040 NULL, 3041 }; 3042 3043 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], 3044 accfn[a->size]); 3045 } 3046 3047 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a) 3048 { 3049 static NeonGenWidenFn * const widenfn[] = { 3050 gen_helper_neon_widen_u8, 3051 gen_helper_neon_widen_u16, 3052 tcg_gen_extu_i32_i64, 3053 NULL, 3054 }; 3055 static NeonGenTwo64OpFn * const opfn[] = { 3056 gen_helper_neon_paddl_u16, 3057 gen_helper_neon_paddl_u32, 3058 tcg_gen_add_i64, 3059 NULL, 3060 }; 3061 static NeonGenTwo64OpFn * const accfn[] = { 3062 gen_helper_neon_addl_u16, 3063 gen_helper_neon_addl_u32, 3064 tcg_gen_add_i64, 3065 NULL, 3066 }; 3067 3068 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], 3069 accfn[a->size]); 3070 } 3071 3072 typedef void ZipFn(TCGv_ptr, TCGv_ptr); 3073 3074 static bool do_zip_uzp(DisasContext *s, arg_2misc *a, 3075 ZipFn *fn) 3076 { 3077 TCGv_ptr pd, pm; 3078 3079 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3080 return false; 3081 } 3082 3083 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3084 if (!dc_isar_feature(aa32_simd_r32, s) && 3085 ((a->vd | a->vm) & 0x10)) { 3086 return false; 3087 } 3088 3089 if ((a->vd | a->vm) & a->q) { 3090 return false; 3091 } 3092 3093 if (!fn) { 3094 /* Bad size or size/q combination */ 3095 return false; 3096 } 3097 3098 if (!vfp_access_check(s)) { 3099 return true; 3100 } 3101 3102 pd = vfp_reg_ptr(true, a->vd); 3103 pm = vfp_reg_ptr(true, a->vm); 3104 fn(pd, pm); 3105 return true; 3106 } 3107 3108 static bool trans_VUZP(DisasContext *s, arg_2misc *a) 3109 { 3110 static ZipFn * const fn[2][4] = { 3111 { 3112 gen_helper_neon_unzip8, 3113 gen_helper_neon_unzip16, 3114 NULL, 3115 NULL, 3116 }, { 3117 gen_helper_neon_qunzip8, 3118 gen_helper_neon_qunzip16, 3119 gen_helper_neon_qunzip32, 3120 NULL, 3121 } 3122 }; 3123 return do_zip_uzp(s, a, fn[a->q][a->size]); 3124 } 3125 3126 static bool trans_VZIP(DisasContext *s, arg_2misc *a) 3127 { 3128 static ZipFn * const fn[2][4] = { 3129 { 3130 gen_helper_neon_zip8, 3131 gen_helper_neon_zip16, 3132 NULL, 3133 NULL, 3134 }, { 3135 gen_helper_neon_qzip8, 3136 gen_helper_neon_qzip16, 3137 gen_helper_neon_qzip32, 3138 NULL, 3139 } 3140 }; 3141 return do_zip_uzp(s, a, fn[a->q][a->size]); 3142 } 3143 3144 static bool do_vmovn(DisasContext *s, arg_2misc *a, 3145 NeonGenNarrowEnvFn *narrowfn) 3146 { 3147 TCGv_i64 rm; 3148 TCGv_i32 rd0, rd1; 3149 3150 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3151 return false; 3152 } 3153 3154 /* UNDEF accesses to D16-D31 if they don't exist. */ 3155 if (!dc_isar_feature(aa32_simd_r32, s) && 3156 ((a->vd | a->vm) & 0x10)) { 3157 return false; 3158 } 3159 3160 if (a->vm & 1) { 3161 return false; 3162 } 3163 3164 if (!narrowfn) { 3165 return false; 3166 } 3167 3168 if (!vfp_access_check(s)) { 3169 return true; 3170 } 3171 3172 rm = tcg_temp_new_i64(); 3173 rd0 = tcg_temp_new_i32(); 3174 rd1 = tcg_temp_new_i32(); 3175 3176 read_neon_element64(rm, a->vm, 0, MO_64); 3177 narrowfn(rd0, cpu_env, rm); 3178 read_neon_element64(rm, a->vm, 1, MO_64); 3179 narrowfn(rd1, cpu_env, rm); 3180 write_neon_element32(rd0, a->vd, 0, MO_32); 3181 write_neon_element32(rd1, a->vd, 1, MO_32); 3182 return true; 3183 } 3184 3185 #define DO_VMOVN(INSN, FUNC) \ 3186 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3187 { \ 3188 static NeonGenNarrowEnvFn * const narrowfn[] = { \ 3189 FUNC##8, \ 3190 FUNC##16, \ 3191 FUNC##32, \ 3192 NULL, \ 3193 }; \ 3194 return do_vmovn(s, a, narrowfn[a->size]); \ 3195 } 3196 3197 DO_VMOVN(VMOVN, gen_neon_narrow_u) 3198 DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat) 3199 DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s) 3200 DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u) 3201 3202 static bool trans_VSHLL(DisasContext *s, arg_2misc *a) 3203 { 3204 TCGv_i32 rm0, rm1; 3205 TCGv_i64 rd; 3206 static NeonGenWidenFn * const widenfns[] = { 3207 gen_helper_neon_widen_u8, 3208 gen_helper_neon_widen_u16, 3209 tcg_gen_extu_i32_i64, 3210 NULL, 3211 }; 3212 NeonGenWidenFn *widenfn = widenfns[a->size]; 3213 3214 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3215 return false; 3216 } 3217 3218 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3219 if (!dc_isar_feature(aa32_simd_r32, s) && 3220 ((a->vd | a->vm) & 0x10)) { 3221 return false; 3222 } 3223 3224 if (a->vd & 1) { 3225 return false; 3226 } 3227 3228 if (!widenfn) { 3229 return false; 3230 } 3231 3232 if (!vfp_access_check(s)) { 3233 return true; 3234 } 3235 3236 rd = tcg_temp_new_i64(); 3237 rm0 = tcg_temp_new_i32(); 3238 rm1 = tcg_temp_new_i32(); 3239 3240 read_neon_element32(rm0, a->vm, 0, MO_32); 3241 read_neon_element32(rm1, a->vm, 1, MO_32); 3242 3243 widenfn(rd, rm0); 3244 tcg_gen_shli_i64(rd, rd, 8 << a->size); 3245 write_neon_element64(rd, a->vd, 0, MO_64); 3246 widenfn(rd, rm1); 3247 tcg_gen_shli_i64(rd, rd, 8 << a->size); 3248 write_neon_element64(rd, a->vd, 1, MO_64); 3249 return true; 3250 } 3251 3252 static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a) 3253 { 3254 TCGv_ptr fpst; 3255 TCGv_i64 tmp; 3256 TCGv_i32 dst0, dst1; 3257 3258 if (!dc_isar_feature(aa32_bf16, s)) { 3259 return false; 3260 } 3261 3262 /* UNDEF accesses to D16-D31 if they don't exist. */ 3263 if (!dc_isar_feature(aa32_simd_r32, s) && 3264 ((a->vd | a->vm) & 0x10)) { 3265 return false; 3266 } 3267 3268 if ((a->vm & 1) || (a->size != 1)) { 3269 return false; 3270 } 3271 3272 if (!vfp_access_check(s)) { 3273 return true; 3274 } 3275 3276 fpst = fpstatus_ptr(FPST_STD); 3277 tmp = tcg_temp_new_i64(); 3278 dst0 = tcg_temp_new_i32(); 3279 dst1 = tcg_temp_new_i32(); 3280 3281 read_neon_element64(tmp, a->vm, 0, MO_64); 3282 gen_helper_bfcvt_pair(dst0, tmp, fpst); 3283 3284 read_neon_element64(tmp, a->vm, 1, MO_64); 3285 gen_helper_bfcvt_pair(dst1, tmp, fpst); 3286 3287 write_neon_element32(dst0, a->vd, 0, MO_32); 3288 write_neon_element32(dst1, a->vd, 1, MO_32); 3289 return true; 3290 } 3291 3292 static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) 3293 { 3294 TCGv_ptr fpst; 3295 TCGv_i32 ahp, tmp, tmp2, tmp3; 3296 3297 if (!arm_dc_feature(s, ARM_FEATURE_NEON) || 3298 !dc_isar_feature(aa32_fp16_spconv, s)) { 3299 return false; 3300 } 3301 3302 /* UNDEF accesses to D16-D31 if they don't exist. */ 3303 if (!dc_isar_feature(aa32_simd_r32, s) && 3304 ((a->vd | a->vm) & 0x10)) { 3305 return false; 3306 } 3307 3308 if ((a->vm & 1) || (a->size != 1)) { 3309 return false; 3310 } 3311 3312 if (!vfp_access_check(s)) { 3313 return true; 3314 } 3315 3316 fpst = fpstatus_ptr(FPST_STD); 3317 ahp = get_ahp_flag(); 3318 tmp = tcg_temp_new_i32(); 3319 read_neon_element32(tmp, a->vm, 0, MO_32); 3320 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 3321 tmp2 = tcg_temp_new_i32(); 3322 read_neon_element32(tmp2, a->vm, 1, MO_32); 3323 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp); 3324 tcg_gen_shli_i32(tmp2, tmp2, 16); 3325 tcg_gen_or_i32(tmp2, tmp2, tmp); 3326 read_neon_element32(tmp, a->vm, 2, MO_32); 3327 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 3328 tmp3 = tcg_temp_new_i32(); 3329 read_neon_element32(tmp3, a->vm, 3, MO_32); 3330 write_neon_element32(tmp2, a->vd, 0, MO_32); 3331 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp); 3332 tcg_gen_shli_i32(tmp3, tmp3, 16); 3333 tcg_gen_or_i32(tmp3, tmp3, tmp); 3334 write_neon_element32(tmp3, a->vd, 1, MO_32); 3335 return true; 3336 } 3337 3338 static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) 3339 { 3340 TCGv_ptr fpst; 3341 TCGv_i32 ahp, tmp, tmp2, tmp3; 3342 3343 if (!arm_dc_feature(s, ARM_FEATURE_NEON) || 3344 !dc_isar_feature(aa32_fp16_spconv, s)) { 3345 return false; 3346 } 3347 3348 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3349 if (!dc_isar_feature(aa32_simd_r32, s) && 3350 ((a->vd | a->vm) & 0x10)) { 3351 return false; 3352 } 3353 3354 if ((a->vd & 1) || (a->size != 1)) { 3355 return false; 3356 } 3357 3358 if (!vfp_access_check(s)) { 3359 return true; 3360 } 3361 3362 fpst = fpstatus_ptr(FPST_STD); 3363 ahp = get_ahp_flag(); 3364 tmp3 = tcg_temp_new_i32(); 3365 tmp2 = tcg_temp_new_i32(); 3366 tmp = tcg_temp_new_i32(); 3367 read_neon_element32(tmp, a->vm, 0, MO_32); 3368 read_neon_element32(tmp2, a->vm, 1, MO_32); 3369 tcg_gen_ext16u_i32(tmp3, tmp); 3370 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); 3371 write_neon_element32(tmp3, a->vd, 0, MO_32); 3372 tcg_gen_shri_i32(tmp, tmp, 16); 3373 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp); 3374 write_neon_element32(tmp, a->vd, 1, MO_32); 3375 tcg_gen_ext16u_i32(tmp3, tmp2); 3376 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); 3377 write_neon_element32(tmp3, a->vd, 2, MO_32); 3378 tcg_gen_shri_i32(tmp2, tmp2, 16); 3379 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp); 3380 write_neon_element32(tmp2, a->vd, 3, MO_32); 3381 return true; 3382 } 3383 3384 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn) 3385 { 3386 int vec_size = a->q ? 16 : 8; 3387 int rd_ofs = neon_full_reg_offset(a->vd); 3388 int rm_ofs = neon_full_reg_offset(a->vm); 3389 3390 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3391 return false; 3392 } 3393 3394 /* UNDEF accesses to D16-D31 if they don't exist. */ 3395 if (!dc_isar_feature(aa32_simd_r32, s) && 3396 ((a->vd | a->vm) & 0x10)) { 3397 return false; 3398 } 3399 3400 if (a->size == 3) { 3401 return false; 3402 } 3403 3404 if ((a->vd | a->vm) & a->q) { 3405 return false; 3406 } 3407 3408 if (!vfp_access_check(s)) { 3409 return true; 3410 } 3411 3412 fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size); 3413 3414 return true; 3415 } 3416 3417 #define DO_2MISC_VEC(INSN, FN) \ 3418 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3419 { \ 3420 return do_2misc_vec(s, a, FN); \ 3421 } 3422 3423 DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg) 3424 DO_2MISC_VEC(VABS, tcg_gen_gvec_abs) 3425 DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0) 3426 DO_2MISC_VEC(VCGT0, gen_gvec_cgt0) 3427 DO_2MISC_VEC(VCLE0, gen_gvec_cle0) 3428 DO_2MISC_VEC(VCGE0, gen_gvec_cge0) 3429 DO_2MISC_VEC(VCLT0, gen_gvec_clt0) 3430 3431 static bool trans_VMVN(DisasContext *s, arg_2misc *a) 3432 { 3433 if (a->size != 0) { 3434 return false; 3435 } 3436 return do_2misc_vec(s, a, tcg_gen_gvec_not); 3437 } 3438 3439 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \ 3440 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 3441 uint32_t rm_ofs, uint32_t oprsz, \ 3442 uint32_t maxsz) \ 3443 { \ 3444 tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \ 3445 DATA, FUNC); \ 3446 } 3447 3448 #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \ 3449 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 3450 uint32_t rm_ofs, uint32_t oprsz, \ 3451 uint32_t maxsz) \ 3452 { \ 3453 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \ 3454 } 3455 3456 WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0) 3457 WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1) 3458 WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0) 3459 WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1) 3460 WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0) 3461 WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0) 3462 WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0) 3463 3464 #define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \ 3465 static bool 
trans_##INSN(DisasContext *s, arg_2misc *a) \ 3466 { \ 3467 if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \ 3468 return false; \ 3469 } \ 3470 return do_2misc_vec(s, a, gen_##INSN); \ 3471 } 3472 3473 DO_2M_CRYPTO(AESE, aa32_aes, 0) 3474 DO_2M_CRYPTO(AESD, aa32_aes, 0) 3475 DO_2M_CRYPTO(AESMC, aa32_aes, 0) 3476 DO_2M_CRYPTO(AESIMC, aa32_aes, 0) 3477 DO_2M_CRYPTO(SHA1H, aa32_sha1, 2) 3478 DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2) 3479 DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) 3480 3481 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) 3482 { 3483 TCGv_i32 tmp; 3484 int pass; 3485 3486 /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ 3487 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3488 return false; 3489 } 3490 3491 /* UNDEF accesses to D16-D31 if they don't exist. */ 3492 if (!dc_isar_feature(aa32_simd_r32, s) && 3493 ((a->vd | a->vm) & 0x10)) { 3494 return false; 3495 } 3496 3497 if (!fn) { 3498 return false; 3499 } 3500 3501 if ((a->vd | a->vm) & a->q) { 3502 return false; 3503 } 3504 3505 if (!vfp_access_check(s)) { 3506 return true; 3507 } 3508 3509 tmp = tcg_temp_new_i32(); 3510 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 3511 read_neon_element32(tmp, a->vm, pass, MO_32); 3512 fn(tmp, tmp); 3513 write_neon_element32(tmp, a->vd, pass, MO_32); 3514 } 3515 return true; 3516 } 3517 3518 static bool trans_VREV32(DisasContext *s, arg_2misc *a) 3519 { 3520 static NeonGenOneOpFn * const fn[] = { 3521 tcg_gen_bswap32_i32, 3522 gen_swap_half, 3523 NULL, 3524 NULL, 3525 }; 3526 return do_2misc(s, a, fn[a->size]); 3527 } 3528 3529 static bool trans_VREV16(DisasContext *s, arg_2misc *a) 3530 { 3531 if (a->size != 0) { 3532 return false; 3533 } 3534 return do_2misc(s, a, gen_rev16); 3535 } 3536 3537 static bool trans_VCLS(DisasContext *s, arg_2misc *a) 3538 { 3539 static NeonGenOneOpFn * const fn[] = { 3540 gen_helper_neon_cls_s8, 3541 gen_helper_neon_cls_s16, 3542 gen_helper_neon_cls_s32, 3543 NULL, 3544 }; 3545 return do_2misc(s, a, fn[a->size]); 3546 } 3547 3548 static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm) 3549 { 3550 tcg_gen_clzi_i32(rd, rm, 32); 3551 } 3552 3553 static bool trans_VCLZ(DisasContext *s, arg_2misc *a) 3554 { 3555 static NeonGenOneOpFn * const fn[] = { 3556 gen_helper_neon_clz_u8, 3557 gen_helper_neon_clz_u16, 3558 do_VCLZ_32, 3559 NULL, 3560 }; 3561 return do_2misc(s, a, fn[a->size]); 3562 } 3563 3564 static bool trans_VCNT(DisasContext *s, arg_2misc *a) 3565 { 3566 if (a->size != 0) { 3567 return false; 3568 } 3569 return do_2misc(s, a, gen_helper_neon_cnt_u8); 3570 } 3571 3572 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3573 uint32_t oprsz, uint32_t maxsz) 3574 { 3575 tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs, 3576 vece == MO_16 ? 0x7fff : 0x7fffffff, 3577 oprsz, maxsz); 3578 } 3579 3580 static bool trans_VABS_F(DisasContext *s, arg_2misc *a) 3581 { 3582 if (a->size == MO_16) { 3583 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3584 return false; 3585 } 3586 } else if (a->size != MO_32) { 3587 return false; 3588 } 3589 return do_2misc_vec(s, a, gen_VABS_F); 3590 } 3591 3592 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3593 uint32_t oprsz, uint32_t maxsz) 3594 { 3595 tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs, 3596 vece == MO_16 ? 
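    /* Flipping just the sign bit (bit 15 for fp16, bit 31 for fp32) negates the value. */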
0x8000 : 0x80000000, 3597 oprsz, maxsz); 3598 } 3599 3600 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) 3601 { 3602 if (a->size == MO_16) { 3603 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3604 return false; 3605 } 3606 } else if (a->size != MO_32) { 3607 return false; 3608 } 3609 return do_2misc_vec(s, a, gen_VNEG_F); 3610 } 3611 3612 static bool trans_VRECPE(DisasContext *s, arg_2misc *a) 3613 { 3614 if (a->size != 2) { 3615 return false; 3616 } 3617 return do_2misc(s, a, gen_helper_recpe_u32); 3618 } 3619 3620 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) 3621 { 3622 if (a->size != 2) { 3623 return false; 3624 } 3625 return do_2misc(s, a, gen_helper_rsqrte_u32); 3626 } 3627 3628 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ 3629 static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \ 3630 { \ 3631 FUNC(d, cpu_env, m); \ 3632 } 3633 3634 WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) 3635 WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16) 3636 WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32) 3637 WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8) 3638 WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16) 3639 WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32) 3640 3641 static bool trans_VQABS(DisasContext *s, arg_2misc *a) 3642 { 3643 static NeonGenOneOpFn * const fn[] = { 3644 gen_VQABS_s8, 3645 gen_VQABS_s16, 3646 gen_VQABS_s32, 3647 NULL, 3648 }; 3649 return do_2misc(s, a, fn[a->size]); 3650 } 3651 3652 static bool trans_VQNEG(DisasContext *s, arg_2misc *a) 3653 { 3654 static NeonGenOneOpFn * const fn[] = { 3655 gen_VQNEG_s8, 3656 gen_VQNEG_s16, 3657 gen_VQNEG_s32, 3658 NULL, 3659 }; 3660 return do_2misc(s, a, fn[a->size]); 3661 } 3662 3663 #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \ 3664 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3665 uint32_t rm_ofs, \ 3666 uint32_t oprsz, uint32_t maxsz) \ 3667 { \ 3668 static gen_helper_gvec_2_ptr * const fns[4] = { \ 3669 NULL, HFUNC, SFUNC, NULL, \ 3670 }; \ 3671 TCGv_ptr fpst; \ 3672 fpst = fpstatus_ptr(vece == MO_16 ? 
FPST_STD_F16 : FPST_STD); \ 3673 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \ 3674 fns[vece]); \ 3675 } \ 3676 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3677 { \ 3678 if (a->size == MO_16) { \ 3679 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3680 return false; \ 3681 } \ 3682 } else if (a->size != MO_32) { \ 3683 return false; \ 3684 } \ 3685 return do_2misc_vec(s, a, gen_##INSN); \ 3686 } 3687 3688 DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s) 3689 DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s) 3690 DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s) 3691 DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s) 3692 DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s) 3693 DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s) 3694 DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s) 3695 DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos) 3696 DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos) 3697 DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs) 3698 DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs) 3699 3700 DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s) 3701 3702 static bool trans_VRINTX(DisasContext *s, arg_2misc *a) 3703 { 3704 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 3705 return false; 3706 } 3707 return trans_VRINTX_impl(s, a); 3708 } 3709 3710 #define DO_VEC_RMODE(INSN, RMODE, OP) \ 3711 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3712 uint32_t rm_ofs, \ 3713 uint32_t oprsz, uint32_t maxsz) \ 3714 { \ 3715 static gen_helper_gvec_2_ptr * const fns[4] = { \ 3716 NULL, \ 3717 gen_helper_gvec_##OP##h, \ 3718 gen_helper_gvec_##OP##s, \ 3719 NULL, \ 3720 }; \ 3721 TCGv_ptr fpst; \ 3722 fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \ 3723 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \ 3724 arm_rmode_to_sf(RMODE), fns[vece]); \ 3725 } \ 3726 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3727 { \ 3728 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \ 3729 return false; \ 3730 } \ 3731 if (a->size == MO_16) { \ 3732 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3733 return false; \ 3734 } \ 3735 } else if (a->size != MO_32) { \ 3736 return false; \ 3737 } \ 3738 return do_2misc_vec(s, a, gen_##INSN); \ 3739 } 3740 3741 DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u) 3742 DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s) 3743 DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u) 3744 DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s) 3745 DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u) 3746 DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s) 3747 DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u) 3748 DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s) 3749 3750 DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_) 3751 DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_) 3752 DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_) 3753 DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_) 3754 DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_) 3755 3756 static bool trans_VSWP(DisasContext *s, arg_2misc *a) 3757 { 3758 TCGv_i64 rm, rd; 3759 int pass; 3760 3761 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3762 return false; 3763 } 3764 3765 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3766 if (!dc_isar_feature(aa32_simd_r32, s) && 3767 ((a->vd | a->vm) & 0x10)) { 3768 return false; 3769 } 3770 3771 if (a->size != 0) { 3772 return false; 3773 } 3774 3775 if ((a->vd | a->vm) & a->q) { 3776 return false; 3777 } 3778 3779 if (!vfp_access_check(s)) { 3780 return true; 3781 } 3782 3783 rm = tcg_temp_new_i64(); 3784 rd = tcg_temp_new_i64(); 3785 for (pass = 0; pass < (a->q ? 2 : 1); pass++) { 3786 read_neon_element64(rm, a->vm, pass, MO_64); 3787 read_neon_element64(rd, a->vd, pass, MO_64); 3788 write_neon_element64(rm, a->vd, pass, MO_64); 3789 write_neon_element64(rd, a->vm, pass, MO_64); 3790 } 3791 return true; 3792 } 3793 3794 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1) 3795 { 3796 TCGv_i32 rd, tmp; 3797 3798 rd = tcg_temp_new_i32(); 3799 tmp = tcg_temp_new_i32(); 3800 3801 tcg_gen_shli_i32(rd, t0, 8); 3802 tcg_gen_andi_i32(rd, rd, 0xff00ff00); 3803 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff); 3804 tcg_gen_or_i32(rd, rd, tmp); 3805 3806 tcg_gen_shri_i32(t1, t1, 8); 3807 tcg_gen_andi_i32(t1, t1, 0x00ff00ff); 3808 tcg_gen_andi_i32(tmp, t0, 0xff00ff00); 3809 tcg_gen_or_i32(t1, t1, tmp); 3810 tcg_gen_mov_i32(t0, rd); 3811 } 3812 3813 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) 3814 { 3815 TCGv_i32 rd, tmp; 3816 3817 rd = tcg_temp_new_i32(); 3818 tmp = tcg_temp_new_i32(); 3819 3820 tcg_gen_shli_i32(rd, t0, 16); 3821 tcg_gen_andi_i32(tmp, t1, 0xffff); 3822 tcg_gen_or_i32(rd, rd, tmp); 3823 tcg_gen_shri_i32(t1, t1, 16); 3824 tcg_gen_andi_i32(tmp, t0, 0xffff0000); 3825 tcg_gen_or_i32(t1, t1, tmp); 3826 tcg_gen_mov_i32(t0, rd); 3827 } 3828 3829 static bool trans_VTRN(DisasContext *s, arg_2misc *a) 3830 { 3831 TCGv_i32 tmp, tmp2; 3832 int pass; 3833 3834 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3835 return false; 3836 } 3837 3838 /* UNDEF accesses to D16-D31 if they don't exist. */ 3839 if (!dc_isar_feature(aa32_simd_r32, s) && 3840 ((a->vd | a->vm) & 0x10)) { 3841 return false; 3842 } 3843 3844 if ((a->vd | a->vm) & a->q) { 3845 return false; 3846 } 3847 3848 if (a->size == 3) { 3849 return false; 3850 } 3851 3852 if (!vfp_access_check(s)) { 3853 return true; 3854 } 3855 3856 tmp = tcg_temp_new_i32(); 3857 tmp2 = tcg_temp_new_i32(); 3858 if (a->size == MO_32) { 3859 for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) { 3860 read_neon_element32(tmp, a->vm, pass, MO_32); 3861 read_neon_element32(tmp2, a->vd, pass + 1, MO_32); 3862 write_neon_element32(tmp2, a->vm, pass, MO_32); 3863 write_neon_element32(tmp, a->vd, pass + 1, MO_32); 3864 } 3865 } else { 3866 for (pass = 0; pass < (a->q ? 
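    /*
     * 8- and 16-bit VTRN: step through the 32-bit words (four for Q, two
     * for D) and transpose the corresponding elements of Vd and Vm within
     * each word using gen_neon_trn_u8/u16 above.
     */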
4 : 2); pass++) { 3867 read_neon_element32(tmp, a->vm, pass, MO_32); 3868 read_neon_element32(tmp2, a->vd, pass, MO_32); 3869 if (a->size == MO_8) { 3870 gen_neon_trn_u8(tmp, tmp2); 3871 } else { 3872 gen_neon_trn_u16(tmp, tmp2); 3873 } 3874 write_neon_element32(tmp2, a->vm, pass, MO_32); 3875 write_neon_element32(tmp, a->vd, pass, MO_32); 3876 } 3877 } 3878 return true; 3879 } 3880 3881 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a) 3882 { 3883 if (!dc_isar_feature(aa32_i8mm, s)) { 3884 return false; 3885 } 3886 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 3887 gen_helper_gvec_smmla_b); 3888 } 3889 3890 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a) 3891 { 3892 if (!dc_isar_feature(aa32_i8mm, s)) { 3893 return false; 3894 } 3895 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 3896 gen_helper_gvec_ummla_b); 3897 } 3898 3899 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a) 3900 { 3901 if (!dc_isar_feature(aa32_i8mm, s)) { 3902 return false; 3903 } 3904 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 3905 gen_helper_gvec_usmmla_b); 3906 } 3907 3908 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a) 3909 { 3910 if (!dc_isar_feature(aa32_bf16, s)) { 3911 return false; 3912 } 3913 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 3914 gen_helper_gvec_bfmmla); 3915 } 3916 3917 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a) 3918 { 3919 if (!dc_isar_feature(aa32_bf16, s)) { 3920 return false; 3921 } 3922 return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD, 3923 gen_helper_gvec_bfmlal); 3924 } 3925 3926 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a) 3927 { 3928 if (!dc_isar_feature(aa32_bf16, s)) { 3929 return false; 3930 } 3931 return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm, 3932 (a->index << 1) | a->q, FPST_STD, 3933 gen_helper_gvec_bfmlal_idx); 3934 } 3935