/*
 * ARM translation: AArch32 Neon instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"

static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
    return ret;
}

static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, cpu_env, offset);
        break;
    case MO_UQ:
        tcg_gen_ld_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i32(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i32(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, cpu_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
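
/*
 * Shared register-number checks used by the do_neon_ddda* helpers below:
 *  - Vd/Vn/Vm numbers with bit 4 set (D16-D31) UNDEF unless the CPU
 *    really has 32 D registers (aa32_simd_r32).
 *  - The 'q' argument is a mask with one bit per operand (vd:vn:vm).
 *    A set bit means that operand is a Q register, so its D-register
 *    number must be even.  For example, with q == 0b111 and vd == 3,
 *    (vd & 1) * 4 == 4 overlaps bit 2 of q and the insn is rejected.
 *    The by-scalar forms pass q == 0b110 because Vm holds the scalar
 *    and remains a plain D register.
 */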
static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
                         int data, gen_helper_gvec_4 *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; other values occur
     * when we have mixed Q- and D-reg inputs.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       opr_sz, opr_sz, data, fn_gvec);
    return true;
}

static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
                              int data, ARMFPStatusFlavour fp_flavour,
                              gen_helper_gvec_4_ptr *fn_gvec_ptr)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; other values occur
     * when we have mixed Q- and D-reg inputs.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    TCGv_ptr fpst = fpstatus_ptr(fp_flavour);

    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah);
    }
    return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                             FPST_STD, gen_helper_gvec_fcmlas);
}

static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_sdot_b);
}

static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_udot_b);
}

static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_usdot_b);
}

static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_bfdot);
}

static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}

static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    int data = (a->index << 2) | a->rot;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
    }
    return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                             FPST_STD, gen_helper_gvec_fcmlas_idx);
}

static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_sdot_idx_b);
}

static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_udot_idx_b);
}

static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_usdot_idx_b);
}

static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_sudot_idx_b);
}

static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_bfdot_idx);
}

static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       cpu_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}

static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};

static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
}

static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp mop, align, endian;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian. */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    /* Enforce alignment requested by the instruction */
    if (a->align) {
        align = pow2_align(a->align + 2); /* 4 ** a->align */
    } else {
        align = s->align_mem ? MO_ALIGN : 0;
    }

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        /* Retain any natural alignment. */
        if (align == MO_ALIGN) {
            align = pow2_align(size);
        }
        size = 3;
    }
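
    /*
     * Worked example of the promotion above: a VLD1.16 of four D
     * registers has interleave == 1, so instead of emitting sixteen
     * 16-bit loads we treat the element size as 64 bits and the loop
     * below issues one 8-byte access per D register.  This is only
     * valid because consecutive little-endian elements read back as
     * the same bytes when reinterpreted as one little-endian 64-bit
     * value.
     */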

    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    mop = endian | size | align;
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
                }
                tcg_gen_addi_i32(addr, addr, 1 << size);

                /* Subsequent memory operations inherit alignment */
                mop &= ~MO_AMASK;
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i64(tmp64);

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}

static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    MemOp mop, align;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    align = 0;
    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = MO_32;
        align = MO_ALIGN_16;
    } else if (a->a) {
        switch (nregs) {
        case 1:
            if (size == 0) {
                return false;
            }
            align = MO_ALIGN;
            break;
        case 2:
            align = pow2_align(size + 1);
            break;
        case 3:
            return false;
        case 4:
            if (size == 2) {
                align = pow2_align(3);
            } else {
                align = pow2_align(size + 2);
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;
    mop = size | align;
    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
618 */ 619 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 620 8, 8, tmp); 621 tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1), 622 neon_full_reg_offset(vd), 8, 8); 623 } else { 624 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 625 vec_size, vec_size, tmp); 626 } 627 tcg_gen_addi_i32(addr, addr, 1 << size); 628 vd += stride; 629 630 /* Subsequent memory operations inherit alignment */ 631 mop &= ~MO_AMASK; 632 } 633 tcg_temp_free_i32(tmp); 634 tcg_temp_free_i32(addr); 635 636 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs); 637 638 return true; 639 } 640 641 static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) 642 { 643 /* Neon load/store single structure to one lane */ 644 int reg; 645 int nregs = a->n + 1; 646 int vd = a->vd; 647 TCGv_i32 addr, tmp; 648 MemOp mop; 649 650 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 651 return false; 652 } 653 654 /* UNDEF accesses to D16-D31 if they don't exist */ 655 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 656 return false; 657 } 658 659 /* Catch the UNDEF cases. This is unavoidably a bit messy. */ 660 switch (nregs) { 661 case 1: 662 if (a->stride != 1) { 663 return false; 664 } 665 if (((a->align & (1 << a->size)) != 0) || 666 (a->size == 2 && (a->align == 1 || a->align == 2))) { 667 return false; 668 } 669 break; 670 case 2: 671 if (a->size == 2 && (a->align & 2) != 0) { 672 return false; 673 } 674 break; 675 case 3: 676 if (a->align != 0) { 677 return false; 678 } 679 break; 680 case 4: 681 if (a->size == 2 && a->align == 3) { 682 return false; 683 } 684 break; 685 default: 686 g_assert_not_reached(); 687 } 688 if ((vd + a->stride * (nregs - 1)) > 31) { 689 /* 690 * Attempts to write off the end of the register file are 691 * UNPREDICTABLE; we choose to UNDEF because otherwise we would 692 * access off the end of the array that holds the register data. 693 */ 694 return false; 695 } 696 697 if (!vfp_access_check(s)) { 698 return true; 699 } 700 701 /* Pick up SCTLR settings */ 702 mop = finalize_memop(s, a->size); 703 704 if (a->align) { 705 MemOp align_op; 706 707 switch (nregs) { 708 case 1: 709 /* For VLD1, use natural alignment. */ 710 align_op = MO_ALIGN; 711 break; 712 case 2: 713 /* For VLD2, use double alignment. */ 714 align_op = pow2_align(a->size + 1); 715 break; 716 case 4: 717 if (a->size == MO_32) { 718 /* 719 * For VLD4.32, align = 1 is double alignment, align = 2 is 720 * quad alignment; align = 3 is rejected above. 721 */ 722 align_op = pow2_align(a->size + a->align); 723 } else { 724 /* For VLD4.8 and VLD.16, we want quad alignment. */ 725 align_op = pow2_align(a->size + 2); 726 } 727 break; 728 default: 729 /* For VLD3, the alignment field is zero and rejected above. 
            g_assert_not_reached();
        }

        mop = (mop & ~MO_AMASK) | align_op;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}

static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_3SAME(INSN, FUNC) \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        return do_3same(s, a, FUNC); \
    }

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
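
/*
 * For reference, DO_3SAME(VADD, tcg_gen_gvec_add) above expands to:
 *
 *     static bool trans_VADD_3s(DisasContext *s, arg_3same *a)
 *     {
 *         return do_3same(s, a, tcg_gen_gvec_add);
 *     }
 *
 * i.e. each use defines the trans function the decoder expects and
 * simply forwards to do_3same() with the appropriate gvec expander.
 */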

/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
    } \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)

#define DO_3SAME_NO_SZ_3(INSN, FUNC) \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size == 3) { \
            return false; \
        } \
        return do_3same(s, a, FUNC); \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)

#define DO_3SAME_CMP(INSN, COND) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    } \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)

#define WRAP_OOL_FN(WRAPNAME, FUNC) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
    { \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)

static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_3same(s, a, gen_VMUL_p_3s);
}

#define DO_VQRDMLAH(INSN, FUNC) \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (!dc_isar_feature(aa32_rdm, s)) { \
            return false; \
        } \
        if (a->size != 1 && a->size != 2) { \
            return false; \
        } \
        return do_3same(s, a, FUNC); \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)

#define DO_SHA1(NAME, FUNC) \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (!dc_isar_feature(aa32_sha1, s)) { \
            return false; \
        } \
        return do_3same(s, a, gen_##NAME##_3s); \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)

#define DO_SHA2(NAME, FUNC) \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (!dc_isar_feature(aa32_sha2, s)) { \
            return false; \
        } \
        return do_3same(s, a, gen_##NAME##_3s); \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)

#define DO_3SAME_64(INSN, FUNC) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        static const GVecGen3 op = { .fni8 = FUNC }; \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
    } \
    DO_3SAME(INSN, gen_##INSN##_3s)

#define DO_3SAME_64_ENV(INSN, FUNC) \
    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
    { \
        FUNC(d, cpu_env, n, m); \
    } \
    DO_3SAME_64(INSN, gen_##INSN##_elt)

DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)

#define DO_3SAME_32(INSN, FUNC) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        static const GVecGen3 ops[4] = { \
            { .fni4 = gen_helper_neon_##FUNC##8 }, \
            { .fni4 = gen_helper_neon_##FUNC##16 }, \
            { .fni4 = gen_helper_neon_##FUNC##32 }, \
            { 0 }, \
        }; \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    } \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size > 2) { \
            return false; \
        } \
        return do_3same(s, a, gen_##INSN##_3s); \
    }
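
/*
 * Note on the GVecGen3 descriptors used by DO_3SAME_64 and DO_3SAME_32:
 * they supply only a .fni8/.fni4 scalar callback, so tcg_gen_gvec_3()
 * expands the operation as that callback applied to each 64-bit or
 * 32-bit chunk of the vector in turn; there is no host-vector fast
 * path for these helpers.
 */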
973 */ 974 #define WRAP_ENV_FN(WRAPNAME, FUNC) \ 975 static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ 976 { \ 977 FUNC(d, cpu_env, n, m); \ 978 } 979 980 #define DO_3SAME_32_ENV(INSN, FUNC) \ 981 WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ 982 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ 983 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ 984 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 985 uint32_t rn_ofs, uint32_t rm_ofs, \ 986 uint32_t oprsz, uint32_t maxsz) \ 987 { \ 988 static const GVecGen3 ops[4] = { \ 989 { .fni4 = gen_##INSN##_tramp8 }, \ 990 { .fni4 = gen_##INSN##_tramp16 }, \ 991 { .fni4 = gen_##INSN##_tramp32 }, \ 992 { 0 }, \ 993 }; \ 994 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 995 } \ 996 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 997 { \ 998 if (a->size > 2) { \ 999 return false; \ 1000 } \ 1001 return do_3same(s, a, gen_##INSN##_3s); \ 1002 } 1003 1004 DO_3SAME_32(VHADD_S, hadd_s) 1005 DO_3SAME_32(VHADD_U, hadd_u) 1006 DO_3SAME_32(VHSUB_S, hsub_s) 1007 DO_3SAME_32(VHSUB_U, hsub_u) 1008 DO_3SAME_32(VRHADD_S, rhadd_s) 1009 DO_3SAME_32(VRHADD_U, rhadd_u) 1010 DO_3SAME_32(VRSHL_S, rshl_s) 1011 DO_3SAME_32(VRSHL_U, rshl_u) 1012 1013 DO_3SAME_32_ENV(VQSHL_S, qshl_s) 1014 DO_3SAME_32_ENV(VQSHL_U, qshl_u) 1015 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) 1016 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) 1017 1018 static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) 1019 { 1020 /* Operations handled pairwise 32 bits at a time */ 1021 TCGv_i32 tmp, tmp2, tmp3; 1022 1023 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 1024 return false; 1025 } 1026 1027 /* UNDEF accesses to D16-D31 if they don't exist. */ 1028 if (!dc_isar_feature(aa32_simd_r32, s) && 1029 ((a->vd | a->vn | a->vm) & 0x10)) { 1030 return false; 1031 } 1032 1033 if (a->size == 3) { 1034 return false; 1035 } 1036 1037 if (!vfp_access_check(s)) { 1038 return true; 1039 } 1040 1041 assert(a->q == 0); /* enforced by decode patterns */ 1042 1043 /* 1044 * Note that we have to be careful not to clobber the source operands 1045 * in the "vm == vd" case by storing the result of the first pass too 1046 * early. Since Q is 0 there are always just two passes, so instead 1047 * of a complicated loop over each pass we just unroll. 1048 */ 1049 tmp = tcg_temp_new_i32(); 1050 tmp2 = tcg_temp_new_i32(); 1051 tmp3 = tcg_temp_new_i32(); 1052 1053 read_neon_element32(tmp, a->vn, 0, MO_32); 1054 read_neon_element32(tmp2, a->vn, 1, MO_32); 1055 fn(tmp, tmp, tmp2); 1056 1057 read_neon_element32(tmp3, a->vm, 0, MO_32); 1058 read_neon_element32(tmp2, a->vm, 1, MO_32); 1059 fn(tmp3, tmp3, tmp2); 1060 1061 write_neon_element32(tmp, a->vd, 0, MO_32); 1062 write_neon_element32(tmp3, a->vd, 1, MO_32); 1063 1064 tcg_temp_free_i32(tmp); 1065 tcg_temp_free_i32(tmp2); 1066 tcg_temp_free_i32(tmp3); 1067 return true; 1068 } 1069 1070 #define DO_3SAME_PAIR(INSN, func) \ 1071 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 1072 { \ 1073 static NeonGenTwoOpFn * const fns[] = { \ 1074 gen_helper_neon_##func##8, \ 1075 gen_helper_neon_##func##16, \ 1076 gen_helper_neon_##func##32, \ 1077 }; \ 1078 if (a->size > 2) { \ 1079 return false; \ 1080 } \ 1081 return do_3same_pair(s, a, fns[a->size]); \ 1082 } 1083 1084 /* 32-bit pairwise ops end up the same as the elementwise versions. 

/* 32-bit pairwise ops end up the same as the elementwise versions. */
#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
#define gen_helper_neon_padd_u32 tcg_gen_add_i32

DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
DO_3SAME_PAIR(VPADD, padd_u)

#define DO_3SAME_VQDMULH(INSN, FUNC) \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        static const GVecGen3 ops[2] = { \
            { .fni4 = gen_##INSN##_tramp16 }, \
            { .fni4 = gen_##INSN##_tramp32 }, \
        }; \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
    } \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size != 1 && a->size != 2) { \
            return false; \
        } \
        return do_3same(s, a, gen_##INSN##_3s); \
    }

DO_3SAME_VQDMULH(VQDMULH, qdmulh)
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)

#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
                         uint32_t rn_ofs, uint32_t rm_ofs, \
                         uint32_t oprsz, uint32_t maxsz) \
    { \
        TCGv_ptr fpst = fpstatus_ptr(FPST); \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
                           oprsz, maxsz, 0, FUNC); \
        tcg_temp_free_ptr(fpst); \
    }

#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \
    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \
    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size == MO_16) { \
            if (!dc_isar_feature(aa32_fp16_arith, s)) { \
                return false; \
            } \
            return do_3same(s, a, gen_##INSN##_fp16_3s); \
        } \
        return do_3same(s, a, gen_##INSN##_fp32_3s); \
    }

DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
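
/*
 * VMAXNM and VMINNM do not use DO_3S_FP_GVEC because, being v8-only
 * insns, their trans functions additionally check ARM_FEATURE_V8
 * before dispatching to do_3same().
 */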

WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)

static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMAXNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMAXNM_fp32_3s);
}

static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMINNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMINNM_fp32_3s);
}

static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
                             gen_helper_gvec_3_ptr *fn)
{
    /* FP pairwise operations */
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpstatus, 8, 8, 0, fn);
    tcg_temp_free_ptr(fpstatus);

    return true;
}

/*
 * For all the functions using this macro, size == 1 means fp16,
 * which requires the FP16 arithmetic extension (checked below).
 */
#define DO_3S_FP_PAIR(INSN,FUNC) \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size == MO_16) { \
            if (!dc_isar_feature(aa32_fp16_arith, s)) { \
                return false; \
            } \
            return do_3same_fp_pair(s, a, FUNC##h); \
        } \
        return do_3same_fp_pair(s, a, FUNC##s); \
    }

DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)

static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}

#define DO_2SH(INSN, FUNC) \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        return do_vector_2sh(s, a, FUNC); \
    } \

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)

static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Signed shift out of range results in all-sign-bits */
    a->shift = MIN(a->shift, (8 << a->size) - 1);
    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
}

static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}

static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Shift out of range is architecturally valid and results in zero. */
    if (a->shift >= (8 << a->size)) {
        return do_vector_2sh(s, a, gen_zero_rd_2sh);
    } else {
        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
    }
}

static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwo64OpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size == 3 case, where the
     * function needs to be passed cpu_env.
     */
    TCGv_i64 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_constant_i64(dup_const(a->size, a->shift));

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 tmp = tcg_temp_new_i64();

        read_neon_element64(tmp, a->vm, pass, MO_64);
        fn(tmp, cpu_env, tmp, constimm);
        write_neon_element64(tmp, a->vd, pass, MO_64);
        tcg_temp_free_i64(tmp);
    }
    return true;
}

static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwoOpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size < 3 case, where the
     * helper needs to be passed cpu_env.
     */
    TCGv_i32 constimm, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
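    /*
     * For example, for a 16-bit VQSHL with shift == 5 this builds the
     * low 32 bits of dup_const(MO_16, 5), i.e. 0x00050005: the shift
     * count replicated into both 16-bit lanes of the 32-bit value the
     * helper processes, which is the per-lane layout the variable-shift
     * helper expects.
     */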
    constimm = tcg_constant_i32(dup_const(a->size, a->shift));
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, cpu_env, tmp, constimm);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);
    return true;
}

#define DO_2SHIFT_ENV(INSN, FUNC) \
    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
    } \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        static NeonGenTwoOpEnvFn * const fns[] = { \
            gen_helper_neon_##FUNC##8, \
            gen_helper_neon_##FUNC##16, \
            gen_helper_neon_##FUNC##32, \
        }; \
        assert(a->size < ARRAY_SIZE(fns)); \
        return do_2shift_env_32(s, a, fns[a->size]); \
    }

DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)

static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2;
    TCGv_i32 rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
    constimm = tcg_constant_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();
    rd = tcg_temp_new_i32();

    /* Load both inputs first to avoid potential overwrite if rm == rd */
    read_neon_element64(rm1, a->vm, 0, MO_64);
    read_neon_element64(rm2, a->vm, 1, MO_64);

    shiftfn(rm1, rm1, constimm);
    narrowfn(rd, cpu_env, rm1);
    write_neon_element32(rd, a->vd, 0, MO_32);

    shiftfn(rm2, rm2, constimm);
    narrowfn(rd, cpu_env, rm2);
    write_neon_element32(rd, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd);
    tcg_temp_free_i64(rm1);
    tcg_temp_free_i64(rm2);

    return true;
}

static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_constant_i32(imm);

    /* Load all inputs first to avoid potential overwrite */
    rm1 = tcg_temp_new_i32();
    rm2 = tcg_temp_new_i32();
    rm3 = tcg_temp_new_i32();
    rm4 = tcg_temp_new_i32();
    read_neon_element32(rm1, a->vm, 0, MO_32);
    read_neon_element32(rm2, a->vm, 1, MO_32);
    read_neon_element32(rm3, a->vm, 2, MO_32);
    read_neon_element32(rm4, a->vm, 3, MO_32);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
    tcg_temp_free_i32(rm2);

    narrowfn(rm1, cpu_env, rtmp);
    write_neon_element32(rm1, a->vd, 0, MO_32);
    tcg_temp_free_i32(rm1);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
    tcg_temp_free_i32(rm4);

    narrowfn(rm3, cpu_env, rtmp);
    tcg_temp_free_i64(rtmp);
    write_neon_element32(rm3, a->vd, 1, MO_32);
    tcg_temp_free_i32(rm3);
    return true;
}

#define DO_2SN_64(INSN, FUNC, NARROWFUNC) \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \
    }
#define DO_2SN_32(INSN, FUNC, NARROWFUNC) \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
    }

static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}

DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16,
          gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)

static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is a widen-and-shift operation. The shift is always less
     * than the width of the source type, so after widening the input
     * vector we can simply shift the whole 64-bit widened register,
     * and then clear the potential overflow bits resulting from left
     * bits of the narrow input appearing as right bits of the left
     * neighbour narrow input. Calculate a mask of bits to clear.
     */
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }
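
    /*
     * Worked example: VSHLL.S8 by 3 has a->size == 0, so esize == 8,
     * widen_mask == (0xff >> 5) == 0x07, duplicated into 16-bit lanes
     * as 0x0007000700070007.  After widening each byte to 16 bits and
     * shifting the whole 64-bit value left by 3, the low 3 bits of each
     * lane hold bits shifted in from the top of the lane below (sign
     * bits for a signed widen); ANDing with ~widen_mask below clears
     * them so each lane looks like an independent per-lane shift.
     */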

    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);
    tmp = tcg_temp_new_i64();

    widenfn(tmp, rm0);
    tcg_temp_free_i32(rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 0, MO_64);

    widenfn(tmp, rm1);
    tcg_temp_free_i32(rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 1, MO_64);
    tcg_temp_free_i64(tmp);
    return true;
}

static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}

static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}

static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
                      gen_helper_gvec_2_ptr *fn)
{
    /* FP operations in 2-reg-and-shift group */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);
    TCGv_ptr fpst;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
    tcg_temp_free_ptr(fpst);
    return true;
}

#define DO_FP_2SH(INSN, FUNC) \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        return do_fp_2sh(s, a, FUNC); \
    }

DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)

static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                        GVecGen2iFn *fn)
{
    uint64_t imm;
    int reg_ofs, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    reg_ofs = neon_full_reg_offset(a->vd);
    vec_size = a->q ? 16 : 8;
    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
    return true;
}

static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    GVecGen2iFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        /* for op=1, the imm will be inverted, so BIC becomes AND. */
        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        fn = gen_VMOV_1r;
    }
    return do_1reg_imm(s, a, fn);
}

static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
                           NeonGenWidenFn *widenfn,
                           NeonGenTwo64OpFn *opfn,
                           int src1_mop, int src2_mop)
{
    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
    TCGv_i64 rn0_64, rn1_64, rm_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn0_64 = tcg_temp_new_i64();
    rn1_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();

    if (src1_mop >= 0) {
        read_neon_element64(rn0_64, a->vn, 0, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 0, MO_32);
        widenfn(rn0_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 0, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 0, MO_32);
        widenfn(rm_64, tmp);
        tcg_temp_free_i32(tmp);
    }

    opfn(rn0_64, rn0_64, rm_64);

    /*
     * Load second pass inputs before storing the first pass result, to
     * avoid incorrect results if a narrow input overlaps with the result.
     */
    if (src1_mop >= 0) {
        read_neon_element64(rn1_64, a->vn, 1, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 1, MO_32);
        widenfn(rn1_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 1, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 1, MO_32);
        widenfn(rm_64, tmp);
        tcg_temp_free_i32(tmp);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);

    opfn(rn1_64, rn1_64, rm_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);

    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
    { \
        static NeonGenWidenFn * const widenfn[] = { \
            gen_helper_neon_widen_##S##8, \
            gen_helper_neon_widen_##S##16, \
            NULL, NULL, \
        }; \
        static NeonGenTwo64OpFn * const addfn[] = { \
            gen_helper_neon_##OP##l_u16, \
            gen_helper_neon_##OP##l_u32, \
            tcg_gen_##OP##_i64, \
            NULL, \
        }; \
        int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
                              SRC1WIDE ? MO_UQ : narrow_mop, \
                              narrow_mop); \
    }

DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
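
/*
 * To illustrate the parameter mapping: VADDW_S has SRC1WIDE == true,
 * so the first source is always read as a full 64-bit element (MO_UQ),
 * while the narrow second source is either read sign-extended directly
 * (size == MO_32, narrow_mop == MO_32 | MO_SIGN) or widened via
 * gen_helper_neon_widen_s8/s16 for the smaller element sizes, where
 * narrow_mop is -1.
 */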

static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
{
    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
    TCGv_i64 rn_64, rm_64;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn || !narrowfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vn | a->vm) & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    read_neon_element64(rn_64, a->vn, 0, MO_64);
    read_neon_element64(rm_64, a->vm, 0, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd0, rn_64);

    read_neon_element64(rn_64, a->vn, 1, MO_64);
    read_neon_element64(rm_64, a->vm, 1, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd1, rn_64);

    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd0);
    tcg_temp_free_i32(rd1);
    tcg_temp_free_i64(rn_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
    { \
        static NeonGenTwo64OpFn * const addfn[] = { \
            gen_helper_neon_##OP##l_u16, \
            gen_helper_neon_##OP##l_u32, \
            tcg_gen_##OP##_i64, \
            NULL, \
        }; \
        static NeonGenNarrowFn * const narrowfn[] = { \
            gen_helper_neon_##NARROWTYPE##_high_u8, \
            gen_helper_neon_##NARROWTYPE##_high_u16, \
            EXTOP, \
            NULL, \
        }; \
        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
    }

static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}

DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)

static bool do_long_3d(DisasContext *s, arg_3diff *a,
                       NeonGenTwoOpWidenFn *opfn,
                       NeonGenTwo64OpFn *accfn)
{
    /*
     * 3-regs different lengths, long operations.
     * These perform an operation on two inputs that returns a double-width
     * result, and then possibly perform an accumulation operation of
     * that result into the double-width destination.
     */
    TCGv_i64 rd0, rd1, tmp;
    TCGv_i32 rn, rm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
*/ 2034 if (!dc_isar_feature(aa32_simd_r32, s) && 2035 ((a->vd | a->vn | a->vm) & 0x10)) { 2036 return false; 2037 } 2038 2039 if (!opfn) { 2040 /* size == 3 case, which is an entirely different insn group */ 2041 return false; 2042 } 2043 2044 if (a->vd & 1) { 2045 return false; 2046 } 2047 2048 if (!vfp_access_check(s)) { 2049 return true; 2050 } 2051 2052 rd0 = tcg_temp_new_i64(); 2053 rd1 = tcg_temp_new_i64(); 2054 2055 rn = tcg_temp_new_i32(); 2056 rm = tcg_temp_new_i32(); 2057 read_neon_element32(rn, a->vn, 0, MO_32); 2058 read_neon_element32(rm, a->vm, 0, MO_32); 2059 opfn(rd0, rn, rm); 2060 2061 read_neon_element32(rn, a->vn, 1, MO_32); 2062 read_neon_element32(rm, a->vm, 1, MO_32); 2063 opfn(rd1, rn, rm); 2064 tcg_temp_free_i32(rn); 2065 tcg_temp_free_i32(rm); 2066 2067 /* Don't store results until after all loads: they might overlap */ 2068 if (accfn) { 2069 tmp = tcg_temp_new_i64(); 2070 read_neon_element64(tmp, a->vd, 0, MO_64); 2071 accfn(rd0, tmp, rd0); 2072 read_neon_element64(tmp, a->vd, 1, MO_64); 2073 accfn(rd1, tmp, rd1); 2074 tcg_temp_free_i64(tmp); 2075 } 2076 2077 write_neon_element64(rd0, a->vd, 0, MO_64); 2078 write_neon_element64(rd1, a->vd, 1, MO_64); 2079 tcg_temp_free_i64(rd0); 2080 tcg_temp_free_i64(rd1); 2081 2082 return true; 2083 } 2084 2085 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) 2086 { 2087 static NeonGenTwoOpWidenFn * const opfn[] = { 2088 gen_helper_neon_abdl_s16, 2089 gen_helper_neon_abdl_s32, 2090 gen_helper_neon_abdl_s64, 2091 NULL, 2092 }; 2093 2094 return do_long_3d(s, a, opfn[a->size], NULL); 2095 } 2096 2097 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) 2098 { 2099 static NeonGenTwoOpWidenFn * const opfn[] = { 2100 gen_helper_neon_abdl_u16, 2101 gen_helper_neon_abdl_u32, 2102 gen_helper_neon_abdl_u64, 2103 NULL, 2104 }; 2105 2106 return do_long_3d(s, a, opfn[a->size], NULL); 2107 } 2108 2109 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) 2110 { 2111 static NeonGenTwoOpWidenFn * const opfn[] = { 2112 gen_helper_neon_abdl_s16, 2113 gen_helper_neon_abdl_s32, 2114 gen_helper_neon_abdl_s64, 2115 NULL, 2116 }; 2117 static NeonGenTwo64OpFn * const addfn[] = { 2118 gen_helper_neon_addl_u16, 2119 gen_helper_neon_addl_u32, 2120 tcg_gen_add_i64, 2121 NULL, 2122 }; 2123 2124 return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 2125 } 2126 2127 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) 2128 { 2129 static NeonGenTwoOpWidenFn * const opfn[] = { 2130 gen_helper_neon_abdl_u16, 2131 gen_helper_neon_abdl_u32, 2132 gen_helper_neon_abdl_u64, 2133 NULL, 2134 }; 2135 static NeonGenTwo64OpFn * const addfn[] = { 2136 gen_helper_neon_addl_u16, 2137 gen_helper_neon_addl_u32, 2138 tcg_gen_add_i64, 2139 NULL, 2140 }; 2141 2142 return do_long_3d(s, a, opfn[a->size], addfn[a->size]); 2143 } 2144 2145 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2146 { 2147 TCGv_i32 lo = tcg_temp_new_i32(); 2148 TCGv_i32 hi = tcg_temp_new_i32(); 2149 2150 tcg_gen_muls2_i32(lo, hi, rn, rm); 2151 tcg_gen_concat_i32_i64(rd, lo, hi); 2152 2153 tcg_temp_free_i32(lo); 2154 tcg_temp_free_i32(hi); 2155 } 2156 2157 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2158 { 2159 TCGv_i32 lo = tcg_temp_new_i32(); 2160 TCGv_i32 hi = tcg_temp_new_i32(); 2161 2162 tcg_gen_mulu2_i32(lo, hi, rn, rm); 2163 tcg_gen_concat_i32_i64(rd, lo, hi); 2164 2165 tcg_temp_free_i32(lo); 2166 tcg_temp_free_i32(hi); 2167 } 2168 2169 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) 2170 { 2171 static NeonGenTwoOpWidenFn 
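/* per-size widening multiplies: 8->16, 16->32 and 32->64 bit elements */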
* const opfn[] = { 2172 gen_helper_neon_mull_s8, 2173 gen_helper_neon_mull_s16, 2174 gen_mull_s32, 2175 NULL, 2176 }; 2177 2178 return do_long_3d(s, a, opfn[a->size], NULL); 2179 } 2180 2181 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) 2182 { 2183 static NeonGenTwoOpWidenFn * const opfn[] = { 2184 gen_helper_neon_mull_u8, 2185 gen_helper_neon_mull_u16, 2186 gen_mull_u32, 2187 NULL, 2188 }; 2189 2190 return do_long_3d(s, a, opfn[a->size], NULL); 2191 } 2192 2193 #define DO_VMLAL(INSN,MULL,ACC) \ 2194 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ 2195 { \ 2196 static NeonGenTwoOpWidenFn * const opfn[] = { \ 2197 gen_helper_neon_##MULL##8, \ 2198 gen_helper_neon_##MULL##16, \ 2199 gen_##MULL##32, \ 2200 NULL, \ 2201 }; \ 2202 static NeonGenTwo64OpFn * const accfn[] = { \ 2203 gen_helper_neon_##ACC##l_u16, \ 2204 gen_helper_neon_##ACC##l_u32, \ 2205 tcg_gen_##ACC##_i64, \ 2206 NULL, \ 2207 }; \ 2208 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ 2209 } 2210 2211 DO_VMLAL(VMLAL_S,mull_s,add) 2212 DO_VMLAL(VMLAL_U,mull_u,add) 2213 DO_VMLAL(VMLSL_S,mull_s,sub) 2214 DO_VMLAL(VMLSL_U,mull_u,sub) 2215 2216 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2217 { 2218 gen_helper_neon_mull_s16(rd, rn, rm); 2219 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd); 2220 } 2221 2222 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) 2223 { 2224 gen_mull_s32(rd, rn, rm); 2225 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd); 2226 } 2227 2228 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) 2229 { 2230 static NeonGenTwoOpWidenFn * const opfn[] = { 2231 NULL, 2232 gen_VQDMULL_16, 2233 gen_VQDMULL_32, 2234 NULL, 2235 }; 2236 2237 return do_long_3d(s, a, opfn[a->size], NULL); 2238 } 2239 2240 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2241 { 2242 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); 2243 } 2244 2245 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2246 { 2247 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); 2248 } 2249 2250 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) 2251 { 2252 static NeonGenTwoOpWidenFn * const opfn[] = { 2253 NULL, 2254 gen_VQDMULL_16, 2255 gen_VQDMULL_32, 2256 NULL, 2257 }; 2258 static NeonGenTwo64OpFn * const accfn[] = { 2259 NULL, 2260 gen_VQDMLAL_acc_16, 2261 gen_VQDMLAL_acc_32, 2262 NULL, 2263 }; 2264 2265 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2266 } 2267 2268 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2269 { 2270 gen_helper_neon_negl_u32(rm, rm); 2271 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); 2272 } 2273 2274 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) 2275 { 2276 tcg_gen_neg_i64(rm, rm); 2277 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); 2278 } 2279 2280 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) 2281 { 2282 static NeonGenTwoOpWidenFn * const opfn[] = { 2283 NULL, 2284 gen_VQDMULL_16, 2285 gen_VQDMULL_32, 2286 NULL, 2287 }; 2288 static NeonGenTwo64OpFn * const accfn[] = { 2289 NULL, 2290 gen_VQDMLSL_acc_16, 2291 gen_VQDMLSL_acc_32, 2292 NULL, 2293 }; 2294 2295 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); 2296 } 2297 2298 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) 2299 { 2300 gen_helper_gvec_3 *fn_gvec; 2301 2302 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2303 return false; 2304 } 2305 2306 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2307 if (!dc_isar_feature(aa32_simd_r32, s) && 2308 ((a->vd | a->vn | a->vm) & 0x10)) { 2309 return false; 2310 } 2311 2312 if (a->vd & 1) { 2313 return false; 2314 } 2315 2316 switch (a->size) { 2317 case 0: 2318 fn_gvec = gen_helper_neon_pmull_h; 2319 break; 2320 case 2: 2321 if (!dc_isar_feature(aa32_pmull, s)) { 2322 return false; 2323 } 2324 fn_gvec = gen_helper_gvec_pmull_q; 2325 break; 2326 default: 2327 return false; 2328 } 2329 2330 if (!vfp_access_check(s)) { 2331 return true; 2332 } 2333 2334 tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd), 2335 neon_full_reg_offset(a->vn), 2336 neon_full_reg_offset(a->vm), 2337 16, 16, 0, fn_gvec); 2338 return true; 2339 } 2340 2341 static void gen_neon_dup_low16(TCGv_i32 var) 2342 { 2343 TCGv_i32 tmp = tcg_temp_new_i32(); 2344 tcg_gen_ext16u_i32(var, var); 2345 tcg_gen_shli_i32(tmp, var, 16); 2346 tcg_gen_or_i32(var, var, tmp); 2347 tcg_temp_free_i32(tmp); 2348 } 2349 2350 static void gen_neon_dup_high16(TCGv_i32 var) 2351 { 2352 TCGv_i32 tmp = tcg_temp_new_i32(); 2353 tcg_gen_andi_i32(var, var, 0xffff0000); 2354 tcg_gen_shri_i32(tmp, var, 16); 2355 tcg_gen_or_i32(var, var, tmp); 2356 tcg_temp_free_i32(tmp); 2357 } 2358 2359 static inline TCGv_i32 neon_get_scalar(int size, int reg) 2360 { 2361 TCGv_i32 tmp = tcg_temp_new_i32(); 2362 if (size == MO_16) { 2363 read_neon_element32(tmp, reg & 7, reg >> 4, MO_32); 2364 if (reg & 8) { 2365 gen_neon_dup_high16(tmp); 2366 } else { 2367 gen_neon_dup_low16(tmp); 2368 } 2369 } else { 2370 read_neon_element32(tmp, reg & 15, reg >> 4, MO_32); 2371 } 2372 return tmp; 2373 } 2374 2375 static bool do_2scalar(DisasContext *s, arg_2scalar *a, 2376 NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) 2377 { 2378 /* 2379 * Two registers and a scalar: perform an operation between 2380 * the input elements and the scalar, and then possibly 2381 * perform an accumulation operation of that result into the 2382 * destination. 2383 */ 2384 TCGv_i32 scalar, tmp; 2385 int pass; 2386 2387 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2388 return false; 2389 } 2390 2391 /* UNDEF accesses to D16-D31 if they don't exist. */ 2392 if (!dc_isar_feature(aa32_simd_r32, s) && 2393 ((a->vd | a->vn | a->vm) & 0x10)) { 2394 return false; 2395 } 2396 2397 if (!opfn) { 2398 /* Bad size (including size == 3, which is a different insn group) */ 2399 return false; 2400 } 2401 2402 if (a->q && ((a->vd | a->vn) & 1)) { 2403 return false; 2404 } 2405 2406 if (!vfp_access_check(s)) { 2407 return true; 2408 } 2409 2410 scalar = neon_get_scalar(a->size, a->vm); 2411 tmp = tcg_temp_new_i32(); 2412 2413 for (pass = 0; pass < (a->q ? 
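/* Q ops cover four 32-bit lanes, D ops two */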
4 : 2); pass++) { 2414 read_neon_element32(tmp, a->vn, pass, MO_32); 2415 opfn(tmp, tmp, scalar); 2416 if (accfn) { 2417 TCGv_i32 rd = tcg_temp_new_i32(); 2418 read_neon_element32(rd, a->vd, pass, MO_32); 2419 accfn(tmp, rd, tmp); 2420 tcg_temp_free_i32(rd); 2421 } 2422 write_neon_element32(tmp, a->vd, pass, MO_32); 2423 } 2424 tcg_temp_free_i32(tmp); 2425 tcg_temp_free_i32(scalar); 2426 return true; 2427 } 2428 2429 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) 2430 { 2431 static NeonGenTwoOpFn * const opfn[] = { 2432 NULL, 2433 gen_helper_neon_mul_u16, 2434 tcg_gen_mul_i32, 2435 NULL, 2436 }; 2437 2438 return do_2scalar(s, a, opfn[a->size], NULL); 2439 } 2440 2441 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) 2442 { 2443 static NeonGenTwoOpFn * const opfn[] = { 2444 NULL, 2445 gen_helper_neon_mul_u16, 2446 tcg_gen_mul_i32, 2447 NULL, 2448 }; 2449 static NeonGenTwoOpFn * const accfn[] = { 2450 NULL, 2451 gen_helper_neon_add_u16, 2452 tcg_gen_add_i32, 2453 NULL, 2454 }; 2455 2456 return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2457 } 2458 2459 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) 2460 { 2461 static NeonGenTwoOpFn * const opfn[] = { 2462 NULL, 2463 gen_helper_neon_mul_u16, 2464 tcg_gen_mul_i32, 2465 NULL, 2466 }; 2467 static NeonGenTwoOpFn * const accfn[] = { 2468 NULL, 2469 gen_helper_neon_sub_u16, 2470 tcg_gen_sub_i32, 2471 NULL, 2472 }; 2473 2474 return do_2scalar(s, a, opfn[a->size], accfn[a->size]); 2475 } 2476 2477 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a, 2478 gen_helper_gvec_3_ptr *fn) 2479 { 2480 /* Two registers and a scalar, using gvec */ 2481 int vec_size = a->q ? 16 : 8; 2482 int rd_ofs = neon_full_reg_offset(a->vd); 2483 int rn_ofs = neon_full_reg_offset(a->vn); 2484 int rm_ofs; 2485 int idx; 2486 TCGv_ptr fpstatus; 2487 2488 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2489 return false; 2490 } 2491 2492 /* UNDEF accesses to D16-D31 if they don't exist. */ 2493 if (!dc_isar_feature(aa32_simd_r32, s) && 2494 ((a->vd | a->vn | a->vm) & 0x10)) { 2495 return false; 2496 } 2497 2498 if (!fn) { 2499 /* Bad size (including size == 3, which is a different insn group) */ 2500 return false; 2501 } 2502 2503 if (a->q && ((a->vd | a->vn) & 1)) { 2504 return false; 2505 } 2506 2507 if (!vfp_access_check(s)) { 2508 return true; 2509 } 2510 2511 /* a->vm is M:Vm, which encodes both register and index */ 2512 idx = extract32(a->vm, a->size + 2, 2); 2513 a->vm = extract32(a->vm, 0, a->size + 2); 2514 rm_ofs = neon_full_reg_offset(a->vm); 2515 2516 fpstatus = fpstatus_ptr(a->size == 1 ? 
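/* size 1 is MO_16: use the half-precision flavour of the standard FP status */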
FPST_STD_F16 : FPST_STD); 2517 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus, 2518 vec_size, vec_size, idx, fn); 2519 tcg_temp_free_ptr(fpstatus); 2520 return true; 2521 } 2522 2523 #define DO_VMUL_F_2sc(NAME, FUNC) \ 2524 static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \ 2525 { \ 2526 static gen_helper_gvec_3_ptr * const opfn[] = { \ 2527 NULL, \ 2528 gen_helper_##FUNC##_h, \ 2529 gen_helper_##FUNC##_s, \ 2530 NULL, \ 2531 }; \ 2532 if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \ 2533 return false; \ 2534 } \ 2535 return do_2scalar_fp_vec(s, a, opfn[a->size]); \ 2536 } 2537 2538 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx) 2539 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx) 2540 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx) 2541 2542 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) 2543 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) 2544 WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16) 2545 WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) 2546 2547 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) 2548 { 2549 static NeonGenTwoOpFn * const opfn[] = { 2550 NULL, 2551 gen_VQDMULH_16, 2552 gen_VQDMULH_32, 2553 NULL, 2554 }; 2555 2556 return do_2scalar(s, a, opfn[a->size], NULL); 2557 } 2558 2559 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) 2560 { 2561 static NeonGenTwoOpFn * const opfn[] = { 2562 NULL, 2563 gen_VQRDMULH_16, 2564 gen_VQRDMULH_32, 2565 NULL, 2566 }; 2567 2568 return do_2scalar(s, a, opfn[a->size], NULL); 2569 } 2570 2571 static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, 2572 NeonGenThreeOpEnvFn *opfn) 2573 { 2574 /* 2575 * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn 2576 * performs a kind of fused op-then-accumulate using a helper 2577 * function that takes all of rd, rn and the scalar at once. 2578 */ 2579 TCGv_i32 scalar, rn, rd; 2580 int pass; 2581 2582 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2583 return false; 2584 } 2585 2586 if (!dc_isar_feature(aa32_rdm, s)) { 2587 return false; 2588 } 2589 2590 /* UNDEF accesses to D16-D31 if they don't exist. */ 2591 if (!dc_isar_feature(aa32_simd_r32, s) && 2592 ((a->vd | a->vn | a->vm) & 0x10)) { 2593 return false; 2594 } 2595 2596 if (!opfn) { 2597 /* Bad size (including size == 3, which is a different insn group) */ 2598 return false; 2599 } 2600 2601 if (a->q && ((a->vd | a->vn) & 1)) { 2602 return false; 2603 } 2604 2605 if (!vfp_access_check(s)) { 2606 return true; 2607 } 2608 2609 scalar = neon_get_scalar(a->size, a->vm); 2610 rn = tcg_temp_new_i32(); 2611 rd = tcg_temp_new_i32(); 2612 2613 for (pass = 0; pass < (a->q ? 
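/* one 32-bit lane per pass, as in do_2scalar */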
4 : 2); pass++) { 2614 read_neon_element32(rn, a->vn, pass, MO_32); 2615 read_neon_element32(rd, a->vd, pass, MO_32); 2616 opfn(rd, cpu_env, rn, scalar, rd); 2617 write_neon_element32(rd, a->vd, pass, MO_32); 2618 } 2619 tcg_temp_free_i32(rn); 2620 tcg_temp_free_i32(rd); 2621 tcg_temp_free_i32(scalar); 2622 2623 return true; 2624 } 2625 2626 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) 2627 { 2628 static NeonGenThreeOpEnvFn *opfn[] = { 2629 NULL, 2630 gen_helper_neon_qrdmlah_s16, 2631 gen_helper_neon_qrdmlah_s32, 2632 NULL, 2633 }; 2634 return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2635 } 2636 2637 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) 2638 { 2639 static NeonGenThreeOpEnvFn *opfn[] = { 2640 NULL, 2641 gen_helper_neon_qrdmlsh_s16, 2642 gen_helper_neon_qrdmlsh_s32, 2643 NULL, 2644 }; 2645 return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2646 } 2647 2648 static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, 2649 NeonGenTwoOpWidenFn *opfn, 2650 NeonGenTwo64OpFn *accfn) 2651 { 2652 /* 2653 * Two registers and a scalar, long operations: perform an 2654 * operation on the input elements and the scalar which produces 2655 * a double-width result, and then possibly perform an accumulation 2656 * operation of that result into the destination. 2657 */ 2658 TCGv_i32 scalar, rn; 2659 TCGv_i64 rn0_64, rn1_64; 2660 2661 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2662 return false; 2663 } 2664 2665 /* UNDEF accesses to D16-D31 if they don't exist. */ 2666 if (!dc_isar_feature(aa32_simd_r32, s) && 2667 ((a->vd | a->vn | a->vm) & 0x10)) { 2668 return false; 2669 } 2670 2671 if (!opfn) { 2672 /* Bad size (including size == 3, which is a different insn group) */ 2673 return false; 2674 } 2675 2676 if (a->vd & 1) { 2677 return false; 2678 } 2679 2680 if (!vfp_access_check(s)) { 2681 return true; 2682 } 2683 2684 scalar = neon_get_scalar(a->size, a->vm); 2685 2686 /* Load all inputs before writing any outputs, in case of overlap */ 2687 rn = tcg_temp_new_i32(); 2688 read_neon_element32(rn, a->vn, 0, MO_32); 2689 rn0_64 = tcg_temp_new_i64(); 2690 opfn(rn0_64, rn, scalar); 2691 2692 read_neon_element32(rn, a->vn, 1, MO_32); 2693 rn1_64 = tcg_temp_new_i64(); 2694 opfn(rn1_64, rn, scalar); 2695 tcg_temp_free_i32(rn); 2696 tcg_temp_free_i32(scalar); 2697 2698 if (accfn) { 2699 TCGv_i64 t64 = tcg_temp_new_i64(); 2700 read_neon_element64(t64, a->vd, 0, MO_64); 2701 accfn(rn0_64, t64, rn0_64); 2702 read_neon_element64(t64, a->vd, 1, MO_64); 2703 accfn(rn1_64, t64, rn1_64); 2704 tcg_temp_free_i64(t64); 2705 } 2706 2707 write_neon_element64(rn0_64, a->vd, 0, MO_64); 2708 write_neon_element64(rn1_64, a->vd, 1, MO_64); 2709 tcg_temp_free_i64(rn0_64); 2710 tcg_temp_free_i64(rn1_64); 2711 return true; 2712 } 2713 2714 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) 2715 { 2716 static NeonGenTwoOpWidenFn * const opfn[] = { 2717 NULL, 2718 gen_helper_neon_mull_s16, 2719 gen_mull_s32, 2720 NULL, 2721 }; 2722 2723 return do_2scalar_long(s, a, opfn[a->size], NULL); 2724 } 2725 2726 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) 2727 { 2728 static NeonGenTwoOpWidenFn * const opfn[] = { 2729 NULL, 2730 gen_helper_neon_mull_u16, 2731 gen_mull_u32, 2732 NULL, 2733 }; 2734 2735 return do_2scalar_long(s, a, opfn[a->size], NULL); 2736 } 2737 2738 #define DO_VMLAL_2SC(INSN, MULL, ACC) \ 2739 static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ 2740 { \ 2741 static NeonGenTwoOpWidenFn * const opfn[] = { \ 2742 NULL, \ 2743 
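/* index 0: no 8-bit by-scalar form */ \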
gen_helper_neon_##MULL##16, \ 2744 gen_##MULL##32, \ 2745 NULL, \ 2746 }; \ 2747 static NeonGenTwo64OpFn * const accfn[] = { \ 2748 NULL, \ 2749 gen_helper_neon_##ACC##l_u32, \ 2750 tcg_gen_##ACC##_i64, \ 2751 NULL, \ 2752 }; \ 2753 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \ 2754 } 2755 2756 DO_VMLAL_2SC(VMLAL_S, mull_s, add) 2757 DO_VMLAL_2SC(VMLAL_U, mull_u, add) 2758 DO_VMLAL_2SC(VMLSL_S, mull_s, sub) 2759 DO_VMLAL_2SC(VMLSL_U, mull_u, sub) 2760 2761 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) 2762 { 2763 static NeonGenTwoOpWidenFn * const opfn[] = { 2764 NULL, 2765 gen_VQDMULL_16, 2766 gen_VQDMULL_32, 2767 NULL, 2768 }; 2769 2770 return do_2scalar_long(s, a, opfn[a->size], NULL); 2771 } 2772 2773 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) 2774 { 2775 static NeonGenTwoOpWidenFn * const opfn[] = { 2776 NULL, 2777 gen_VQDMULL_16, 2778 gen_VQDMULL_32, 2779 NULL, 2780 }; 2781 static NeonGenTwo64OpFn * const accfn[] = { 2782 NULL, 2783 gen_VQDMLAL_acc_16, 2784 gen_VQDMLAL_acc_32, 2785 NULL, 2786 }; 2787 2788 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2789 } 2790 2791 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) 2792 { 2793 static NeonGenTwoOpWidenFn * const opfn[] = { 2794 NULL, 2795 gen_VQDMULL_16, 2796 gen_VQDMULL_32, 2797 NULL, 2798 }; 2799 static NeonGenTwo64OpFn * const accfn[] = { 2800 NULL, 2801 gen_VQDMLSL_acc_16, 2802 gen_VQDMLSL_acc_32, 2803 NULL, 2804 }; 2805 2806 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); 2807 } 2808 2809 static bool trans_VEXT(DisasContext *s, arg_VEXT *a) 2810 { 2811 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2812 return false; 2813 } 2814 2815 /* UNDEF accesses to D16-D31 if they don't exist. */ 2816 if (!dc_isar_feature(aa32_simd_r32, s) && 2817 ((a->vd | a->vn | a->vm) & 0x10)) { 2818 return false; 2819 } 2820 2821 if ((a->vn | a->vm | a->vd) & a->q) { 2822 return false; 2823 } 2824 2825 if (a->imm > 7 && !a->q) { 2826 return false; 2827 } 2828 2829 if (!vfp_access_check(s)) { 2830 return true; 2831 } 2832 2833 if (!a->q) { 2834 /* Extract 64 bits from <Vm:Vn> */ 2835 TCGv_i64 left, right, dest; 2836 2837 left = tcg_temp_new_i64(); 2838 right = tcg_temp_new_i64(); 2839 dest = tcg_temp_new_i64(); 2840 2841 read_neon_element64(right, a->vn, 0, MO_64); 2842 read_neon_element64(left, a->vm, 0, MO_64); 2843 tcg_gen_extract2_i64(dest, right, left, a->imm * 8); 2844 write_neon_element64(dest, a->vd, 0, MO_64); 2845 2846 tcg_temp_free_i64(left); 2847 tcg_temp_free_i64(right); 2848 tcg_temp_free_i64(dest); 2849 } else { 2850 /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */ 2851 TCGv_i64 left, middle, right, destleft, destright; 2852 2853 left = tcg_temp_new_i64(); 2854 middle = tcg_temp_new_i64(); 2855 right = tcg_temp_new_i64(); 2856 destleft = tcg_temp_new_i64(); 2857 destright = tcg_temp_new_i64(); 2858 2859 if (a->imm < 8) { 2860 read_neon_element64(right, a->vn, 0, MO_64); 2861 read_neon_element64(middle, a->vn, 1, MO_64); 2862 tcg_gen_extract2_i64(destright, right, middle, a->imm * 8); 2863 read_neon_element64(left, a->vm, 0, MO_64); 2864 tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8); 2865 } else { 2866 read_neon_element64(right, a->vn, 1, MO_64); 2867 read_neon_element64(middle, a->vm, 0, MO_64); 2868 tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8); 2869 read_neon_element64(left, a->vm, 1, MO_64); 2870 tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8); 2871 } 2872 2873 write_neon_element64(destright, a->vd, 0, 
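/* low doubleword of the extracted 128-bit result */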
MO_64); 2874 write_neon_element64(destleft, a->vd, 1, MO_64); 2875 2876 tcg_temp_free_i64(destright); 2877 tcg_temp_free_i64(destleft); 2878 tcg_temp_free_i64(right); 2879 tcg_temp_free_i64(middle); 2880 tcg_temp_free_i64(left); 2881 } 2882 return true; 2883 } 2884 2885 static bool trans_VTBL(DisasContext *s, arg_VTBL *a) 2886 { 2887 TCGv_i64 val, def; 2888 TCGv_i32 desc; 2889 2890 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2891 return false; 2892 } 2893 2894 /* UNDEF accesses to D16-D31 if they don't exist. */ 2895 if (!dc_isar_feature(aa32_simd_r32, s) && 2896 ((a->vd | a->vn | a->vm) & 0x10)) { 2897 return false; 2898 } 2899 2900 if ((a->vn + a->len + 1) > 32) { 2901 /* 2902 * This is UNPREDICTABLE; we choose to UNDEF to avoid the 2903 * helper function running off the end of the register file. 2904 */ 2905 return false; 2906 } 2907 2908 if (!vfp_access_check(s)) { 2909 return true; 2910 } 2911 2912 desc = tcg_constant_i32((a->vn << 2) | a->len); 2913 def = tcg_temp_new_i64(); 2914 if (a->op) { 2915 read_neon_element64(def, a->vd, 0, MO_64); 2916 } else { 2917 tcg_gen_movi_i64(def, 0); 2918 } 2919 val = tcg_temp_new_i64(); 2920 read_neon_element64(val, a->vm, 0, MO_64); 2921 2922 gen_helper_neon_tbl(val, cpu_env, desc, val, def); 2923 write_neon_element64(val, a->vd, 0, MO_64); 2924 2925 tcg_temp_free_i64(def); 2926 tcg_temp_free_i64(val); 2927 return true; 2928 } 2929 2930 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) 2931 { 2932 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2933 return false; 2934 } 2935 2936 /* UNDEF accesses to D16-D31 if they don't exist. */ 2937 if (!dc_isar_feature(aa32_simd_r32, s) && 2938 ((a->vd | a->vm) & 0x10)) { 2939 return false; 2940 } 2941 2942 if (a->vd & a->q) { 2943 return false; 2944 } 2945 2946 if (!vfp_access_check(s)) { 2947 return true; 2948 } 2949 2950 tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd), 2951 neon_element_offset(a->vm, a->index, a->size), 2952 a->q ? 16 : 8, a->q ? 16 : 8); 2953 return true; 2954 } 2955 2956 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) 2957 { 2958 int pass, half; 2959 TCGv_i32 tmp[2]; 2960 2961 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2962 return false; 2963 } 2964 2965 /* UNDEF accesses to D16-D31 if they don't exist. */ 2966 if (!dc_isar_feature(aa32_simd_r32, s) && 2967 ((a->vd | a->vm) & 0x10)) { 2968 return false; 2969 } 2970 2971 if ((a->vd | a->vm) & a->q) { 2972 return false; 2973 } 2974 2975 if (a->size == 3) { 2976 return false; 2977 } 2978 2979 if (!vfp_access_check(s)) { 2980 return true; 2981 } 2982 2983 tmp[0] = tcg_temp_new_i32(); 2984 tmp[1] = tcg_temp_new_i32(); 2985 2986 for (pass = 0; pass < (a->q ? 
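/* one pass per 64-bit doubleword being reversed */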
2 : 1); pass++) { 2987 for (half = 0; half < 2; half++) { 2988 read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32); 2989 switch (a->size) { 2990 case 0: 2991 tcg_gen_bswap32_i32(tmp[half], tmp[half]); 2992 break; 2993 case 1: 2994 gen_swap_half(tmp[half], tmp[half]); 2995 break; 2996 case 2: 2997 break; 2998 default: 2999 g_assert_not_reached(); 3000 } 3001 } 3002 write_neon_element32(tmp[1], a->vd, pass * 2, MO_32); 3003 write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32); 3004 } 3005 3006 tcg_temp_free_i32(tmp[0]); 3007 tcg_temp_free_i32(tmp[1]); 3008 return true; 3009 } 3010 3011 static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, 3012 NeonGenWidenFn *widenfn, 3013 NeonGenTwo64OpFn *opfn, 3014 NeonGenTwo64OpFn *accfn) 3015 { 3016 /* 3017 * Pairwise long operations: widen both halves of the pair, 3018 * combine the pairs with the opfn, and then possibly accumulate 3019 * into the destination with the accfn. 3020 */ 3021 int pass; 3022 3023 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3024 return false; 3025 } 3026 3027 /* UNDEF accesses to D16-D31 if they don't exist. */ 3028 if (!dc_isar_feature(aa32_simd_r32, s) && 3029 ((a->vd | a->vm) & 0x10)) { 3030 return false; 3031 } 3032 3033 if ((a->vd | a->vm) & a->q) { 3034 return false; 3035 } 3036 3037 if (!widenfn) { 3038 return false; 3039 } 3040 3041 if (!vfp_access_check(s)) { 3042 return true; 3043 } 3044 3045 for (pass = 0; pass < a->q + 1; pass++) { 3046 TCGv_i32 tmp; 3047 TCGv_i64 rm0_64, rm1_64, rd_64; 3048 3049 rm0_64 = tcg_temp_new_i64(); 3050 rm1_64 = tcg_temp_new_i64(); 3051 rd_64 = tcg_temp_new_i64(); 3052 3053 tmp = tcg_temp_new_i32(); 3054 read_neon_element32(tmp, a->vm, pass * 2, MO_32); 3055 widenfn(rm0_64, tmp); 3056 read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32); 3057 widenfn(rm1_64, tmp); 3058 tcg_temp_free_i32(tmp); 3059 3060 opfn(rd_64, rm0_64, rm1_64); 3061 tcg_temp_free_i64(rm0_64); 3062 tcg_temp_free_i64(rm1_64); 3063 3064 if (accfn) { 3065 TCGv_i64 tmp64 = tcg_temp_new_i64(); 3066 read_neon_element64(tmp64, a->vd, pass, MO_64); 3067 accfn(rd_64, tmp64, rd_64); 3068 tcg_temp_free_i64(tmp64); 3069 } 3070 write_neon_element64(rd_64, a->vd, pass, MO_64); 3071 tcg_temp_free_i64(rd_64); 3072 } 3073 return true; 3074 } 3075 3076 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a) 3077 { 3078 static NeonGenWidenFn * const widenfn[] = { 3079 gen_helper_neon_widen_s8, 3080 gen_helper_neon_widen_s16, 3081 tcg_gen_ext_i32_i64, 3082 NULL, 3083 }; 3084 static NeonGenTwo64OpFn * const opfn[] = { 3085 gen_helper_neon_paddl_u16, 3086 gen_helper_neon_paddl_u32, 3087 tcg_gen_add_i64, 3088 NULL, 3089 }; 3090 3091 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); 3092 } 3093 3094 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a) 3095 { 3096 static NeonGenWidenFn * const widenfn[] = { 3097 gen_helper_neon_widen_u8, 3098 gen_helper_neon_widen_u16, 3099 tcg_gen_extu_i32_i64, 3100 NULL, 3101 }; 3102 static NeonGenTwo64OpFn * const opfn[] = { 3103 gen_helper_neon_paddl_u16, 3104 gen_helper_neon_paddl_u32, 3105 tcg_gen_add_i64, 3106 NULL, 3107 }; 3108 3109 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); 3110 } 3111 3112 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a) 3113 { 3114 static NeonGenWidenFn * const widenfn[] = { 3115 gen_helper_neon_widen_s8, 3116 gen_helper_neon_widen_s16, 3117 tcg_gen_ext_i32_i64, 3118 NULL, 3119 }; 3120 static NeonGenTwo64OpFn * const opfn[] = { 3121 gen_helper_neon_paddl_u16, 3122 gen_helper_neon_paddl_u32, 3123 
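/* size 2: widened 32-bit pairs fit in 64 bits, so a plain add suffices */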
tcg_gen_add_i64, 3124 NULL, 3125 }; 3126 static NeonGenTwo64OpFn * const accfn[] = { 3127 gen_helper_neon_addl_u16, 3128 gen_helper_neon_addl_u32, 3129 tcg_gen_add_i64, 3130 NULL, 3131 }; 3132 3133 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], 3134 accfn[a->size]); 3135 } 3136 3137 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a) 3138 { 3139 static NeonGenWidenFn * const widenfn[] = { 3140 gen_helper_neon_widen_u8, 3141 gen_helper_neon_widen_u16, 3142 tcg_gen_extu_i32_i64, 3143 NULL, 3144 }; 3145 static NeonGenTwo64OpFn * const opfn[] = { 3146 gen_helper_neon_paddl_u16, 3147 gen_helper_neon_paddl_u32, 3148 tcg_gen_add_i64, 3149 NULL, 3150 }; 3151 static NeonGenTwo64OpFn * const accfn[] = { 3152 gen_helper_neon_addl_u16, 3153 gen_helper_neon_addl_u32, 3154 tcg_gen_add_i64, 3155 NULL, 3156 }; 3157 3158 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], 3159 accfn[a->size]); 3160 } 3161 3162 typedef void ZipFn(TCGv_ptr, TCGv_ptr); 3163 3164 static bool do_zip_uzp(DisasContext *s, arg_2misc *a, 3165 ZipFn *fn) 3166 { 3167 TCGv_ptr pd, pm; 3168 3169 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3170 return false; 3171 } 3172 3173 /* UNDEF accesses to D16-D31 if they don't exist. */ 3174 if (!dc_isar_feature(aa32_simd_r32, s) && 3175 ((a->vd | a->vm) & 0x10)) { 3176 return false; 3177 } 3178 3179 if ((a->vd | a->vm) & a->q) { 3180 return false; 3181 } 3182 3183 if (!fn) { 3184 /* Bad size or size/q combination */ 3185 return false; 3186 } 3187 3188 if (!vfp_access_check(s)) { 3189 return true; 3190 } 3191 3192 pd = vfp_reg_ptr(true, a->vd); 3193 pm = vfp_reg_ptr(true, a->vm); 3194 fn(pd, pm); 3195 tcg_temp_free_ptr(pd); 3196 tcg_temp_free_ptr(pm); 3197 return true; 3198 } 3199 3200 static bool trans_VUZP(DisasContext *s, arg_2misc *a) 3201 { 3202 static ZipFn * const fn[2][4] = { 3203 { 3204 gen_helper_neon_unzip8, 3205 gen_helper_neon_unzip16, 3206 NULL, 3207 NULL, 3208 }, { 3209 gen_helper_neon_qunzip8, 3210 gen_helper_neon_qunzip16, 3211 gen_helper_neon_qunzip32, 3212 NULL, 3213 } 3214 }; 3215 return do_zip_uzp(s, a, fn[a->q][a->size]); 3216 } 3217 3218 static bool trans_VZIP(DisasContext *s, arg_2misc *a) 3219 { 3220 static ZipFn * const fn[2][4] = { 3221 { 3222 gen_helper_neon_zip8, 3223 gen_helper_neon_zip16, 3224 NULL, 3225 NULL, 3226 }, { 3227 gen_helper_neon_qzip8, 3228 gen_helper_neon_qzip16, 3229 gen_helper_neon_qzip32, 3230 NULL, 3231 } 3232 }; 3233 return do_zip_uzp(s, a, fn[a->q][a->size]); 3234 } 3235 3236 static bool do_vmovn(DisasContext *s, arg_2misc *a, 3237 NeonGenNarrowEnvFn *narrowfn) 3238 { 3239 TCGv_i64 rm; 3240 TCGv_i32 rd0, rd1; 3241 3242 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3243 return false; 3244 } 3245 3246 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3247 if (!dc_isar_feature(aa32_simd_r32, s) && 3248 ((a->vd | a->vm) & 0x10)) { 3249 return false; 3250 } 3251 3252 if (a->vm & 1) { 3253 return false; 3254 } 3255 3256 if (!narrowfn) { 3257 return false; 3258 } 3259 3260 if (!vfp_access_check(s)) { 3261 return true; 3262 } 3263 3264 rm = tcg_temp_new_i64(); 3265 rd0 = tcg_temp_new_i32(); 3266 rd1 = tcg_temp_new_i32(); 3267 3268 read_neon_element64(rm, a->vm, 0, MO_64); 3269 narrowfn(rd0, cpu_env, rm); 3270 read_neon_element64(rm, a->vm, 1, MO_64); 3271 narrowfn(rd1, cpu_env, rm); 3272 write_neon_element32(rd0, a->vd, 0, MO_32); 3273 write_neon_element32(rd1, a->vd, 1, MO_32); 3274 tcg_temp_free_i32(rd0); 3275 tcg_temp_free_i32(rd1); 3276 tcg_temp_free_i64(rm); 3277 return true; 3278 } 3279 3280 #define DO_VMOVN(INSN, FUNC) \ 3281 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3282 { \ 3283 static NeonGenNarrowEnvFn * const narrowfn[] = { \ 3284 FUNC##8, \ 3285 FUNC##16, \ 3286 FUNC##32, \ 3287 NULL, \ 3288 }; \ 3289 return do_vmovn(s, a, narrowfn[a->size]); \ 3290 } 3291 3292 DO_VMOVN(VMOVN, gen_neon_narrow_u) 3293 DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat) 3294 DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s) 3295 DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u) 3296 3297 static bool trans_VSHLL(DisasContext *s, arg_2misc *a) 3298 { 3299 TCGv_i32 rm0, rm1; 3300 TCGv_i64 rd; 3301 static NeonGenWidenFn * const widenfns[] = { 3302 gen_helper_neon_widen_u8, 3303 gen_helper_neon_widen_u16, 3304 tcg_gen_extu_i32_i64, 3305 NULL, 3306 }; 3307 NeonGenWidenFn *widenfn = widenfns[a->size]; 3308 3309 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3310 return false; 3311 } 3312 3313 /* UNDEF accesses to D16-D31 if they don't exist. */ 3314 if (!dc_isar_feature(aa32_simd_r32, s) && 3315 ((a->vd | a->vm) & 0x10)) { 3316 return false; 3317 } 3318 3319 if (a->vd & 1) { 3320 return false; 3321 } 3322 3323 if (!widenfn) { 3324 return false; 3325 } 3326 3327 if (!vfp_access_check(s)) { 3328 return true; 3329 } 3330 3331 rd = tcg_temp_new_i64(); 3332 rm0 = tcg_temp_new_i32(); 3333 rm1 = tcg_temp_new_i32(); 3334 3335 read_neon_element32(rm0, a->vm, 0, MO_32); 3336 read_neon_element32(rm1, a->vm, 1, MO_32); 3337 3338 widenfn(rd, rm0); 3339 tcg_gen_shli_i64(rd, rd, 8 << a->size); 3340 write_neon_element64(rd, a->vd, 0, MO_64); 3341 widenfn(rd, rm1); 3342 tcg_gen_shli_i64(rd, rd, 8 << a->size); 3343 write_neon_element64(rd, a->vd, 1, MO_64); 3344 3345 tcg_temp_free_i64(rd); 3346 tcg_temp_free_i32(rm0); 3347 tcg_temp_free_i32(rm1); 3348 return true; 3349 } 3350 3351 static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a) 3352 { 3353 TCGv_ptr fpst; 3354 TCGv_i64 tmp; 3355 TCGv_i32 dst0, dst1; 3356 3357 if (!dc_isar_feature(aa32_bf16, s)) { 3358 return false; 3359 } 3360 3361 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3362 if (!dc_isar_feature(aa32_simd_r32, s) && 3363 ((a->vd | a->vm) & 0x10)) { 3364 return false; 3365 } 3366 3367 if ((a->vm & 1) || (a->size != 1)) { 3368 return false; 3369 } 3370 3371 if (!vfp_access_check(s)) { 3372 return true; 3373 } 3374 3375 fpst = fpstatus_ptr(FPST_STD); 3376 tmp = tcg_temp_new_i64(); 3377 dst0 = tcg_temp_new_i32(); 3378 dst1 = tcg_temp_new_i32(); 3379 3380 read_neon_element64(tmp, a->vm, 0, MO_64); 3381 gen_helper_bfcvt_pair(dst0, tmp, fpst); 3382 3383 read_neon_element64(tmp, a->vm, 1, MO_64); 3384 gen_helper_bfcvt_pair(dst1, tmp, fpst); 3385 3386 write_neon_element32(dst0, a->vd, 0, MO_32); 3387 write_neon_element32(dst1, a->vd, 1, MO_32); 3388 3389 tcg_temp_free_i64(tmp); 3390 tcg_temp_free_i32(dst0); 3391 tcg_temp_free_i32(dst1); 3392 tcg_temp_free_ptr(fpst); 3393 return true; 3394 } 3395 3396 static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) 3397 { 3398 TCGv_ptr fpst; 3399 TCGv_i32 ahp, tmp, tmp2, tmp3; 3400 3401 if (!arm_dc_feature(s, ARM_FEATURE_NEON) || 3402 !dc_isar_feature(aa32_fp16_spconv, s)) { 3403 return false; 3404 } 3405 3406 /* UNDEF accesses to D16-D31 if they don't exist. */ 3407 if (!dc_isar_feature(aa32_simd_r32, s) && 3408 ((a->vd | a->vm) & 0x10)) { 3409 return false; 3410 } 3411 3412 if ((a->vm & 1) || (a->size != 1)) { 3413 return false; 3414 } 3415 3416 if (!vfp_access_check(s)) { 3417 return true; 3418 } 3419 3420 fpst = fpstatus_ptr(FPST_STD); 3421 ahp = get_ahp_flag(); 3422 tmp = tcg_temp_new_i32(); 3423 read_neon_element32(tmp, a->vm, 0, MO_32); 3424 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 3425 tmp2 = tcg_temp_new_i32(); 3426 read_neon_element32(tmp2, a->vm, 1, MO_32); 3427 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp); 3428 tcg_gen_shli_i32(tmp2, tmp2, 16); 3429 tcg_gen_or_i32(tmp2, tmp2, tmp); 3430 read_neon_element32(tmp, a->vm, 2, MO_32); 3431 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 3432 tmp3 = tcg_temp_new_i32(); 3433 read_neon_element32(tmp3, a->vm, 3, MO_32); 3434 write_neon_element32(tmp2, a->vd, 0, MO_32); 3435 tcg_temp_free_i32(tmp2); 3436 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp); 3437 tcg_gen_shli_i32(tmp3, tmp3, 16); 3438 tcg_gen_or_i32(tmp3, tmp3, tmp); 3439 write_neon_element32(tmp3, a->vd, 1, MO_32); 3440 tcg_temp_free_i32(tmp3); 3441 tcg_temp_free_i32(tmp); 3442 tcg_temp_free_i32(ahp); 3443 tcg_temp_free_ptr(fpst); 3444 3445 return true; 3446 } 3447 3448 static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) 3449 { 3450 TCGv_ptr fpst; 3451 TCGv_i32 ahp, tmp, tmp2, tmp3; 3452 3453 if (!arm_dc_feature(s, ARM_FEATURE_NEON) || 3454 !dc_isar_feature(aa32_fp16_spconv, s)) { 3455 return false; 3456 } 3457 3458 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3459 if (!dc_isar_feature(aa32_simd_r32, s) && 3460 ((a->vd | a->vm) & 0x10)) { 3461 return false; 3462 } 3463 3464 if ((a->vd & 1) || (a->size != 1)) { 3465 return false; 3466 } 3467 3468 if (!vfp_access_check(s)) { 3469 return true; 3470 } 3471 3472 fpst = fpstatus_ptr(FPST_STD); 3473 ahp = get_ahp_flag(); 3474 tmp3 = tcg_temp_new_i32(); 3475 tmp2 = tcg_temp_new_i32(); 3476 tmp = tcg_temp_new_i32(); 3477 read_neon_element32(tmp, a->vm, 0, MO_32); 3478 read_neon_element32(tmp2, a->vm, 1, MO_32); 3479 tcg_gen_ext16u_i32(tmp3, tmp); 3480 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); 3481 write_neon_element32(tmp3, a->vd, 0, MO_32); 3482 tcg_gen_shri_i32(tmp, tmp, 16); 3483 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp); 3484 write_neon_element32(tmp, a->vd, 1, MO_32); 3485 tcg_temp_free_i32(tmp); 3486 tcg_gen_ext16u_i32(tmp3, tmp2); 3487 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); 3488 write_neon_element32(tmp3, a->vd, 2, MO_32); 3489 tcg_temp_free_i32(tmp3); 3490 tcg_gen_shri_i32(tmp2, tmp2, 16); 3491 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp); 3492 write_neon_element32(tmp2, a->vd, 3, MO_32); 3493 tcg_temp_free_i32(tmp2); 3494 tcg_temp_free_i32(ahp); 3495 tcg_temp_free_ptr(fpst); 3496 3497 return true; 3498 } 3499 3500 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn) 3501 { 3502 int vec_size = a->q ? 16 : 8; 3503 int rd_ofs = neon_full_reg_offset(a->vd); 3504 int rm_ofs = neon_full_reg_offset(a->vm); 3505 3506 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3507 return false; 3508 } 3509 3510 /* UNDEF accesses to D16-D31 if they don't exist. */ 3511 if (!dc_isar_feature(aa32_simd_r32, s) && 3512 ((a->vd | a->vm) & 0x10)) { 3513 return false; 3514 } 3515 3516 if (a->size == 3) { 3517 return false; 3518 } 3519 3520 if ((a->vd | a->vm) & a->q) { 3521 return false; 3522 } 3523 3524 if (!vfp_access_check(s)) { 3525 return true; 3526 } 3527 3528 fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size); 3529 3530 return true; 3531 } 3532 3533 #define DO_2MISC_VEC(INSN, FN) \ 3534 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3535 { \ 3536 return do_2misc_vec(s, a, FN); \ 3537 } 3538 3539 DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg) 3540 DO_2MISC_VEC(VABS, tcg_gen_gvec_abs) 3541 DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0) 3542 DO_2MISC_VEC(VCGT0, gen_gvec_cgt0) 3543 DO_2MISC_VEC(VCLE0, gen_gvec_cle0) 3544 DO_2MISC_VEC(VCGE0, gen_gvec_cge0) 3545 DO_2MISC_VEC(VCLT0, gen_gvec_clt0) 3546 3547 static bool trans_VMVN(DisasContext *s, arg_2misc *a) 3548 { 3549 if (a->size != 0) { 3550 return false; 3551 } 3552 return do_2misc_vec(s, a, tcg_gen_gvec_not); 3553 } 3554 3555 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \ 3556 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 3557 uint32_t rm_ofs, uint32_t oprsz, \ 3558 uint32_t maxsz) \ 3559 { \ 3560 tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \ 3561 DATA, FUNC); \ 3562 } 3563 3564 #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \ 3565 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 3566 uint32_t rm_ofs, uint32_t oprsz, \ 3567 uint32_t maxsz) \ 3568 { \ 3569 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \ 3570 } 3571 3572 WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0) 3573 WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1) 3574 WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0) 3575 WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1) 3576 WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0) 3577 WRAP_2M_2_OOL_FN(gen_SHA1SU1, 
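/* SHA1SU1 and SHA256SU0 reuse the two-operand out-of-line wrapper */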
gen_helper_crypto_sha1su1, 0) 3578 WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0) 3579 3580 #define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \ 3581 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3582 { \ 3583 if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \ 3584 return false; \ 3585 } \ 3586 return do_2misc_vec(s, a, gen_##INSN); \ 3587 } 3588 3589 DO_2M_CRYPTO(AESE, aa32_aes, 0) 3590 DO_2M_CRYPTO(AESD, aa32_aes, 0) 3591 DO_2M_CRYPTO(AESMC, aa32_aes, 0) 3592 DO_2M_CRYPTO(AESIMC, aa32_aes, 0) 3593 DO_2M_CRYPTO(SHA1H, aa32_sha1, 2) 3594 DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2) 3595 DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) 3596 3597 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) 3598 { 3599 TCGv_i32 tmp; 3600 int pass; 3601 3602 /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ 3603 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3604 return false; 3605 } 3606 3607 /* UNDEF accesses to D16-D31 if they don't exist. */ 3608 if (!dc_isar_feature(aa32_simd_r32, s) && 3609 ((a->vd | a->vm) & 0x10)) { 3610 return false; 3611 } 3612 3613 if (!fn) { 3614 return false; 3615 } 3616 3617 if ((a->vd | a->vm) & a->q) { 3618 return false; 3619 } 3620 3621 if (!vfp_access_check(s)) { 3622 return true; 3623 } 3624 3625 tmp = tcg_temp_new_i32(); 3626 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 3627 read_neon_element32(tmp, a->vm, pass, MO_32); 3628 fn(tmp, tmp); 3629 write_neon_element32(tmp, a->vd, pass, MO_32); 3630 } 3631 tcg_temp_free_i32(tmp); 3632 3633 return true; 3634 } 3635 3636 static bool trans_VREV32(DisasContext *s, arg_2misc *a) 3637 { 3638 static NeonGenOneOpFn * const fn[] = { 3639 tcg_gen_bswap32_i32, 3640 gen_swap_half, 3641 NULL, 3642 NULL, 3643 }; 3644 return do_2misc(s, a, fn[a->size]); 3645 } 3646 3647 static bool trans_VREV16(DisasContext *s, arg_2misc *a) 3648 { 3649 if (a->size != 0) { 3650 return false; 3651 } 3652 return do_2misc(s, a, gen_rev16); 3653 } 3654 3655 static bool trans_VCLS(DisasContext *s, arg_2misc *a) 3656 { 3657 static NeonGenOneOpFn * const fn[] = { 3658 gen_helper_neon_cls_s8, 3659 gen_helper_neon_cls_s16, 3660 gen_helper_neon_cls_s32, 3661 NULL, 3662 }; 3663 return do_2misc(s, a, fn[a->size]); 3664 } 3665 3666 static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm) 3667 { 3668 tcg_gen_clzi_i32(rd, rm, 32); 3669 } 3670 3671 static bool trans_VCLZ(DisasContext *s, arg_2misc *a) 3672 { 3673 static NeonGenOneOpFn * const fn[] = { 3674 gen_helper_neon_clz_u8, 3675 gen_helper_neon_clz_u16, 3676 do_VCLZ_32, 3677 NULL, 3678 }; 3679 return do_2misc(s, a, fn[a->size]); 3680 } 3681 3682 static bool trans_VCNT(DisasContext *s, arg_2misc *a) 3683 { 3684 if (a->size != 0) { 3685 return false; 3686 } 3687 return do_2misc(s, a, gen_helper_neon_cnt_u8); 3688 } 3689 3690 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3691 uint32_t oprsz, uint32_t maxsz) 3692 { 3693 tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs, 3694 vece == MO_16 ? 0x7fff : 0x7fffffff, 3695 oprsz, maxsz); 3696 } 3697 3698 static bool trans_VABS_F(DisasContext *s, arg_2misc *a) 3699 { 3700 if (a->size == MO_16) { 3701 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3702 return false; 3703 } 3704 } else if (a->size != MO_32) { 3705 return false; 3706 } 3707 return do_2misc_vec(s, a, gen_VABS_F); 3708 } 3709 3710 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3711 uint32_t oprsz, uint32_t maxsz) 3712 { 3713 tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs, 3714 vece == MO_16 ? 
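/* flip only the sign bit of each element */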
0x8000 : 0x80000000, 3715 oprsz, maxsz); 3716 } 3717 3718 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) 3719 { 3720 if (a->size == MO_16) { 3721 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3722 return false; 3723 } 3724 } else if (a->size != MO_32) { 3725 return false; 3726 } 3727 return do_2misc_vec(s, a, gen_VNEG_F); 3728 } 3729 3730 static bool trans_VRECPE(DisasContext *s, arg_2misc *a) 3731 { 3732 if (a->size != 2) { 3733 return false; 3734 } 3735 return do_2misc(s, a, gen_helper_recpe_u32); 3736 } 3737 3738 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) 3739 { 3740 if (a->size != 2) { 3741 return false; 3742 } 3743 return do_2misc(s, a, gen_helper_rsqrte_u32); 3744 } 3745 3746 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ 3747 static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \ 3748 { \ 3749 FUNC(d, cpu_env, m); \ 3750 } 3751 3752 WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) 3753 WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16) 3754 WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32) 3755 WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8) 3756 WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16) 3757 WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32) 3758 3759 static bool trans_VQABS(DisasContext *s, arg_2misc *a) 3760 { 3761 static NeonGenOneOpFn * const fn[] = { 3762 gen_VQABS_s8, 3763 gen_VQABS_s16, 3764 gen_VQABS_s32, 3765 NULL, 3766 }; 3767 return do_2misc(s, a, fn[a->size]); 3768 } 3769 3770 static bool trans_VQNEG(DisasContext *s, arg_2misc *a) 3771 { 3772 static NeonGenOneOpFn * const fn[] = { 3773 gen_VQNEG_s8, 3774 gen_VQNEG_s16, 3775 gen_VQNEG_s32, 3776 NULL, 3777 }; 3778 return do_2misc(s, a, fn[a->size]); 3779 } 3780 3781 #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \ 3782 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3783 uint32_t rm_ofs, \ 3784 uint32_t oprsz, uint32_t maxsz) \ 3785 { \ 3786 static gen_helper_gvec_2_ptr * const fns[4] = { \ 3787 NULL, HFUNC, SFUNC, NULL, \ 3788 }; \ 3789 TCGv_ptr fpst; \ 3790 fpst = fpstatus_ptr(vece == MO_16 ? 
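/* half-precision elements need the fp16 flavour of the standard FP status */ \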
FPST_STD_F16 : FPST_STD); \ 3791 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \ 3792 fns[vece]); \ 3793 tcg_temp_free_ptr(fpst); \ 3794 } \ 3795 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3796 { \ 3797 if (a->size == MO_16) { \ 3798 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3799 return false; \ 3800 } \ 3801 } else if (a->size != MO_32) { \ 3802 return false; \ 3803 } \ 3804 return do_2misc_vec(s, a, gen_##INSN); \ 3805 } 3806 3807 DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s) 3808 DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s) 3809 DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s) 3810 DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s) 3811 DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s) 3812 DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s) 3813 DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s) 3814 DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos) 3815 DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos) 3816 DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs) 3817 DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs) 3818 3819 DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s) 3820 3821 static bool trans_VRINTX(DisasContext *s, arg_2misc *a) 3822 { 3823 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 3824 return false; 3825 } 3826 return trans_VRINTX_impl(s, a); 3827 } 3828 3829 #define DO_VEC_RMODE(INSN, RMODE, OP) \ 3830 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3831 uint32_t rm_ofs, \ 3832 uint32_t oprsz, uint32_t maxsz) \ 3833 { \ 3834 static gen_helper_gvec_2_ptr * const fns[4] = { \ 3835 NULL, \ 3836 gen_helper_gvec_##OP##h, \ 3837 gen_helper_gvec_##OP##s, \ 3838 NULL, \ 3839 }; \ 3840 TCGv_ptr fpst; \ 3841 fpst = fpstatus_ptr(vece == 1 ? 
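/* vece 1 is MO_16: half-precision elements */ \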
FPST_STD_F16 : FPST_STD); \ 3842 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \ 3843 arm_rmode_to_sf(RMODE), fns[vece]); \ 3844 tcg_temp_free_ptr(fpst); \ 3845 } \ 3846 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3847 { \ 3848 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \ 3849 return false; \ 3850 } \ 3851 if (a->size == MO_16) { \ 3852 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3853 return false; \ 3854 } \ 3855 } else if (a->size != MO_32) { \ 3856 return false; \ 3857 } \ 3858 return do_2misc_vec(s, a, gen_##INSN); \ 3859 } 3860 3861 DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u) 3862 DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s) 3863 DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u) 3864 DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s) 3865 DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u) 3866 DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s) 3867 DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u) 3868 DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s) 3869 3870 DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_) 3871 DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_) 3872 DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_) 3873 DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_) 3874 DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_) 3875 3876 static bool trans_VSWP(DisasContext *s, arg_2misc *a) 3877 { 3878 TCGv_i64 rm, rd; 3879 int pass; 3880 3881 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3882 return false; 3883 } 3884 3885 /* UNDEF accesses to D16-D31 if they don't exist. */ 3886 if (!dc_isar_feature(aa32_simd_r32, s) && 3887 ((a->vd | a->vm) & 0x10)) { 3888 return false; 3889 } 3890 3891 if (a->size != 0) { 3892 return false; 3893 } 3894 3895 if ((a->vd | a->vm) & a->q) { 3896 return false; 3897 } 3898 3899 if (!vfp_access_check(s)) { 3900 return true; 3901 } 3902 3903 rm = tcg_temp_new_i64(); 3904 rd = tcg_temp_new_i64(); 3905 for (pass = 0; pass < (a->q ? 
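/* swap whole doublewords: two passes for Q, one for D */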
2 : 1); pass++) { 3906 read_neon_element64(rm, a->vm, pass, MO_64); 3907 read_neon_element64(rd, a->vd, pass, MO_64); 3908 write_neon_element64(rm, a->vd, pass, MO_64); 3909 write_neon_element64(rd, a->vm, pass, MO_64); 3910 } 3911 tcg_temp_free_i64(rm); 3912 tcg_temp_free_i64(rd); 3913 3914 return true; 3915 } 3916 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1) 3917 { 3918 TCGv_i32 rd, tmp; 3919 3920 rd = tcg_temp_new_i32(); 3921 tmp = tcg_temp_new_i32(); 3922 3923 tcg_gen_shli_i32(rd, t0, 8); 3924 tcg_gen_andi_i32(rd, rd, 0xff00ff00); 3925 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff); 3926 tcg_gen_or_i32(rd, rd, tmp); 3927 3928 tcg_gen_shri_i32(t1, t1, 8); 3929 tcg_gen_andi_i32(t1, t1, 0x00ff00ff); 3930 tcg_gen_andi_i32(tmp, t0, 0xff00ff00); 3931 tcg_gen_or_i32(t1, t1, tmp); 3932 tcg_gen_mov_i32(t0, rd); 3933 3934 tcg_temp_free_i32(tmp); 3935 tcg_temp_free_i32(rd); 3936 } 3937 3938 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) 3939 { 3940 TCGv_i32 rd, tmp; 3941 3942 rd = tcg_temp_new_i32(); 3943 tmp = tcg_temp_new_i32(); 3944 3945 tcg_gen_shli_i32(rd, t0, 16); 3946 tcg_gen_andi_i32(tmp, t1, 0xffff); 3947 tcg_gen_or_i32(rd, rd, tmp); 3948 tcg_gen_shri_i32(t1, t1, 16); 3949 tcg_gen_andi_i32(tmp, t0, 0xffff0000); 3950 tcg_gen_or_i32(t1, t1, tmp); 3951 tcg_gen_mov_i32(t0, rd); 3952 3953 tcg_temp_free_i32(tmp); 3954 tcg_temp_free_i32(rd); 3955 } 3956 3957 static bool trans_VTRN(DisasContext *s, arg_2misc *a) 3958 { 3959 TCGv_i32 tmp, tmp2; 3960 int pass; 3961 3962 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3963 return false; 3964 } 3965 3966 /* UNDEF accesses to D16-D31 if they don't exist. */ 3967 if (!dc_isar_feature(aa32_simd_r32, s) && 3968 ((a->vd | a->vm) & 0x10)) { 3969 return false; 3970 } 3971 3972 if ((a->vd | a->vm) & a->q) { 3973 return false; 3974 } 3975 3976 if (a->size == 3) { 3977 return false; 3978 } 3979 3980 if (!vfp_access_check(s)) { 3981 return true; 3982 } 3983 3984 tmp = tcg_temp_new_i32(); 3985 tmp2 = tcg_temp_new_i32(); 3986 if (a->size == MO_32) { 3987 for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) { 3988 read_neon_element32(tmp, a->vm, pass, MO_32); 3989 read_neon_element32(tmp2, a->vd, pass + 1, MO_32); 3990 write_neon_element32(tmp2, a->vm, pass, MO_32); 3991 write_neon_element32(tmp, a->vd, pass + 1, MO_32); 3992 } 3993 } else { 3994 for (pass = 0; pass < (a->q ? 
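/* 8- and 16-bit cases transpose within each 32-bit lane */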
4 : 2); pass++) { 3995 read_neon_element32(tmp, a->vm, pass, MO_32); 3996 read_neon_element32(tmp2, a->vd, pass, MO_32); 3997 if (a->size == MO_8) { 3998 gen_neon_trn_u8(tmp, tmp2); 3999 } else { 4000 gen_neon_trn_u16(tmp, tmp2); 4001 } 4002 write_neon_element32(tmp2, a->vm, pass, MO_32); 4003 write_neon_element32(tmp, a->vd, pass, MO_32); 4004 } 4005 } 4006 tcg_temp_free_i32(tmp); 4007 tcg_temp_free_i32(tmp2); 4008 return true; 4009 } 4010 4011 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a) 4012 { 4013 if (!dc_isar_feature(aa32_i8mm, s)) { 4014 return false; 4015 } 4016 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 4017 gen_helper_gvec_smmla_b); 4018 } 4019 4020 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a) 4021 { 4022 if (!dc_isar_feature(aa32_i8mm, s)) { 4023 return false; 4024 } 4025 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 4026 gen_helper_gvec_ummla_b); 4027 } 4028 4029 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a) 4030 { 4031 if (!dc_isar_feature(aa32_i8mm, s)) { 4032 return false; 4033 } 4034 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 4035 gen_helper_gvec_usmmla_b); 4036 } 4037 4038 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a) 4039 { 4040 if (!dc_isar_feature(aa32_bf16, s)) { 4041 return false; 4042 } 4043 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, 4044 gen_helper_gvec_bfmmla); 4045 } 4046 4047 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a) 4048 { 4049 if (!dc_isar_feature(aa32_bf16, s)) { 4050 return false; 4051 } 4052 return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD, 4053 gen_helper_gvec_bfmlal); 4054 } 4055 4056 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a) 4057 { 4058 if (!dc_isar_feature(aa32_bf16, s)) { 4059 return false; 4060 } 4061 return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm, 4062 (a->index << 1) | a->q, FPST_STD, 4063 gen_helper_gvec_bfmlal_idx); 4064 } 4065
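/*
 * Illustrative sketch, not part of the upstream file: it shows the shape of
 * the trans_/do_ pattern used throughout this file, in which a decodetree-
 * generated trans_ entry point picks per-size helpers from a table and a
 * shared do_ routine performs the feature, register-bank and alignment
 * checks before emitting TCG ops.  The instruction name VEXAMPLE is
 * hypothetical; do_2misc(), NeonGenOneOpFn and gen_helper_neon_cnt_u8 are
 * the real definitions appearing above.
 */
#if 0
static bool trans_VEXAMPLE(DisasContext *s, arg_2misc *a)
{
    /* Per-size table: NULL entries make the corresponding size UNDEF */
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_cnt_u8,
        NULL,
        NULL,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}
#endif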