/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}
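/*
 * Worked example (editorial, not from the upstream sources): for byte
 * elements the tsz field is 0b0001, so the combined tsz:imm3 value x
 * lies in [8, 15]; tszimm_esz then returns 0, tszimm_shr yields right
 * shifts of 8 down to 1, and tszimm_shl yields left shifts of 0 to 7.
 * A tsz of zero makes clz32 return 32, hence the -1 result above.
 */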
/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  Cf. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */
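/*
 * A note on conventions (editorial summary): the trans_* and gen_*
 * functions below return false for an unallocated encoding, and true
 * once the instruction has been handled.  "Handled" includes the case
 * where sve_access_check() fails, because the access check itself
 * raises the exception; hence the common pattern of returning true
 * even when no vector operation was emitted.
 */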
/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
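/*
 * Editorial note: the "data" argument is packed into the gvec
 * descriptor by simd_desc() and recovered by the out-of-line helper
 * with simd_data(); it typically carries an immediate operand or a
 * sub-opcode such as an index.
 */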
/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}
/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}
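/*
 * Editorial note: unlike the gen_gvec_ool_* wrappers above, which call
 * an out-of-line C helper, the gen_gvec_fn_* wrappers expand inline
 * via the generic gvec expanders (tcg_gen_gvec_*), letting TCG emit
 * host vector instructions where available.
 */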
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, tcg_env, dofs);
    tcg_gen_addi_ptr(gptr, tcg_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));

    do_pred_flags(t);
}
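/*
 * Editorial note on the encoding assumed by do_pred_flags: the predtest
 * helpers return a word with the PredTest N flag in bit 31, the inverse
 * of Z in bit 1, and C in bit 0.  That maps directly onto QEMU's flag
 * representation, where cpu_ZF holds zero exactly when the Z flag is
 * set; V is always clear for PredTest.
 */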
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};

static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a)
TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
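/*
 * Summary of the BSL family in terms of the selector k (editorial):
 *   BSL:   d = (n & k) | (m & ~k)
 *   BSL1N: d = (~n & k) | (m & ~k)
 *   BSL2N: d = (n & k) | (~m & ~k)
 *   NBSL:  d = ~((n & k) | (m & ~k))
 */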
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {             \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,         \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                       \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {                  \
        gen_helper_##name##_b, gen_helper_##name##_h,                   \
        gen_helper_##name##_s, gen_helper_##name##_d,                   \
    };                                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)
static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);

    write_fp_dreg(s, a->rd, temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {              \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,           \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,           \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ
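/*
 * Editorial note: each reduction above produces a 64-bit scalar, which
 * write_fp_dreg() stores to the destination while zeroing the remainder
 * of the Zreg, as required for a scalar destination.
 */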
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
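/*
 * Worked example (editorial): "ASR Z0.B, P0/M, Z0.B, #8" clamps the
 * shift to 7 before calling the helper, while "LSR Z0.B, P0/M, Z0.B, #8"
 * zeroes the active byte elements via do_movz_zpz with invert set.
 */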
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {             \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,            \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {              \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,   \
        gen_helper_sve_##name##_zzw_s, NULL                             \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                    \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
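/*
 * Editorial note: the _zpzw/_zzw "wide element" forms shift each
 * element of Zn by the 64-bit element of Zm that overlaps it, which is
 * why no .D variant exists (slot 3 of the tables above is NULL).
 */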
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])

/*
 *** SVE Index Generation Group
 */

static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);
    }
    return true;
}

TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
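/*
 * Editorial note: INDEX writes Zd[i] = start + i * incr for each
 * element, e.g. "INDEX Z0.S, #1, #2" yields 1, 3, 5, ... across the
 * whole vector.
 */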
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL,                   gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL,                    gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)
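/*
 * Editorial note (assumed semantics, not from the upstream comments):
 * the ADR forms compute Zd = Zn + (Zm << imm) per element; the p32/p64
 * variants use 32- and 64-bit elements, while the s32/u32 variants
 * sign- or zero-extend a 32-bit offset within a 64-bit element.
 */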
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, tcg_env, nofs);
        tcg_gen_ld_i64(pm, tcg_env, mofs);
        tcg_gen_ld_i64(pg, tcg_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, tcg_env, dofs);

        do_predtest1(pd, pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, tcg_env, nofs);
            tcg_gen_ld_i64(pg, tcg_env, gofs);
            do_predtest1(pn, pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
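/*
 * Worked example (editorial): with a 256-bit vector and esz == MO_32
 * there are 8 elements, so POW2 and ALL both give 8, VL8 gives 8,
 * VL16 gives 0 (the bound exceeds the vector), and MUL3 gives 6.
 */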
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, tcg_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, tcg_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, tcg_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, tcg_env, ofs + i);
        }
    }

 done:
    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));

    do_pred_flags(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
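/*
 * Worked example (editorial): for "PTRUE P0.S, VL4" with a 256-bit
 * vector, numelem is 4 and setsz is 16 predicate bits, so lastword
 * becomes 0x1111 (one bit per 4-byte element); since the vector is no
 * longer than 64 bytes, a single 64-bit store writes the predicate.
 */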
*/ 1891 if (inc == 0) { 1892 if (a->u) { 1893 tcg_gen_ext32u_i64(reg, reg); 1894 } else { 1895 tcg_gen_ext32s_i64(reg, reg); 1896 } 1897 } else { 1898 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 1899 } 1900 return true; 1901 } 1902 1903 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 1904 { 1905 if (!dc_isar_feature(aa64_sve, s)) { 1906 return false; 1907 } 1908 if (!sve_access_check(s)) { 1909 return true; 1910 } 1911 1912 unsigned fullsz = vec_full_reg_size(s); 1913 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1914 int inc = numelem * a->imm; 1915 TCGv_i64 reg = cpu_reg(s, a->rd); 1916 1917 if (inc != 0) { 1918 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 1919 } 1920 return true; 1921 } 1922 1923 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 1924 { 1925 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 1926 return false; 1927 } 1928 1929 unsigned fullsz = vec_full_reg_size(s); 1930 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1931 int inc = numelem * a->imm; 1932 1933 if (inc != 0) { 1934 if (sve_access_check(s)) { 1935 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 1936 vec_full_reg_offset(s, a->rn), 1937 tcg_constant_i64(a->d ? -inc : inc), 1938 fullsz, fullsz); 1939 } 1940 } else { 1941 do_mov_z(s, a->rd, a->rn); 1942 } 1943 return true; 1944 } 1945 1946 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 1947 { 1948 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 1949 return false; 1950 } 1951 1952 unsigned fullsz = vec_full_reg_size(s); 1953 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1954 int inc = numelem * a->imm; 1955 1956 if (inc != 0) { 1957 if (sve_access_check(s)) { 1958 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 1959 tcg_constant_i64(inc), a->u, a->d); 1960 } 1961 } else { 1962 do_mov_z(s, a->rd, a->rn); 1963 } 1964 return true; 1965 } 1966 1967 /* 1968 *** SVE Bitwise Immediate Group 1969 */ 1970 1971 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 1972 { 1973 uint64_t imm; 1974 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 1975 extract32(a->dbm, 0, 6), 1976 extract32(a->dbm, 6, 6))) { 1977 return false; 1978 } 1979 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 1980 } 1981 1982 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 1983 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 1984 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 1985 1986 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 1987 { 1988 uint64_t imm; 1989 1990 if (!dc_isar_feature(aa64_sve, s)) { 1991 return false; 1992 } 1993 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 1994 extract32(a->dbm, 0, 6), 1995 extract32(a->dbm, 6, 6))) { 1996 return false; 1997 } 1998 if (sve_access_check(s)) { 1999 do_dupi_z(s, a->rd, imm); 2000 } 2001 return true; 2002 } 2003 2004 /* 2005 *** SVE Integer Wide Immediate - Predicated Group 2006 */ 2007 2008 /* Implement all merging copies. This is used for CPY (immediate), 2009 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 
2010 */ 2011 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2012 TCGv_i64 val) 2013 { 2014 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2015 static gen_cpy * const fns[4] = { 2016 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2017 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2018 }; 2019 unsigned vsz = vec_full_reg_size(s); 2020 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2021 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2022 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2023 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2024 2025 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 2026 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn)); 2027 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2028 2029 fns[esz](t_zd, t_zn, t_pg, val, desc); 2030 } 2031 2032 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2033 { 2034 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2035 return false; 2036 } 2037 if (sve_access_check(s)) { 2038 /* Decode the VFP immediate. */ 2039 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2040 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2041 } 2042 return true; 2043 } 2044 2045 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2046 { 2047 if (!dc_isar_feature(aa64_sve, s)) { 2048 return false; 2049 } 2050 if (sve_access_check(s)) { 2051 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2052 } 2053 return true; 2054 } 2055 2056 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2057 { 2058 static gen_helper_gvec_2i * const fns[4] = { 2059 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2060 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2061 }; 2062 2063 if (!dc_isar_feature(aa64_sve, s)) { 2064 return false; 2065 } 2066 if (sve_access_check(s)) { 2067 unsigned vsz = vec_full_reg_size(s); 2068 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2069 pred_full_reg_offset(s, a->pg), 2070 tcg_constant_i64(a->imm), 2071 vsz, vsz, 0, fns[a->esz]); 2072 } 2073 return true; 2074 } 2075 2076 /* 2077 *** SVE Permute Extract Group 2078 */ 2079 2080 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2081 { 2082 if (!sve_access_check(s)) { 2083 return true; 2084 } 2085 2086 unsigned vsz = vec_full_reg_size(s); 2087 unsigned n_ofs = imm >= vsz ? 0 : imm; 2088 unsigned n_siz = vsz - n_ofs; 2089 unsigned d = vec_full_reg_offset(s, rd); 2090 unsigned n = vec_full_reg_offset(s, rn); 2091 unsigned m = vec_full_reg_offset(s, rm); 2092 2093 /* Use host vector move insns if we have appropriate sizes 2094 * and no unfortunate overlap. 
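 *
 * For example, with illustrative values vsz = 48 and imm = 16:
 * n_ofs = 16 and n_siz = 32, both acceptable to size_for_gvec, so
 * absent overlap problems we copy zn bytes [16..47] to zd bytes
 * [0..31] and zm bytes [0..15] to zd bytes [32..47]. Anything that
 * does not fit this pattern falls back to gen_helper_sve_ext.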
2095 */ 2096 if (m != d 2097 && n_ofs == size_for_gvec(n_ofs) 2098 && n_siz == size_for_gvec(n_siz) 2099 && (d != n || n_siz <= n_ofs)) { 2100 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2101 if (n_ofs != 0) { 2102 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2103 } 2104 } else { 2105 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2106 } 2107 return true; 2108 } 2109 2110 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2111 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2112 2113 /* 2114 *** SVE Permute - Unpredicated Group 2115 */ 2116 2117 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2118 { 2119 if (!dc_isar_feature(aa64_sve, s)) { 2120 return false; 2121 } 2122 if (sve_access_check(s)) { 2123 unsigned vsz = vec_full_reg_size(s); 2124 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2125 vsz, vsz, cpu_reg_sp(s, a->rn)); 2126 } 2127 return true; 2128 } 2129 2130 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2131 { 2132 if (!dc_isar_feature(aa64_sve, s)) { 2133 return false; 2134 } 2135 if ((a->imm & 0x1f) == 0) { 2136 return false; 2137 } 2138 if (sve_access_check(s)) { 2139 unsigned vsz = vec_full_reg_size(s); 2140 unsigned dofs = vec_full_reg_offset(s, a->rd); 2141 unsigned esz, index; 2142 2143 esz = ctz32(a->imm); 2144 index = a->imm >> (esz + 1); 2145 2146 if ((index << esz) < vsz) { 2147 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2148 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2149 } else { 2150 /* 2151 * While dup_mem handles 128-bit elements, dup_imm does not. 2152 * Thankfully element size doesn't matter for splatting zero. 2153 */ 2154 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2155 } 2156 } 2157 return true; 2158 } 2159 2160 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2161 { 2162 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2163 static gen_insr * const fns[4] = { 2164 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2165 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2166 }; 2167 unsigned vsz = vec_full_reg_size(s); 2168 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2169 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2170 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2171 2172 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd)); 2173 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2174 2175 fns[a->esz](t_zd, t_zn, val, desc); 2176 } 2177 2178 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2179 { 2180 if (!dc_isar_feature(aa64_sve, s)) { 2181 return false; 2182 } 2183 if (sve_access_check(s)) { 2184 TCGv_i64 t = tcg_temp_new_i64(); 2185 tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2186 do_insr_i64(s, a, t); 2187 } 2188 return true; 2189 } 2190 2191 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2192 { 2193 if (!dc_isar_feature(aa64_sve, s)) { 2194 return false; 2195 } 2196 if (sve_access_check(s)) { 2197 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2198 } 2199 return true; 2200 } 2201 2202 static gen_helper_gvec_2 * const rev_fns[4] = { 2203 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2204 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2205 }; 2206 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2207 2208 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2209 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2210 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2211 }; 2212 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 
0) 2213 2214 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2215 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2216 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2217 }; 2218 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2219 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2220 2221 static gen_helper_gvec_3 * const tbx_fns[4] = { 2222 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2223 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2224 }; 2225 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2226 2227 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2228 { 2229 static gen_helper_gvec_2 * const fns[4][2] = { 2230 { NULL, NULL }, 2231 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2232 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2233 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2234 }; 2235 2236 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2237 return false; 2238 } 2239 if (sve_access_check(s)) { 2240 unsigned vsz = vec_full_reg_size(s); 2241 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2242 vec_full_reg_offset(s, a->rn) 2243 + (a->h ? vsz / 2 : 0), 2244 vsz, vsz, 0, fns[a->esz][a->u]); 2245 } 2246 return true; 2247 } 2248 2249 /* 2250 *** SVE Permute - Predicates Group 2251 */ 2252 2253 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2254 gen_helper_gvec_3 *fn) 2255 { 2256 if (!sve_access_check(s)) { 2257 return true; 2258 } 2259 2260 unsigned vsz = pred_full_reg_size(s); 2261 2262 TCGv_ptr t_d = tcg_temp_new_ptr(); 2263 TCGv_ptr t_n = tcg_temp_new_ptr(); 2264 TCGv_ptr t_m = tcg_temp_new_ptr(); 2265 uint32_t desc = 0; 2266 2267 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2268 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2269 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2270 2271 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2272 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2273 tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm)); 2274 2275 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2276 return true; 2277 } 2278 2279 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2280 gen_helper_gvec_2 *fn) 2281 { 2282 if (!sve_access_check(s)) { 2283 return true; 2284 } 2285 2286 unsigned vsz = pred_full_reg_size(s); 2287 TCGv_ptr t_d = tcg_temp_new_ptr(); 2288 TCGv_ptr t_n = tcg_temp_new_ptr(); 2289 uint32_t desc = 0; 2290 2291 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2292 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2293 2294 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2295 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2296 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2297 2298 fn(t_d, t_n, tcg_constant_i32(desc)); 2299 return true; 2300 } 2301 2302 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2303 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2304 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2305 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2306 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2307 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2308 2309 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2310 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2311 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2312 2313 /* 2314 *** SVE 
Permute - Interleaving Group 2315 */ 2316 2317 static gen_helper_gvec_3 * const zip_fns[4] = { 2318 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2319 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2320 }; 2321 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2322 zip_fns[a->esz], a, 0) 2323 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2324 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2325 2326 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2327 gen_helper_sve2_zip_q, a, 0) 2328 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2329 gen_helper_sve2_zip_q, a, 2330 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2331 2332 static gen_helper_gvec_3 * const uzp_fns[4] = { 2333 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2334 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2335 }; 2336 2337 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2338 uzp_fns[a->esz], a, 0) 2339 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2340 uzp_fns[a->esz], a, 1 << a->esz) 2341 2342 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2343 gen_helper_sve2_uzp_q, a, 0) 2344 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2345 gen_helper_sve2_uzp_q, a, 16) 2346 2347 static gen_helper_gvec_3 * const trn_fns[4] = { 2348 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2349 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2350 }; 2351 2352 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2353 trn_fns[a->esz], a, 0) 2354 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2355 trn_fns[a->esz], a, 1 << a->esz) 2356 2357 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2358 gen_helper_sve2_trn_q, a, 0) 2359 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2360 gen_helper_sve2_trn_q, a, 16) 2361 2362 /* 2363 *** SVE Permute Vector - Predicated Group 2364 */ 2365 2366 static gen_helper_gvec_3 * const compact_fns[4] = { 2367 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2368 }; 2369 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2370 compact_fns[a->esz], a, 0) 2371 2372 /* Call the helper that computes the ARM LastActiveElement pseudocode 2373 * function, scaled by the element size. This includes the not found 2374 * indication; e.g. not found for esz=3 is -8. 2375 */ 2376 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2377 { 2378 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2379 * round up, as we do elsewhere, because we need the exact size. 2380 */ 2381 TCGv_ptr t_p = tcg_temp_new_ptr(); 2382 unsigned desc = 0; 2383 2384 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2385 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2386 2387 tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg)); 2388 2389 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2390 } 2391 2392 /* Increment LAST to the offset of the next element in the vector, 2393 * wrapping around to 0. 2394 */ 2395 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2396 { 2397 unsigned vsz = vec_full_reg_size(s); 2398 2399 tcg_gen_addi_i32(last, last, 1 << esz); 2400 if (is_power_of_2(vsz)) { 2401 tcg_gen_andi_i32(last, last, vsz - 1); 2402 } else { 2403 TCGv_i32 max = tcg_constant_i32(vsz); 2404 TCGv_i32 zero = tcg_constant_i32(0); 2405 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2406 } 2407 } 2408 2409 /* If LAST < 0, set LAST to the offset of the last element in the vector. 
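 * For a power-of-2 VSZ this is a simple mask: e.g. (illustrative
 * values) vsz = 16 and esz = 3 give the not-found value -8, and
 * -8 & (16 - 1) = 8 = vsz - (1 << esz), the offset of the last
 * element. Other sizes need the explicit movcond below.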
*/ 2410 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2411 { 2412 unsigned vsz = vec_full_reg_size(s); 2413 2414 if (is_power_of_2(vsz)) { 2415 tcg_gen_andi_i32(last, last, vsz - 1); 2416 } else { 2417 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2418 TCGv_i32 zero = tcg_constant_i32(0); 2419 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2420 } 2421 } 2422 2423 /* Load an unsigned element of ESZ from BASE+OFS. */ 2424 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2425 { 2426 TCGv_i64 r = tcg_temp_new_i64(); 2427 2428 switch (esz) { 2429 case 0: 2430 tcg_gen_ld8u_i64(r, base, ofs); 2431 break; 2432 case 1: 2433 tcg_gen_ld16u_i64(r, base, ofs); 2434 break; 2435 case 2: 2436 tcg_gen_ld32u_i64(r, base, ofs); 2437 break; 2438 case 3: 2439 tcg_gen_ld_i64(r, base, ofs); 2440 break; 2441 default: 2442 g_assert_not_reached(); 2443 } 2444 return r; 2445 } 2446 2447 /* Load an unsigned element of ESZ from RM[LAST]. */ 2448 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2449 int rm, int esz) 2450 { 2451 TCGv_ptr p = tcg_temp_new_ptr(); 2452 2453 /* Convert offset into vector into offset into ENV. 2454 * The final adjustment for the vector register base 2455 * is added via constant offset to the load. 2456 */ 2457 #if HOST_BIG_ENDIAN 2458 /* Adjust for element ordering. See vec_reg_offset. */ 2459 if (esz < 3) { 2460 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2461 } 2462 #endif 2463 tcg_gen_ext_i32_ptr(p, last); 2464 tcg_gen_add_ptr(p, p, tcg_env); 2465 2466 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2467 } 2468 2469 /* Compute CLAST for a Zreg. */ 2470 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2471 { 2472 TCGv_i32 last; 2473 TCGLabel *over; 2474 TCGv_i64 ele; 2475 unsigned vsz, esz = a->esz; 2476 2477 if (!sve_access_check(s)) { 2478 return true; 2479 } 2480 2481 last = tcg_temp_new_i32(); 2482 over = gen_new_label(); 2483 2484 find_last_active(s, last, esz, a->pg); 2485 2486 /* There is of course no movcond for a 2048-bit vector, 2487 * so we must branch over the actual store. 2488 */ 2489 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2490 2491 if (!before) { 2492 incr_last_active(s, last, esz); 2493 } 2494 2495 ele = load_last_active(s, last, a->rm, esz); 2496 2497 vsz = vec_full_reg_size(s); 2498 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2499 2500 /* If this insn used MOVPRFX, we may need a second move. */ 2501 if (a->rd != a->rn) { 2502 TCGLabel *done = gen_new_label(); 2503 tcg_gen_br(done); 2504 2505 gen_set_label(over); 2506 do_mov_z(s, a->rd, a->rn); 2507 2508 gen_set_label(done); 2509 } else { 2510 gen_set_label(over); 2511 } 2512 return true; 2513 } 2514 2515 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2516 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2517 2518 /* Compute CLAST for a scalar. */ 2519 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2520 bool before, TCGv_i64 reg_val) 2521 { 2522 TCGv_i32 last = tcg_temp_new_i32(); 2523 TCGv_i64 ele, cmp; 2524 2525 find_last_active(s, last, esz, pg); 2526 2527 /* Extend the original value of last prior to incrementing. 
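 * The sign-extended copy preserves a negative (not found) value, so
 * that the final movcond can keep REG_VAL unchanged when no element
 * was active, whatever the wrapped load below returns.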
*/ 2528 cmp = tcg_temp_new_i64(); 2529 tcg_gen_ext_i32_i64(cmp, last); 2530 2531 if (!before) { 2532 incr_last_active(s, last, esz); 2533 } 2534 2535 /* The conceit here is that while last < 0 indicates not found, after 2536 * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address 2537 * from which we can load garbage. We then discard the garbage with 2538 * a conditional move. 2539 */ 2540 ele = load_last_active(s, last, rm, esz); 2541 2542 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2543 ele, reg_val); 2544 } 2545 2546 /* Compute CLAST for a Vreg. */ 2547 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2548 { 2549 if (sve_access_check(s)) { 2550 int esz = a->esz; 2551 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2552 TCGv_i64 reg = load_esz(tcg_env, ofs, esz); 2553 2554 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2555 write_fp_dreg(s, a->rd, reg); 2556 } 2557 return true; 2558 } 2559 2560 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2561 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2562 2563 /* Compute CLAST for a Xreg. */ 2564 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2565 { 2566 TCGv_i64 reg; 2567 2568 if (!sve_access_check(s)) { 2569 return true; 2570 } 2571 2572 reg = cpu_reg(s, a->rd); 2573 switch (a->esz) { 2574 case 0: 2575 tcg_gen_ext8u_i64(reg, reg); 2576 break; 2577 case 1: 2578 tcg_gen_ext16u_i64(reg, reg); 2579 break; 2580 case 2: 2581 tcg_gen_ext32u_i64(reg, reg); 2582 break; 2583 case 3: 2584 break; 2585 default: 2586 g_assert_not_reached(); 2587 } 2588 2589 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2590 return true; 2591 } 2592 2593 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2594 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2595 2596 /* Compute LAST for a scalar. */ 2597 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2598 int pg, int rm, bool before) 2599 { 2600 TCGv_i32 last = tcg_temp_new_i32(); 2601 2602 find_last_active(s, last, esz, pg); 2603 if (before) { 2604 wrap_last_active(s, last, esz); 2605 } else { 2606 incr_last_active(s, last, esz); 2607 } 2608 2609 return load_last_active(s, last, rm, esz); 2610 } 2611 2612 /* Compute LAST for a Vreg. */ 2613 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2614 { 2615 if (sve_access_check(s)) { 2616 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2617 write_fp_dreg(s, a->rd, val); 2618 } 2619 return true; 2620 } 2621 2622 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2623 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2624 2625 /* Compute LAST for a Xreg. 
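 *
 * Following do_last_scalar above: LASTB (before == true) wraps a
 * not-found index to the final element, while LASTA (before == false)
 * increments past the last active element, wrapping a not-found
 * index to element 0.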
*/ 2626 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2627 { 2628 if (sve_access_check(s)) { 2629 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2630 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2631 } 2632 return true; 2633 } 2634 2635 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2636 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2637 2638 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2639 { 2640 if (!dc_isar_feature(aa64_sve, s)) { 2641 return false; 2642 } 2643 if (sve_access_check(s)) { 2644 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2645 } 2646 return true; 2647 } 2648 2649 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2650 { 2651 if (!dc_isar_feature(aa64_sve, s)) { 2652 return false; 2653 } 2654 if (sve_access_check(s)) { 2655 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2656 TCGv_i64 t = load_esz(tcg_env, ofs, a->esz); 2657 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2658 } 2659 return true; 2660 } 2661 2662 static gen_helper_gvec_3 * const revb_fns[4] = { 2663 NULL, gen_helper_sve_revb_h, 2664 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2665 }; 2666 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2667 2668 static gen_helper_gvec_3 * const revh_fns[4] = { 2669 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2670 }; 2671 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2672 2673 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2674 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2675 2676 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2677 2678 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2679 gen_helper_sve_splice, a, a->esz) 2680 2681 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2682 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2683 2684 /* 2685 *** SVE Integer Compare - Vectors Group 2686 */ 2687 2688 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2689 gen_helper_gvec_flags_4 *gen_fn) 2690 { 2691 TCGv_ptr pd, zn, zm, pg; 2692 unsigned vsz; 2693 TCGv_i32 t; 2694 2695 if (gen_fn == NULL) { 2696 return false; 2697 } 2698 if (!sve_access_check(s)) { 2699 return true; 2700 } 2701 2702 vsz = vec_full_reg_size(s); 2703 t = tcg_temp_new_i32(); 2704 pd = tcg_temp_new_ptr(); 2705 zn = tcg_temp_new_ptr(); 2706 zm = tcg_temp_new_ptr(); 2707 pg = tcg_temp_new_ptr(); 2708 2709 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2710 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2711 tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm)); 2712 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2713 2714 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2715 2716 do_pred_flags(t); 2717 return true; 2718 } 2719 2720 #define DO_PPZZ(NAME, name) \ 2721 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2722 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2723 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2724 }; \ 2725 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2726 a, name##_ppzz_fns[a->esz]) 2727 2728 DO_PPZZ(CMPEQ, cmpeq) 2729 DO_PPZZ(CMPNE, cmpne) 2730 DO_PPZZ(CMPGT, cmpgt) 2731 DO_PPZZ(CMPGE, cmpge) 2732 DO_PPZZ(CMPHI, cmphi) 2733 DO_PPZZ(CMPHS, cmphs) 2734 2735 #undef DO_PPZZ 2736 2737 #define DO_PPZW(NAME, name) \ 2738 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2739 
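        /* zm is always a vector of 64-bit (wide) elements; no _d form. */ \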
gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2740 gen_helper_sve_##name##_ppzw_s, NULL \ 2741 }; \ 2742 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2743 a, name##_ppzw_fns[a->esz]) 2744 2745 DO_PPZW(CMPEQ, cmpeq) 2746 DO_PPZW(CMPNE, cmpne) 2747 DO_PPZW(CMPGT, cmpgt) 2748 DO_PPZW(CMPGE, cmpge) 2749 DO_PPZW(CMPHI, cmphi) 2750 DO_PPZW(CMPHS, cmphs) 2751 DO_PPZW(CMPLT, cmplt) 2752 DO_PPZW(CMPLE, cmple) 2753 DO_PPZW(CMPLO, cmplo) 2754 DO_PPZW(CMPLS, cmpls) 2755 2756 #undef DO_PPZW 2757 2758 /* 2759 *** SVE Integer Compare - Immediate Groups 2760 */ 2761 2762 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2763 gen_helper_gvec_flags_3 *gen_fn) 2764 { 2765 TCGv_ptr pd, zn, pg; 2766 unsigned vsz; 2767 TCGv_i32 t; 2768 2769 if (gen_fn == NULL) { 2770 return false; 2771 } 2772 if (!sve_access_check(s)) { 2773 return true; 2774 } 2775 2776 vsz = vec_full_reg_size(s); 2777 t = tcg_temp_new_i32(); 2778 pd = tcg_temp_new_ptr(); 2779 zn = tcg_temp_new_ptr(); 2780 pg = tcg_temp_new_ptr(); 2781 2782 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2783 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2784 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2785 2786 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 2787 2788 do_pred_flags(t); 2789 return true; 2790 } 2791 2792 #define DO_PPZI(NAME, name) \ 2793 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 2794 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 2795 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 2796 }; \ 2797 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 2798 name##_ppzi_fns[a->esz]) 2799 2800 DO_PPZI(CMPEQ, cmpeq) 2801 DO_PPZI(CMPNE, cmpne) 2802 DO_PPZI(CMPGT, cmpgt) 2803 DO_PPZI(CMPGE, cmpge) 2804 DO_PPZI(CMPHI, cmphi) 2805 DO_PPZI(CMPHS, cmphs) 2806 DO_PPZI(CMPLT, cmplt) 2807 DO_PPZI(CMPLE, cmple) 2808 DO_PPZI(CMPLO, cmplo) 2809 DO_PPZI(CMPLS, cmpls) 2810 2811 #undef DO_PPZI 2812 2813 /* 2814 *** SVE Partition Break Group 2815 */ 2816 2817 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2818 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2819 { 2820 if (!sve_access_check(s)) { 2821 return true; 2822 } 2823 2824 unsigned vsz = pred_full_reg_size(s); 2825 2826 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2827 TCGv_ptr d = tcg_temp_new_ptr(); 2828 TCGv_ptr n = tcg_temp_new_ptr(); 2829 TCGv_ptr m = tcg_temp_new_ptr(); 2830 TCGv_ptr g = tcg_temp_new_ptr(); 2831 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2832 2833 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 2834 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 2835 tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm)); 2836 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 2837 2838 if (a->s) { 2839 TCGv_i32 t = tcg_temp_new_i32(); 2840 fn_s(t, d, n, m, g, desc); 2841 do_pred_flags(t); 2842 } else { 2843 fn(d, n, m, g, desc); 2844 } 2845 return true; 2846 } 2847 2848 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2849 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 2850 { 2851 if (!sve_access_check(s)) { 2852 return true; 2853 } 2854 2855 unsigned vsz = pred_full_reg_size(s); 2856 2857 /* Predicate sizes may be smaller and cannot use simd_desc. 
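 * A predicate register holds one bit per vector byte, i.e. vsz / 8
 * bytes: e.g. (illustrative) a 128-bit vector has a 2-byte predicate,
 * smaller than simd_desc can encode, so PREDDESC carries the exact
 * size instead.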
*/ 2858 TCGv_ptr d = tcg_temp_new_ptr(); 2859 TCGv_ptr n = tcg_temp_new_ptr(); 2860 TCGv_ptr g = tcg_temp_new_ptr(); 2861 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2862 2863 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 2864 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 2865 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 2866 2867 if (a->s) { 2868 TCGv_i32 t = tcg_temp_new_i32(); 2869 fn_s(t, d, n, g, desc); 2870 do_pred_flags(t); 2871 } else { 2872 fn(d, n, g, desc); 2873 } 2874 return true; 2875 } 2876 2877 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 2878 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 2879 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 2880 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 2881 2882 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 2883 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 2884 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 2885 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 2886 2887 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 2888 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 2889 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 2890 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 2891 2892 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 2893 gen_helper_sve_brkn, gen_helper_sve_brkns) 2894 2895 /* 2896 *** SVE Predicate Count Group 2897 */ 2898 2899 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 2900 { 2901 unsigned psz = pred_full_reg_size(s); 2902 2903 if (psz <= 8) { 2904 uint64_t psz_mask; 2905 2906 tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn)); 2907 if (pn != pg) { 2908 TCGv_i64 g = tcg_temp_new_i64(); 2909 tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg)); 2910 tcg_gen_and_i64(val, val, g); 2911 } 2912 2913 /* Reduce the pred_esz_masks value simply to reduce the 2914 * size of the code generated here. 2915 */ 2916 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 2917 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 2918 2919 tcg_gen_ctpop_i64(val, val); 2920 } else { 2921 TCGv_ptr t_pn = tcg_temp_new_ptr(); 2922 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2923 unsigned desc = 0; 2924 2925 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 2926 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2927 2928 tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn)); 2929 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2930 2931 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 2932 } 2933 } 2934 2935 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 2936 { 2937 if (!dc_isar_feature(aa64_sve, s)) { 2938 return false; 2939 } 2940 if (sve_access_check(s)) { 2941 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 2942 } 2943 return true; 2944 } 2945 2946 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 2947 { 2948 if (!dc_isar_feature(aa64_sve, s)) { 2949 return false; 2950 } 2951 if (sve_access_check(s)) { 2952 TCGv_i64 reg = cpu_reg(s, a->rd); 2953 TCGv_i64 val = tcg_temp_new_i64(); 2954 2955 do_cntp(s, val, a->esz, a->pg, a->pg); 2956 if (a->d) { 2957 tcg_gen_sub_i64(reg, reg, val); 2958 } else { 2959 tcg_gen_add_i64(reg, reg, val); 2960 } 2961 } 2962 return true; 2963 } 2964 2965 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 2966 { 2967 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2968 return false; 2969 } 2970 if (sve_access_check(s)) { 2971 unsigned vsz = vec_full_reg_size(s); 2972 TCGv_i64 val = tcg_temp_new_i64(); 2973 GVecGen2sFn *gvec_fn = a->d ? 
tcg_gen_gvec_subs : tcg_gen_gvec_adds; 2974 2975 do_cntp(s, val, a->esz, a->pg, a->pg); 2976 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 2977 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 2978 } 2979 return true; 2980 } 2981 2982 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 2983 { 2984 if (!dc_isar_feature(aa64_sve, s)) { 2985 return false; 2986 } 2987 if (sve_access_check(s)) { 2988 TCGv_i64 reg = cpu_reg(s, a->rd); 2989 TCGv_i64 val = tcg_temp_new_i64(); 2990 2991 do_cntp(s, val, a->esz, a->pg, a->pg); 2992 do_sat_addsub_32(reg, val, a->u, a->d); 2993 } 2994 return true; 2995 } 2996 2997 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 2998 { 2999 if (!dc_isar_feature(aa64_sve, s)) { 3000 return false; 3001 } 3002 if (sve_access_check(s)) { 3003 TCGv_i64 reg = cpu_reg(s, a->rd); 3004 TCGv_i64 val = tcg_temp_new_i64(); 3005 3006 do_cntp(s, val, a->esz, a->pg, a->pg); 3007 do_sat_addsub_64(reg, val, a->u, a->d); 3008 } 3009 return true; 3010 } 3011 3012 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3013 { 3014 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3015 return false; 3016 } 3017 if (sve_access_check(s)) { 3018 TCGv_i64 val = tcg_temp_new_i64(); 3019 do_cntp(s, val, a->esz, a->pg, a->pg); 3020 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3021 } 3022 return true; 3023 } 3024 3025 /* 3026 *** SVE Integer Compare Scalars Group 3027 */ 3028 3029 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3030 { 3031 if (!dc_isar_feature(aa64_sve, s)) { 3032 return false; 3033 } 3034 if (!sve_access_check(s)) { 3035 return true; 3036 } 3037 3038 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3039 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3040 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3041 TCGv_i64 cmp = tcg_temp_new_i64(); 3042 3043 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3044 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3045 3046 /* VF = !NF & !CF. */ 3047 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3048 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3049 3050 /* Both NF and VF actually look at bit 31. */ 3051 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3052 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3053 return true; 3054 } 3055 3056 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3057 { 3058 TCGv_i64 op0, op1, t0, t1, tmax; 3059 TCGv_i32 t2; 3060 TCGv_ptr ptr; 3061 unsigned vsz = vec_full_reg_size(s); 3062 unsigned desc = 0; 3063 TCGCond cond; 3064 uint64_t maxval; 3065 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3066 bool eq = a->eq == a->lt; 3067 3068 /* The greater-than conditions are all SVE2. */ 3069 if (a->lt 3070 ? !dc_isar_feature(aa64_sve, s) 3071 : !dc_isar_feature(aa64_sve2, s)) { 3072 return false; 3073 } 3074 if (!sve_access_check(s)) { 3075 return true; 3076 } 3077 3078 op0 = read_cpu_reg(s, a->rn, 1); 3079 op1 = read_cpu_reg(s, a->rm, 1); 3080 3081 if (!a->sf) { 3082 if (a->u) { 3083 tcg_gen_ext32u_i64(op0, op0); 3084 tcg_gen_ext32u_i64(op1, op1); 3085 } else { 3086 tcg_gen_ext32s_i64(op0, op0); 3087 tcg_gen_ext32s_i64(op1, op1); 3088 } 3089 } 3090 3091 /* For the helper, compress the different conditions into a computation 3092 * of how many iterations for which the condition is true. 3093 */ 3094 t0 = tcg_temp_new_i64(); 3095 t1 = tcg_temp_new_i64(); 3096 3097 if (a->lt) { 3098 tcg_gen_sub_i64(t0, op1, op0); 3099 if (a->u) { 3100 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3101 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3102 } else { 3103 maxval = a->sf ? 
INT64_MAX : INT32_MAX; 3104 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3105 } 3106 } else { 3107 tcg_gen_sub_i64(t0, op0, op1); 3108 if (a->u) { 3109 maxval = 0; 3110 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3111 } else { 3112 maxval = a->sf ? INT64_MIN : INT32_MIN; 3113 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3114 } 3115 } 3116 3117 tmax = tcg_constant_i64(vsz >> a->esz); 3118 if (eq) { 3119 /* Equality means one more iteration. */ 3120 tcg_gen_addi_i64(t0, t0, 1); 3121 3122 /* 3123 * For the less-than while, if op1 is maxval (and the only time 3124 * the addition above could overflow), then we produce an all-true 3125 * predicate by setting the count to the vector length. This is 3126 * because the pseudocode is described as an increment + compare 3127 * loop, and the maximum integer would always compare true. 3128 * Similarly, the greater-than while has the same issue with the 3129 * minimum integer due to the decrement + compare loop. 3130 */ 3131 tcg_gen_movi_i64(t1, maxval); 3132 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3133 } 3134 3135 /* Bound to the maximum. */ 3136 tcg_gen_umin_i64(t0, t0, tmax); 3137 3138 /* Set the count to zero if the condition is false. */ 3139 tcg_gen_movi_i64(t1, 0); 3140 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3141 3142 /* Since we're bounded, pass as a 32-bit type. */ 3143 t2 = tcg_temp_new_i32(); 3144 tcg_gen_extrl_i64_i32(t2, t0); 3145 3146 /* Scale elements to bits. */ 3147 tcg_gen_shli_i32(t2, t2, a->esz); 3148 3149 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3150 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3151 3152 ptr = tcg_temp_new_ptr(); 3153 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3154 3155 if (a->lt) { 3156 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3157 } else { 3158 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3159 } 3160 do_pred_flags(t2); 3161 return true; 3162 } 3163 3164 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3165 { 3166 TCGv_i64 op0, op1, diff, t1, tmax; 3167 TCGv_i32 t2; 3168 TCGv_ptr ptr; 3169 unsigned vsz = vec_full_reg_size(s); 3170 unsigned desc = 0; 3171 3172 if (!dc_isar_feature(aa64_sve2, s)) { 3173 return false; 3174 } 3175 if (!sve_access_check(s)) { 3176 return true; 3177 } 3178 3179 op0 = read_cpu_reg(s, a->rn, 1); 3180 op1 = read_cpu_reg(s, a->rm, 1); 3181 3182 tmax = tcg_constant_i64(vsz); 3183 diff = tcg_temp_new_i64(); 3184 3185 if (a->rw) { 3186 /* WHILERW */ 3187 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3188 t1 = tcg_temp_new_i64(); 3189 tcg_gen_sub_i64(diff, op0, op1); 3190 tcg_gen_sub_i64(t1, op1, op0); 3191 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3192 /* Round down to a multiple of ESIZE. */ 3193 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3194 /* If op1 == op0, diff == 0, and the condition is always true. */ 3195 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3196 } else { 3197 /* WHILEWR */ 3198 tcg_gen_sub_i64(diff, op1, op0); 3199 /* Round down to a multiple of ESIZE. */ 3200 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3201 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3202 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3203 } 3204 3205 /* Bound to the maximum. */ 3206 tcg_gen_umin_i64(diff, diff, tmax); 3207 3208 /* Since we're bounded, pass as a 32-bit type. 
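 * DIFF has been clamped to at most vsz, and the architectural
 * maximum vector length is 2048 bits (256 bytes), so the value
 * always fits comfortably in 32 bits.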
*/ 3209 t2 = tcg_temp_new_i32(); 3210 tcg_gen_extrl_i64_i32(t2, diff); 3211 3212 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3213 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3214 3215 ptr = tcg_temp_new_ptr(); 3216 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3217 3218 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3219 do_pred_flags(t2); 3220 return true; 3221 } 3222 3223 /* 3224 *** SVE Integer Wide Immediate - Unpredicated Group 3225 */ 3226 3227 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3228 { 3229 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3230 return false; 3231 } 3232 if (sve_access_check(s)) { 3233 unsigned vsz = vec_full_reg_size(s); 3234 int dofs = vec_full_reg_offset(s, a->rd); 3235 uint64_t imm; 3236 3237 /* Decode the VFP immediate. */ 3238 imm = vfp_expand_imm(a->esz, a->imm); 3239 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3240 } 3241 return true; 3242 } 3243 3244 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3245 { 3246 if (!dc_isar_feature(aa64_sve, s)) { 3247 return false; 3248 } 3249 if (sve_access_check(s)) { 3250 unsigned vsz = vec_full_reg_size(s); 3251 int dofs = vec_full_reg_offset(s, a->rd); 3252 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3253 } 3254 return true; 3255 } 3256 3257 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3258 3259 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3260 { 3261 a->imm = -a->imm; 3262 return trans_ADD_zzi(s, a); 3263 } 3264 3265 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3266 { 3267 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3268 static const GVecGen2s op[4] = { 3269 { .fni8 = tcg_gen_vec_sub8_i64, 3270 .fniv = tcg_gen_sub_vec, 3271 .fno = gen_helper_sve_subri_b, 3272 .opt_opc = vecop_list, 3273 .vece = MO_8, 3274 .scalar_first = true }, 3275 { .fni8 = tcg_gen_vec_sub16_i64, 3276 .fniv = tcg_gen_sub_vec, 3277 .fno = gen_helper_sve_subri_h, 3278 .opt_opc = vecop_list, 3279 .vece = MO_16, 3280 .scalar_first = true }, 3281 { .fni4 = tcg_gen_sub_i32, 3282 .fniv = tcg_gen_sub_vec, 3283 .fno = gen_helper_sve_subri_s, 3284 .opt_opc = vecop_list, 3285 .vece = MO_32, 3286 .scalar_first = true }, 3287 { .fni8 = tcg_gen_sub_i64, 3288 .fniv = tcg_gen_sub_vec, 3289 .fno = gen_helper_sve_subri_d, 3290 .opt_opc = vecop_list, 3291 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3292 .vece = MO_64, 3293 .scalar_first = true } 3294 }; 3295 3296 if (!dc_isar_feature(aa64_sve, s)) { 3297 return false; 3298 } 3299 if (sve_access_check(s)) { 3300 unsigned vsz = vec_full_reg_size(s); 3301 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3302 vec_full_reg_offset(s, a->rn), 3303 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3304 } 3305 return true; 3306 } 3307 3308 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3309 3310 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3311 { 3312 if (sve_access_check(s)) { 3313 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3314 tcg_constant_i64(a->imm), u, d); 3315 } 3316 return true; 3317 } 3318 3319 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3320 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3321 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3322 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3323 3324 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3325 { 3326 if (sve_access_check(s)) { 3327 unsigned vsz = vec_full_reg_size(s); 
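        /*
         * Note that the immediate reaches the helper as a real i64
         * operand via tcg_gen_gvec_2i_ool rather than being packed
         * into the simd_data field of the descriptor, which stays 0.
         */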
3328 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3329 vec_full_reg_offset(s, a->rn), 3330 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3331 } 3332 return true; 3333 } 3334 3335 #define DO_ZZI(NAME, name) \ 3336 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3337 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3338 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3339 }; \ 3340 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3341 3342 DO_ZZI(SMAX, smax) 3343 DO_ZZI(UMAX, umax) 3344 DO_ZZI(SMIN, smin) 3345 DO_ZZI(UMIN, umin) 3346 3347 #undef DO_ZZI 3348 3349 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3350 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3351 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3352 }; 3353 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3354 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3355 3356 /* 3357 * SVE Multiply - Indexed 3358 */ 3359 3360 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3361 gen_helper_gvec_sdot_idx_b, a) 3362 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3363 gen_helper_gvec_sdot_idx_h, a) 3364 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3365 gen_helper_gvec_udot_idx_b, a) 3366 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3367 gen_helper_gvec_udot_idx_h, a) 3368 3369 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3370 gen_helper_gvec_sudot_idx_b, a) 3371 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3372 gen_helper_gvec_usdot_idx_b, a) 3373 3374 #define DO_SVE2_RRX(NAME, FUNC) \ 3375 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3376 a->rd, a->rn, a->rm, a->index) 3377 3378 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3379 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3380 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3381 3382 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3383 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3384 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3385 3386 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3387 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3388 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3389 3390 #undef DO_SVE2_RRX 3391 3392 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3393 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3394 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3395 3396 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3397 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3398 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3399 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3400 3401 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3402 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3403 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3404 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3405 3406 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3407 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3408 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3409 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3410 3411 #undef DO_SVE2_RRX_TB 3412 3413 #define DO_SVE2_RRXR(NAME, FUNC) \ 3414 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3415 3416 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3417 DO_SVE2_RRXR(MLA_zzxz_s, 
gen_helper_gvec_mla_idx_s) 3418 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3419 3420 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3421 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3422 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3423 3424 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3425 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3426 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3427 3428 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3429 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3430 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3431 3432 #undef DO_SVE2_RRXR 3433 3434 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3435 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3436 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3437 3438 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3439 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3440 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3441 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3442 3443 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3444 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3445 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3446 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3447 3448 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3449 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3450 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3451 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3452 3453 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3454 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3455 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3456 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3457 3458 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3459 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3460 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3461 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3462 3463 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3464 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3465 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3466 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3467 3468 #undef DO_SVE2_RRXR_TB 3469 3470 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3471 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3472 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3473 3474 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3475 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3476 3477 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3478 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3479 3480 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3481 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3482 3483 #undef DO_SVE2_RRXR_ROT 3484 3485 /* 3486 *** SVE Floating Point Multiply-Add Indexed Group 3487 */ 3488 3489 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3490 { 3491 static gen_helper_gvec_4_ptr * const fns[4] = { 3492 NULL, 3493 
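            /* The NULL above is MO_8: there is no 8-bit FP format. */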
gen_helper_gvec_fmla_idx_h, 3494 gen_helper_gvec_fmla_idx_s, 3495 gen_helper_gvec_fmla_idx_d, 3496 }; 3497 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3498 (a->index << 1) | sub, 3499 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3500 } 3501 3502 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3503 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3504 3505 /* 3506 *** SVE Floating Point Multiply Indexed Group 3507 */ 3508 3509 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3510 NULL, gen_helper_gvec_fmul_idx_h, 3511 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3512 }; 3513 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3514 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3515 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3516 3517 /* 3518 *** SVE Floating Point Fast Reduction Group 3519 */ 3520 3521 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3522 TCGv_ptr, TCGv_i32); 3523 3524 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3525 gen_helper_fp_reduce *fn) 3526 { 3527 unsigned vsz, p2vsz; 3528 TCGv_i32 t_desc; 3529 TCGv_ptr t_zn, t_pg, status; 3530 TCGv_i64 temp; 3531 3532 if (fn == NULL) { 3533 return false; 3534 } 3535 if (!sve_access_check(s)) { 3536 return true; 3537 } 3538 3539 vsz = vec_full_reg_size(s); 3540 p2vsz = pow2ceil(vsz); 3541 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3542 temp = tcg_temp_new_i64(); 3543 t_zn = tcg_temp_new_ptr(); 3544 t_pg = tcg_temp_new_ptr(); 3545 3546 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3547 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3548 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3549 3550 fn(temp, t_zn, t_pg, status, t_desc); 3551 3552 write_fp_dreg(s, a->rd, temp); 3553 return true; 3554 } 3555 3556 #define DO_VPZ(NAME, name) \ 3557 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3558 NULL, gen_helper_sve_##name##_h, \ 3559 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3560 }; \ 3561 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3562 3563 DO_VPZ(FADDV, faddv) 3564 DO_VPZ(FMINNMV, fminnmv) 3565 DO_VPZ(FMAXNMV, fmaxnmv) 3566 DO_VPZ(FMINV, fminv) 3567 DO_VPZ(FMAXV, fmaxv) 3568 3569 #undef DO_VPZ 3570 3571 /* 3572 *** SVE Floating Point Unary Operations - Unpredicated Group 3573 */ 3574 3575 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3576 NULL, gen_helper_gvec_frecpe_h, 3577 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3578 }; 3579 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3580 3581 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3582 NULL, gen_helper_gvec_frsqrte_h, 3583 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 3584 }; 3585 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3586 3587 /* 3588 *** SVE Floating Point Compare with Zero Group 3589 */ 3590 3591 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3592 gen_helper_gvec_3_ptr *fn) 3593 { 3594 if (fn == NULL) { 3595 return false; 3596 } 3597 if (sve_access_check(s)) { 3598 unsigned vsz = vec_full_reg_size(s); 3599 TCGv_ptr status = 3600 fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3601 3602 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 3603 vec_full_reg_offset(s, a->rn), 3604 pred_full_reg_offset(s, a->pg), 3605 status, vsz, vsz, 0, fn); 3606 } 3607 return true; 3608 } 3609 3610 #define DO_PPZ(NAME, name) \ 3611 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 3612 NULL, gen_helper_sve_##name##_h, \ 3613 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3614 }; \ 3615 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 3616 3617 DO_PPZ(FCMGE_ppz0, fcmge0) 3618 DO_PPZ(FCMGT_ppz0, fcmgt0) 3619 DO_PPZ(FCMLE_ppz0, fcmle0) 3620 DO_PPZ(FCMLT_ppz0, fcmlt0) 3621 DO_PPZ(FCMEQ_ppz0, fcmeq0) 3622 DO_PPZ(FCMNE_ppz0, fcmne0) 3623 3624 #undef DO_PPZ 3625 3626 /* 3627 *** SVE floating-point trig multiply-add coefficient 3628 */ 3629 3630 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 3631 NULL, gen_helper_sve_ftmad_h, 3632 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 3633 }; 3634 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 3635 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, 3636 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3637 3638 /* 3639 *** SVE Floating Point Accumulating Reduction Group 3640 */ 3641 3642 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 3643 { 3644 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 3645 TCGv_ptr, TCGv_ptr, TCGv_i32); 3646 static fadda_fn * const fns[3] = { 3647 gen_helper_sve_fadda_h, 3648 gen_helper_sve_fadda_s, 3649 gen_helper_sve_fadda_d, 3650 }; 3651 unsigned vsz = vec_full_reg_size(s); 3652 TCGv_ptr t_rm, t_pg, t_fpst; 3653 TCGv_i64 t_val; 3654 TCGv_i32 t_desc; 3655 3656 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3657 return false; 3658 } 3659 s->is_nonstreaming = true; 3660 if (!sve_access_check(s)) { 3661 return true; 3662 } 3663 3664 t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 3665 t_rm = tcg_temp_new_ptr(); 3666 t_pg = tcg_temp_new_ptr(); 3667 tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); 3668 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3669 t_fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3670 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3671 3672 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 3673 3674 write_fp_dreg(s, a->rd, t_val); 3675 return true; 3676 } 3677 3678 /* 3679 *** SVE Floating Point Arithmetic - Unpredicated Group 3680 */ 3681 3682 #define DO_FP3(NAME, name) \ 3683 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 3684 NULL, gen_helper_gvec_##name##_h, \ 3685 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 3686 }; \ 3687 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 3688 3689 DO_FP3(FADD_zzz, fadd) 3690 DO_FP3(FSUB_zzz, fsub) 3691 DO_FP3(FMUL_zzz, fmul) 3692 DO_FP3(FRECPS, recps) 3693 DO_FP3(FRSQRTS, rsqrts) 3694 3695 #undef DO_FP3 3696 3697 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 3698 NULL, gen_helper_gvec_ftsmul_h, 3699 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 3700 }; 3701 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 3702 ftsmul_fns[a->esz], a, 0) 3703 3704 /* 3705 *** SVE Floating Point Arithmetic - Predicated Group 3706 */ 3707 3708 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 3709 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 3710 NULL, gen_helper_##name##_h, \ 3711 gen_helper_##name##_s, gen_helper_##name##_d \ 3712 }; \ 3713 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 3714 3715 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 3716 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 3717 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 3718 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) 3719 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) 3720 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 3721 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 3722 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) 3723 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 3724 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 3725 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 3726 3727 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 3728 TCGv_i64, TCGv_ptr, TCGv_i32); 3729 3730 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 3731 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 3732 { 3733 unsigned vsz = vec_full_reg_size(s); 3734 TCGv_ptr t_zd, t_zn, t_pg, status; 3735 TCGv_i32 desc; 3736 3737 t_zd = tcg_temp_new_ptr(); 3738 t_zn = tcg_temp_new_ptr(); 3739 t_pg = tcg_temp_new_ptr(); 3740 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); 3741 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); 3742 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3743 3744 status = fpstatus_ptr(is_fp16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3745 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3746 fn(t_zd, t_zn, t_pg, scalar, status, desc); 3747 } 3748 3749 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 3750 gen_helper_sve_fp2scalar *fn) 3751 { 3752 if (fn == NULL) { 3753 return false; 3754 } 3755 if (sve_access_check(s)) { 3756 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 3757 tcg_constant_i64(imm), fn); 3758 } 3759 return true; 3760 } 3761 3762 #define DO_FP_IMM(NAME, name, const0, const1) \ 3763 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 3764 NULL, gen_helper_sve_##name##_h, \ 3765 gen_helper_sve_##name##_s, \ 3766 gen_helper_sve_##name##_d \ 3767 }; \ 3768 static uint64_t const name##_const[4][2] = { \ 3769 { -1, -1 }, \ 3770 { float16_##const0, float16_##const1 }, \ 3771 { float32_##const0, float32_##const1 }, \ 3772 { float64_##const0, float64_##const1 }, \ 3773 }; \ 3774 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 3775 name##_const[a->esz][a->imm], name##_fns[a->esz]) 3776 3777 DO_FP_IMM(FADD, fadds, half, one) 3778 DO_FP_IMM(FSUB, fsubs, half, one) 3779 DO_FP_IMM(FMUL, fmuls, half, two) 3780 DO_FP_IMM(FSUBR, fsubrs, half, one) 3781 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 3782 DO_FP_IMM(FMINNM, fminnms, zero, one) 3783 DO_FP_IMM(FMAX, fmaxs, zero, one) 3784 DO_FP_IMM(FMIN, fmins, zero, one) 3785 3786 #undef DO_FP_IMM 3787 3788 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 3789 gen_helper_gvec_4_ptr *fn) 3790 { 3791 if (fn == NULL) { 3792 return false; 3793 } 3794 if (sve_access_check(s)) { 3795 unsigned vsz = vec_full_reg_size(s); 3796 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3797 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 3798 vec_full_reg_offset(s, a->rn), 3799 vec_full_reg_offset(s, a->rm), 3800 pred_full_reg_offset(s, a->pg), 3801 status, vsz, vsz, 0, fn); 3802 } 3803 return true; 3804 } 3805 3806 #define DO_FPCMP(NAME, name) \ 3807 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 3808 NULL, gen_helper_sve_##name##_h, \ 3809 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3810 }; \ 3811 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 3812 3813 DO_FPCMP(FCMGE, fcmge) 3814 DO_FPCMP(FCMGT, fcmgt) 3815 DO_FPCMP(FCMEQ, fcmeq) 3816 DO_FPCMP(FCMNE, fcmne) 3817 DO_FPCMP(FCMUO, fcmuo) 3818 DO_FPCMP(FACGE, facge) 3819 DO_FPCMP(FACGT, facgt) 3820 3821 #undef DO_FPCMP 3822 3823 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 3824 NULL, gen_helper_sve_fcadd_h, 3825 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 3826 }; 3827 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 3828 a->rd, a->rn, a->rm, a->pg, a->rot, 3829 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3830 3831 #define DO_FMLA(NAME, name) \ 3832 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 3833 NULL, gen_helper_sve_##name##_h, \ 3834 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3835 }; \ 3836 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 3837 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 3838 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3839 3840 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 3841 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 3842 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 3843 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 3844 3845 #undef DO_FMLA 3846 3847 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 3848 NULL, gen_helper_sve_fcmla_zpzzz_h, 3849 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 3850 }; 3851 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 3852 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 3853 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3854 3855 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 3856 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 3857 }; 3858 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 3859 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 3860 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3861 3862 /* 3863 *** SVE Floating Point Unary Operations Predicated Group 3864 */ 3865 3866 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 3867 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 3868 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3869 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 3870 3871 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 3872 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 3873 3874 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 3875 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 3876 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3877 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 3878 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3879 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 3880 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3881 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 3882 3883 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 3884 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 3885 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 3886 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 3887 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3888 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 3889 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3890 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 3891 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3892 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 3893 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3894 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 3895 3896 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 3897 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 3898 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 3899 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 3900 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3901 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 3902 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3903 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 3904 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3905 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 3906 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3907 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 3908 3909 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 3910 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 3911 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 3912 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 3913 3914 static gen_helper_gvec_3_ptr * const frint_fns[] = { 3915 NULL, 3916 gen_helper_sve_frint_h, 3917 gen_helper_sve_frint_s, 3918 gen_helper_sve_frint_d 3919 }; 3920 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 3921 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3922 3923 static gen_helper_gvec_3_ptr * const frintx_fns[] = { 3924 NULL, 3925 gen_helper_sve_frintx_h, 3926 gen_helper_sve_frintx_s, 3927 gen_helper_sve_frintx_d 3928 }; 3929 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], 3930 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3931 3932 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 3933 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) 3934 { 3935 unsigned vsz; 3936 TCGv_i32 tmode; 3937 TCGv_ptr status; 3938 3939 if (fn == NULL) { 3940 return false; 3941 } 3942 if (!sve_access_check(s)) { 3943 return true; 3944 } 3945 3946 vsz = vec_full_reg_size(s); 3947 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3948 tmode = gen_set_rmode(mode, status); 3949 3950 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 3951 vec_full_reg_offset(s, a->rn), 3952 pred_full_reg_offset(s, a->pg), 3953 status, vsz, vsz, 0, fn); 3954 3955 gen_restore_rmode(tmode, status); 3956 return true; 3957 } 3958 3959 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, 3960 FPROUNDING_TIEEVEN, frint_fns[a->esz]) 3961 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, 3962 FPROUNDING_POSINF, frint_fns[a->esz]) 3963 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, 3964 FPROUNDING_NEGINF, frint_fns[a->esz]) 3965 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, 3966 FPROUNDING_ZERO, frint_fns[a->esz]) 3967 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, 3968 FPROUNDING_TIEAWAY, frint_fns[a->esz]) 3969 3970 static gen_helper_gvec_3_ptr * const frecpx_fns[] = { 3971 NULL, gen_helper_sve_frecpx_h, 3972 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, 3973 }; 3974 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], 3975 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3976 3977 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { 3978 NULL, gen_helper_sve_fsqrt_h, 3979 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, 3980 }; 3981 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], 3982 a, 0, a->esz == MO_16 ?
FPST_FPCR_F16 : FPST_FPCR) 3983 3984 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 3985 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 3986 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 3987 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 3988 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 3989 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 3990 3991 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 3992 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 3993 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3994 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 3995 3996 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3997 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 3998 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 3999 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4000 4001 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4002 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4003 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4004 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4005 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4006 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4007 4008 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4009 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4010 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4011 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4012 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4013 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4014 4015 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4016 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4017 4018 /* 4019 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4020 */ 4021 4022 /* Subroutine loading a vector register at VOFS of LEN bytes. 4023 * The load should begin at the address Rn + IMM. 4024 */ 4025 4026 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4027 int len, int rn, int imm) 4028 { 4029 int len_align = QEMU_ALIGN_DOWN(len, 16); 4030 int len_remain = len % 16; 4031 int nparts = len / 16 + ctpop8(len_remain); 4032 int midx = get_mem_index(s); 4033 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4034 TCGv_i128 t16; 4035 4036 dirty_addr = tcg_temp_new_i64(); 4037 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4038 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4039 4040 /* 4041 * Note that unpredicated load/store of vector/predicate registers 4042 * are defined as a stream of bytes, which equates to little-endian 4043 * operations on larger quantities. 4044 * Attempt to keep code expansion to a minimum by limiting the 4045 * amount of unrolling done. 
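 *
 * As a worked example: a 6-byte predicate load (vector length 48,
 * so the predicate register is 48 / 8 = 6 bytes) has len_align = 0
 * and len_remain = 6, giving nparts = 6 / 16 + ctpop8(6) = 2; the
 * whole transfer is then handled by the remainder code below as one
 * 4-byte and one 2-byte load merged with a deposit.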
4046 */ 4047 if (nparts <= 4) { 4048 int i; 4049 4050 t0 = tcg_temp_new_i64(); 4051 t1 = tcg_temp_new_i64(); 4052 t16 = tcg_temp_new_i128(); 4053 4054 for (i = 0; i < len_align; i += 16) { 4055 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4056 MO_LE | MO_128 | MO_ATOM_NONE); 4057 tcg_gen_extr_i128_i64(t0, t1, t16); 4058 tcg_gen_st_i64(t0, base, vofs + i); 4059 tcg_gen_st_i64(t1, base, vofs + i + 8); 4060 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4061 } 4062 } else { 4063 TCGLabel *loop = gen_new_label(); 4064 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4065 4066 tcg_gen_movi_ptr(i, 0); 4067 gen_set_label(loop); 4068 4069 t16 = tcg_temp_new_i128(); 4070 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4071 MO_LE | MO_128 | MO_ATOM_NONE); 4072 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4073 4074 tp = tcg_temp_new_ptr(); 4075 tcg_gen_add_ptr(tp, base, i); 4076 tcg_gen_addi_ptr(i, i, 16); 4077 4078 t0 = tcg_temp_new_i64(); 4079 t1 = tcg_temp_new_i64(); 4080 tcg_gen_extr_i128_i64(t0, t1, t16); 4081 4082 tcg_gen_st_i64(t0, tp, vofs); 4083 tcg_gen_st_i64(t1, tp, vofs + 8); 4084 4085 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4086 } 4087 4088 /* 4089 * Predicate register loads can be any multiple of 2. 4090 * Note that we still store the entire 64-bit unit into tcg_env. 4091 */ 4092 if (len_remain >= 8) { 4093 t0 = tcg_temp_new_i64(); 4094 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4095 tcg_gen_st_i64(t0, base, vofs + len_align); 4096 len_remain -= 8; 4097 len_align += 8; 4098 if (len_remain) { 4099 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4100 } 4101 } 4102 if (len_remain) { 4103 t0 = tcg_temp_new_i64(); 4104 switch (len_remain) { 4105 case 2: 4106 case 4: 4107 case 8: 4108 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4109 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4110 break; 4111 4112 case 6: 4113 t1 = tcg_temp_new_i64(); 4114 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4115 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4116 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4117 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4118 break; 4119 4120 default: 4121 g_assert_not_reached(); 4122 } 4123 tcg_gen_st_i64(t0, base, vofs + len_align); 4124 } 4125 } 4126 4127 /* Similarly for stores. */ 4128 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4129 int len, int rn, int imm) 4130 { 4131 int len_align = QEMU_ALIGN_DOWN(len, 16); 4132 int len_remain = len % 16; 4133 int nparts = len / 16 + ctpop8(len_remain); 4134 int midx = get_mem_index(s); 4135 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4136 TCGv_i128 t16; 4137 4138 dirty_addr = tcg_temp_new_i64(); 4139 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4140 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4141 4142 /* Note that unpredicated load/store of vector/predicate registers 4143 * are defined as a stream of bytes, which equates to little-endian 4144 * operations on larger quantities. There is no nice way to force 4145 * a little-endian store for aarch64_be-linux-user out of line. 4146 * 4147 * Attempt to keep code expansion to a minimum by limiting the 4148 * amount of unrolling done. 
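 *
 * For example, the 6-byte remainder case below reads the full 8-byte
 * unit back from the register file, stores the low 4 bytes, then
 * shifts right by 32 to store the remaining 2 bytes, preserving the
 * little-endian byte-stream layout.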
4149 */ 4150 if (nparts <= 4) { 4151 int i; 4152 4153 t0 = tcg_temp_new_i64(); 4154 t1 = tcg_temp_new_i64(); 4155 t16 = tcg_temp_new_i128(); 4156 for (i = 0; i < len_align; i += 16) { 4157 tcg_gen_ld_i64(t0, base, vofs + i); 4158 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4159 tcg_gen_concat_i64_i128(t16, t0, t1); 4160 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4161 MO_LE | MO_128 | MO_ATOM_NONE); 4162 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4163 } 4164 } else { 4165 TCGLabel *loop = gen_new_label(); 4166 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4167 4168 tcg_gen_movi_ptr(i, 0); 4169 gen_set_label(loop); 4170 4171 t0 = tcg_temp_new_i64(); 4172 t1 = tcg_temp_new_i64(); 4173 tp = tcg_temp_new_ptr(); 4174 tcg_gen_add_ptr(tp, base, i); 4175 tcg_gen_ld_i64(t0, tp, vofs); 4176 tcg_gen_ld_i64(t1, tp, vofs + 8); 4177 tcg_gen_addi_ptr(i, i, 16); 4178 4179 t16 = tcg_temp_new_i128(); 4180 tcg_gen_concat_i64_i128(t16, t0, t1); 4181 4182 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4183 MO_LE | MO_128 | MO_ATOM_NONE); 4184 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4185 4186 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4187 } 4188 4189 /* Predicate register stores can be any multiple of 2. */ 4190 if (len_remain >= 8) { 4191 t0 = tcg_temp_new_i64(); 4192 tcg_gen_ld_i64(t0, base, vofs + len_align); 4193 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4194 len_remain -= 8; 4195 len_align += 8; 4196 if (len_remain) { 4197 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4198 } 4199 } 4200 if (len_remain) { 4201 t0 = tcg_temp_new_i64(); 4202 tcg_gen_ld_i64(t0, base, vofs + len_align); 4203 4204 switch (len_remain) { 4205 case 2: 4206 case 4: 4207 case 8: 4208 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4209 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4210 break; 4211 4212 case 6: 4213 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4214 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4215 tcg_gen_shri_i64(t0, t0, 32); 4216 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4217 break; 4218 4219 default: 4220 g_assert_not_reached(); 4221 } 4222 } 4223 } 4224 4225 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4226 { 4227 if (!dc_isar_feature(aa64_sve, s)) { 4228 return false; 4229 } 4230 if (sve_access_check(s)) { 4231 int size = vec_full_reg_size(s); 4232 int off = vec_full_reg_offset(s, a->rd); 4233 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4234 } 4235 return true; 4236 } 4237 4238 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4239 { 4240 if (!dc_isar_feature(aa64_sve, s)) { 4241 return false; 4242 } 4243 if (sve_access_check(s)) { 4244 int size = pred_full_reg_size(s); 4245 int off = pred_full_reg_offset(s, a->rd); 4246 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4247 } 4248 return true; 4249 } 4250 4251 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4252 { 4253 if (!dc_isar_feature(aa64_sve, s)) { 4254 return false; 4255 } 4256 if (sve_access_check(s)) { 4257 int size = vec_full_reg_size(s); 4258 int off = vec_full_reg_offset(s, a->rd); 4259 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4260 } 4261 return true; 4262 } 4263 4264 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4265 { 4266 if (!dc_isar_feature(aa64_sve, s)) { 4267 return false; 4268 } 4269 if (sve_access_check(s)) { 4270 int size = pred_full_reg_size(s); 4271 int off = pred_full_reg_offset(s, a->rd); 4272 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4273 } 4274 return true; 4275 } 4276 4277 /* 4278 
*** SVE Memory - Contiguous Load Group 4279 */ 4280 4281 /* The memory mode of the dtype. */ 4282 static const MemOp dtype_mop[16] = { 4283 MO_UB, MO_UB, MO_UB, MO_UB, 4284 MO_SL, MO_UW, MO_UW, MO_UW, 4285 MO_SW, MO_SW, MO_UL, MO_UL, 4286 MO_SB, MO_SB, MO_SB, MO_UQ 4287 }; 4288 4289 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4290 4291 /* The vector element size of dtype. */ 4292 static const uint8_t dtype_esz[16] = { 4293 0, 1, 2, 3, 4294 3, 1, 2, 3, 4295 3, 2, 2, 3, 4296 3, 2, 1, 3 4297 }; 4298 4299 uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, 4300 uint32_t msz, bool is_write, uint32_t data) 4301 { 4302 uint32_t sizem1; 4303 uint32_t desc = 0; 4304 4305 /* Assert all of the data fits, with or without MTE enabled. */ 4306 assert(nregs >= 1 && nregs <= 4); 4307 sizem1 = (nregs << msz) - 1; 4308 assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); 4309 assert(data < 1u << SVE_MTEDESC_SHIFT); 4310 4311 if (s->mte_active[0]) { 4312 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4313 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4314 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4315 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4316 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); 4317 desc <<= SVE_MTEDESC_SHIFT; 4318 } 4319 return simd_desc(vsz, vsz, desc | data); 4320 } 4321 4322 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4323 int dtype, uint32_t nregs, bool is_write, 4324 gen_helper_gvec_mem *fn) 4325 { 4326 TCGv_ptr t_pg; 4327 uint32_t desc; 4328 4329 if (!s->mte_active[0]) { 4330 addr = clean_data_tbi(s, addr); 4331 } 4332 4333 /* 4334 * For e.g. LD4, there are not enough arguments to pass all 4 4335 * registers as pointers, so encode the regno into the data field. 4336 * For consistency, do this even for LD1. 
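 * The helpers are then expected to recover the first register number
 * with simd_data(desc) and to address any remaining registers modulo
 * 32 (see the ldN/stN routines in sve_helper.c).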
4337 */ 4338 desc = make_svemte_desc(s, vec_full_reg_size(s), nregs, 4339 dtype_msz(dtype), is_write, zt); 4340 t_pg = tcg_temp_new_ptr(); 4341 4342 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4343 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4344 } 4345 4346 /* Indexed by [mte][be][dtype][nreg] */ 4347 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4348 { /* mte inactive, little-endian */ 4349 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4350 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4351 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4352 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4353 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4354 4355 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4356 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4357 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4358 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4359 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4360 4361 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4362 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4363 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4364 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4365 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4366 4367 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4368 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4369 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4370 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4371 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4372 4373 /* mte inactive, big-endian */ 4374 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4375 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4376 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4377 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4378 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4379 4380 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4381 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4382 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4383 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4384 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4385 4386 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4387 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4388 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4389 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4390 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4391 4392 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4393 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4394 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4395 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4396 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4397 4398 { /* mte active, little-endian */ 4399 { { gen_helper_sve_ld1bb_r_mte, 4400 gen_helper_sve_ld2bb_r_mte, 4401 gen_helper_sve_ld3bb_r_mte, 4402 gen_helper_sve_ld4bb_r_mte }, 4403 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4404 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4405 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4406 4407 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4408 { gen_helper_sve_ld1hh_le_r_mte, 4409 gen_helper_sve_ld2hh_le_r_mte, 4410 gen_helper_sve_ld3hh_le_r_mte, 4411 gen_helper_sve_ld4hh_le_r_mte }, 4412 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4413 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4414 4415 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4416 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4417 { 
gen_helper_sve_ld1ss_le_r_mte, 4418 gen_helper_sve_ld2ss_le_r_mte, 4419 gen_helper_sve_ld3ss_le_r_mte, 4420 gen_helper_sve_ld4ss_le_r_mte }, 4421 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4422 4423 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4424 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4425 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4426 { gen_helper_sve_ld1dd_le_r_mte, 4427 gen_helper_sve_ld2dd_le_r_mte, 4428 gen_helper_sve_ld3dd_le_r_mte, 4429 gen_helper_sve_ld4dd_le_r_mte } }, 4430 4431 /* mte active, big-endian */ 4432 { { gen_helper_sve_ld1bb_r_mte, 4433 gen_helper_sve_ld2bb_r_mte, 4434 gen_helper_sve_ld3bb_r_mte, 4435 gen_helper_sve_ld4bb_r_mte }, 4436 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4437 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4438 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4439 4440 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4441 { gen_helper_sve_ld1hh_be_r_mte, 4442 gen_helper_sve_ld2hh_be_r_mte, 4443 gen_helper_sve_ld3hh_be_r_mte, 4444 gen_helper_sve_ld4hh_be_r_mte }, 4445 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4446 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4447 4448 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4449 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4450 { gen_helper_sve_ld1ss_be_r_mte, 4451 gen_helper_sve_ld2ss_be_r_mte, 4452 gen_helper_sve_ld3ss_be_r_mte, 4453 gen_helper_sve_ld4ss_be_r_mte }, 4454 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4455 4456 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4457 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4458 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4459 { gen_helper_sve_ld1dd_be_r_mte, 4460 gen_helper_sve_ld2dd_be_r_mte, 4461 gen_helper_sve_ld3dd_be_r_mte, 4462 gen_helper_sve_ld4dd_be_r_mte } } }, 4463 }; 4464 4465 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4466 TCGv_i64 addr, int dtype, int nreg) 4467 { 4468 gen_helper_gvec_mem *fn 4469 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4470 4471 /* 4472 * While there are holes in the table, they are not 4473 * accessible via the instruction encoding. 
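 * For example, the extending loads such as LD1BH exist only in the
 * single-register form; the LD2/LD3/LD4 slots are non-NULL only when
 * the memory and vector element sizes match.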
4474 */ 4475 assert(fn != NULL); 4476 do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn); 4477 } 4478 4479 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4480 { 4481 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4482 return false; 4483 } 4484 if (sve_access_check(s)) { 4485 TCGv_i64 addr = tcg_temp_new_i64(); 4486 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4487 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4488 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4489 } 4490 return true; 4491 } 4492 4493 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4494 { 4495 if (!dc_isar_feature(aa64_sve, s)) { 4496 return false; 4497 } 4498 if (sve_access_check(s)) { 4499 int vsz = vec_full_reg_size(s); 4500 int elements = vsz >> dtype_esz[a->dtype]; 4501 TCGv_i64 addr = tcg_temp_new_i64(); 4502 4503 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4504 (a->imm * elements * (a->nreg + 1)) 4505 << dtype_msz(a->dtype)); 4506 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4507 } 4508 return true; 4509 } 4510 4511 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4512 { 4513 static gen_helper_gvec_mem * const fns[2][2][16] = { 4514 { /* mte inactive, little-endian */ 4515 { gen_helper_sve_ldff1bb_r, 4516 gen_helper_sve_ldff1bhu_r, 4517 gen_helper_sve_ldff1bsu_r, 4518 gen_helper_sve_ldff1bdu_r, 4519 4520 gen_helper_sve_ldff1sds_le_r, 4521 gen_helper_sve_ldff1hh_le_r, 4522 gen_helper_sve_ldff1hsu_le_r, 4523 gen_helper_sve_ldff1hdu_le_r, 4524 4525 gen_helper_sve_ldff1hds_le_r, 4526 gen_helper_sve_ldff1hss_le_r, 4527 gen_helper_sve_ldff1ss_le_r, 4528 gen_helper_sve_ldff1sdu_le_r, 4529 4530 gen_helper_sve_ldff1bds_r, 4531 gen_helper_sve_ldff1bss_r, 4532 gen_helper_sve_ldff1bhs_r, 4533 gen_helper_sve_ldff1dd_le_r }, 4534 4535 /* mte inactive, big-endian */ 4536 { gen_helper_sve_ldff1bb_r, 4537 gen_helper_sve_ldff1bhu_r, 4538 gen_helper_sve_ldff1bsu_r, 4539 gen_helper_sve_ldff1bdu_r, 4540 4541 gen_helper_sve_ldff1sds_be_r, 4542 gen_helper_sve_ldff1hh_be_r, 4543 gen_helper_sve_ldff1hsu_be_r, 4544 gen_helper_sve_ldff1hdu_be_r, 4545 4546 gen_helper_sve_ldff1hds_be_r, 4547 gen_helper_sve_ldff1hss_be_r, 4548 gen_helper_sve_ldff1ss_be_r, 4549 gen_helper_sve_ldff1sdu_be_r, 4550 4551 gen_helper_sve_ldff1bds_r, 4552 gen_helper_sve_ldff1bss_r, 4553 gen_helper_sve_ldff1bhs_r, 4554 gen_helper_sve_ldff1dd_be_r } }, 4555 4556 { /* mte active, little-endian */ 4557 { gen_helper_sve_ldff1bb_r_mte, 4558 gen_helper_sve_ldff1bhu_r_mte, 4559 gen_helper_sve_ldff1bsu_r_mte, 4560 gen_helper_sve_ldff1bdu_r_mte, 4561 4562 gen_helper_sve_ldff1sds_le_r_mte, 4563 gen_helper_sve_ldff1hh_le_r_mte, 4564 gen_helper_sve_ldff1hsu_le_r_mte, 4565 gen_helper_sve_ldff1hdu_le_r_mte, 4566 4567 gen_helper_sve_ldff1hds_le_r_mte, 4568 gen_helper_sve_ldff1hss_le_r_mte, 4569 gen_helper_sve_ldff1ss_le_r_mte, 4570 gen_helper_sve_ldff1sdu_le_r_mte, 4571 4572 gen_helper_sve_ldff1bds_r_mte, 4573 gen_helper_sve_ldff1bss_r_mte, 4574 gen_helper_sve_ldff1bhs_r_mte, 4575 gen_helper_sve_ldff1dd_le_r_mte }, 4576 4577 /* mte active, big-endian */ 4578 { gen_helper_sve_ldff1bb_r_mte, 4579 gen_helper_sve_ldff1bhu_r_mte, 4580 gen_helper_sve_ldff1bsu_r_mte, 4581 gen_helper_sve_ldff1bdu_r_mte, 4582 4583 gen_helper_sve_ldff1sds_be_r_mte, 4584 gen_helper_sve_ldff1hh_be_r_mte, 4585 gen_helper_sve_ldff1hsu_be_r_mte, 4586 gen_helper_sve_ldff1hdu_be_r_mte, 4587 4588 gen_helper_sve_ldff1hds_be_r_mte, 4589 gen_helper_sve_ldff1hss_be_r_mte, 4590 gen_helper_sve_ldff1ss_be_r_mte, 4591 
gen_helper_sve_ldff1sdu_be_r_mte, 4592 4593 gen_helper_sve_ldff1bds_r_mte, 4594 gen_helper_sve_ldff1bss_r_mte, 4595 gen_helper_sve_ldff1bhs_r_mte, 4596 gen_helper_sve_ldff1dd_be_r_mte } }, 4597 }; 4598 4599 if (!dc_isar_feature(aa64_sve, s)) { 4600 return false; 4601 } 4602 s->is_nonstreaming = true; 4603 if (sve_access_check(s)) { 4604 TCGv_i64 addr = tcg_temp_new_i64(); 4605 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4606 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4607 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4608 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4609 } 4610 return true; 4611 } 4612 4613 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 4614 { 4615 static gen_helper_gvec_mem * const fns[2][2][16] = { 4616 { /* mte inactive, little-endian */ 4617 { gen_helper_sve_ldnf1bb_r, 4618 gen_helper_sve_ldnf1bhu_r, 4619 gen_helper_sve_ldnf1bsu_r, 4620 gen_helper_sve_ldnf1bdu_r, 4621 4622 gen_helper_sve_ldnf1sds_le_r, 4623 gen_helper_sve_ldnf1hh_le_r, 4624 gen_helper_sve_ldnf1hsu_le_r, 4625 gen_helper_sve_ldnf1hdu_le_r, 4626 4627 gen_helper_sve_ldnf1hds_le_r, 4628 gen_helper_sve_ldnf1hss_le_r, 4629 gen_helper_sve_ldnf1ss_le_r, 4630 gen_helper_sve_ldnf1sdu_le_r, 4631 4632 gen_helper_sve_ldnf1bds_r, 4633 gen_helper_sve_ldnf1bss_r, 4634 gen_helper_sve_ldnf1bhs_r, 4635 gen_helper_sve_ldnf1dd_le_r }, 4636 4637 /* mte inactive, big-endian */ 4638 { gen_helper_sve_ldnf1bb_r, 4639 gen_helper_sve_ldnf1bhu_r, 4640 gen_helper_sve_ldnf1bsu_r, 4641 gen_helper_sve_ldnf1bdu_r, 4642 4643 gen_helper_sve_ldnf1sds_be_r, 4644 gen_helper_sve_ldnf1hh_be_r, 4645 gen_helper_sve_ldnf1hsu_be_r, 4646 gen_helper_sve_ldnf1hdu_be_r, 4647 4648 gen_helper_sve_ldnf1hds_be_r, 4649 gen_helper_sve_ldnf1hss_be_r, 4650 gen_helper_sve_ldnf1ss_be_r, 4651 gen_helper_sve_ldnf1sdu_be_r, 4652 4653 gen_helper_sve_ldnf1bds_r, 4654 gen_helper_sve_ldnf1bss_r, 4655 gen_helper_sve_ldnf1bhs_r, 4656 gen_helper_sve_ldnf1dd_be_r } }, 4657 4658 { /* mte active, little-endian */ 4659 { gen_helper_sve_ldnf1bb_r_mte, 4660 gen_helper_sve_ldnf1bhu_r_mte, 4661 gen_helper_sve_ldnf1bsu_r_mte, 4662 gen_helper_sve_ldnf1bdu_r_mte, 4663 4664 gen_helper_sve_ldnf1sds_le_r_mte, 4665 gen_helper_sve_ldnf1hh_le_r_mte, 4666 gen_helper_sve_ldnf1hsu_le_r_mte, 4667 gen_helper_sve_ldnf1hdu_le_r_mte, 4668 4669 gen_helper_sve_ldnf1hds_le_r_mte, 4670 gen_helper_sve_ldnf1hss_le_r_mte, 4671 gen_helper_sve_ldnf1ss_le_r_mte, 4672 gen_helper_sve_ldnf1sdu_le_r_mte, 4673 4674 gen_helper_sve_ldnf1bds_r_mte, 4675 gen_helper_sve_ldnf1bss_r_mte, 4676 gen_helper_sve_ldnf1bhs_r_mte, 4677 gen_helper_sve_ldnf1dd_le_r_mte }, 4678 4679 /* mte active, big-endian */ 4680 { gen_helper_sve_ldnf1bb_r_mte, 4681 gen_helper_sve_ldnf1bhu_r_mte, 4682 gen_helper_sve_ldnf1bsu_r_mte, 4683 gen_helper_sve_ldnf1bdu_r_mte, 4684 4685 gen_helper_sve_ldnf1sds_be_r_mte, 4686 gen_helper_sve_ldnf1hh_be_r_mte, 4687 gen_helper_sve_ldnf1hsu_be_r_mte, 4688 gen_helper_sve_ldnf1hdu_be_r_mte, 4689 4690 gen_helper_sve_ldnf1hds_be_r_mte, 4691 gen_helper_sve_ldnf1hss_be_r_mte, 4692 gen_helper_sve_ldnf1ss_be_r_mte, 4693 gen_helper_sve_ldnf1sdu_be_r_mte, 4694 4695 gen_helper_sve_ldnf1bds_r_mte, 4696 gen_helper_sve_ldnf1bss_r_mte, 4697 gen_helper_sve_ldnf1bhs_r_mte, 4698 gen_helper_sve_ldnf1dd_be_r_mte } }, 4699 }; 4700 4701 if (!dc_isar_feature(aa64_sve, s)) { 4702 return false; 4703 } 4704 s->is_nonstreaming = true; 4705 if (sve_access_check(s)) { 4706 int vsz = vec_full_reg_size(s); 4707 int elements = vsz >> dtype_esz[a->dtype]; 4708 int off =
(a->imm * elements) << dtype_msz(a->dtype); 4709 TCGv_i64 addr = tcg_temp_new_i64(); 4710 4711 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4712 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4713 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4714 } 4715 return true; 4716 } 4717 4718 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4719 { 4720 unsigned vsz = vec_full_reg_size(s); 4721 TCGv_ptr t_pg; 4722 int poff; 4723 uint32_t desc; 4724 4725 /* Load the first quadword using the normal predicated load helpers. */ 4726 if (!s->mte_active[0]) { 4727 addr = clean_data_tbi(s, addr); 4728 } 4729 4730 poff = pred_full_reg_offset(s, pg); 4731 if (vsz > 16) { 4732 /* 4733 * Zero-extend the first 16 bits of the predicate into a temporary. 4734 * This avoids triggering an assert making sure we don't have bits 4735 * set within a predicate beyond VQ, but we have lowered VQ to 1 4736 * for this load operation. 4737 */ 4738 TCGv_i64 tmp = tcg_temp_new_i64(); 4739 #if HOST_BIG_ENDIAN 4740 poff += 6; 4741 #endif 4742 tcg_gen_ld16u_i64(tmp, tcg_env, poff); 4743 4744 poff = offsetof(CPUARMState, vfp.preg_tmp); 4745 tcg_gen_st_i64(tmp, tcg_env, poff); 4746 } 4747 4748 t_pg = tcg_temp_new_ptr(); 4749 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4750 4751 gen_helper_gvec_mem *fn 4752 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4753 desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); 4754 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4755 4756 /* Replicate that first quadword. */ 4757 if (vsz > 16) { 4758 int doff = vec_full_reg_offset(s, zt); 4759 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4760 } 4761 } 4762 4763 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 4764 { 4765 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4766 return false; 4767 } 4768 if (sve_access_check(s)) { 4769 int msz = dtype_msz(a->dtype); 4770 TCGv_i64 addr = tcg_temp_new_i64(); 4771 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 4772 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4773 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4774 } 4775 return true; 4776 } 4777 4778 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 4779 { 4780 if (!dc_isar_feature(aa64_sve, s)) { 4781 return false; 4782 } 4783 if (sve_access_check(s)) { 4784 TCGv_i64 addr = tcg_temp_new_i64(); 4785 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 4786 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4787 } 4788 return true; 4789 } 4790 4791 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4792 { 4793 unsigned vsz = vec_full_reg_size(s); 4794 unsigned vsz_r32; 4795 TCGv_ptr t_pg; 4796 int poff, doff; 4797 uint32_t desc; 4798 4799 if (vsz < 32) { 4800 /* 4801 * Note that this UNDEFINED check comes after CheckSVEEnabled() 4802 * in the ARM pseudocode, which is the sve_access_check() done 4803 * in our caller. We should not now return false from the caller. 4804 */ 4805 unallocated_encoding(s); 4806 return; 4807 } 4808 4809 /* Load the first octaword using the normal predicated load helpers. */ 4810 if (!s->mte_active[0]) { 4811 addr = clean_data_tbi(s, addr); 4812 } 4813 4814 poff = pred_full_reg_offset(s, pg); 4815 if (vsz > 32) { 4816 /* 4817 * Zero-extend the first 32 bits of the predicate into a temporary. 4818 * This avoids triggering an assert making sure we don't have bits 4819 * set within a predicate beyond VQ, but we have lowered VQ to 2 4820 * for this load operation. 
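 * This is the same trick used, with 16 bits and VQ = 1, by do_ldrq
 * above.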
4821 */ 4822 TCGv_i64 tmp = tcg_temp_new_i64(); 4823 #if HOST_BIG_ENDIAN 4824 poff += 4; 4825 #endif 4826 tcg_gen_ld32u_i64(tmp, tcg_env, poff); 4827 4828 poff = offsetof(CPUARMState, vfp.preg_tmp); 4829 tcg_gen_st_i64(tmp, tcg_env, poff); 4830 } 4831 4832 t_pg = tcg_temp_new_ptr(); 4833 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4834 4835 gen_helper_gvec_mem *fn 4836 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4837 desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); 4838 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4839 4840 /* 4841 * Replicate that first octaword. 4842 * The replication happens in units of 32; if the full vector size 4843 * is not a multiple of 32, the final bits are zeroed. 4844 */ 4845 doff = vec_full_reg_offset(s, zt); 4846 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 4847 if (vsz >= 64) { 4848 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 4849 } 4850 vsz -= vsz_r32; 4851 if (vsz) { 4852 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 4853 } 4854 } 4855 4856 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 4857 { 4858 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4859 return false; 4860 } 4861 if (a->rm == 31) { 4862 return false; 4863 } 4864 s->is_nonstreaming = true; 4865 if (sve_access_check(s)) { 4866 TCGv_i64 addr = tcg_temp_new_i64(); 4867 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4868 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4869 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4870 } 4871 return true; 4872 } 4873 4874 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 4875 { 4876 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4877 return false; 4878 } 4879 s->is_nonstreaming = true; 4880 if (sve_access_check(s)) { 4881 TCGv_i64 addr = tcg_temp_new_i64(); 4882 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 4883 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4884 } 4885 return true; 4886 } 4887 4888 /* Load and broadcast element. */ 4889 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 4890 { 4891 unsigned vsz = vec_full_reg_size(s); 4892 unsigned psz = pred_full_reg_size(s); 4893 unsigned esz = dtype_esz[a->dtype]; 4894 unsigned msz = dtype_msz(a->dtype); 4895 TCGLabel *over; 4896 TCGv_i64 temp, clean_addr; 4897 MemOp memop; 4898 4899 if (!dc_isar_feature(aa64_sve, s)) { 4900 return false; 4901 } 4902 if (!sve_access_check(s)) { 4903 return true; 4904 } 4905 4906 over = gen_new_label(); 4907 4908 /* If the guarding predicate has no bits set, no load occurs. */ 4909 if (psz <= 8) { 4910 /* Reduce the pred_esz_masks value simply to reduce the 4911 * size of the code generated here. 4912 */ 4913 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 4914 temp = tcg_temp_new_i64(); 4915 tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg)); 4916 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 4917 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 4918 } else { 4919 TCGv_i32 t32 = tcg_temp_new_i32(); 4920 find_last_active(s, t32, esz, a->pg); 4921 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 4922 } 4923 4924 /* Load the data. */ 4925 temp = tcg_temp_new_i64(); 4926 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 4927 4928 memop = finalize_memop(s, dtype_mop[a->dtype]); 4929 clean_addr = gen_mte_check1(s, temp, false, true, memop); 4930 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); 4931 4932 /* Broadcast to *all* elements. 
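 * Writing all elements is safe here: the do_movz_zpz call at the end
 * zeroes every element whose governing predicate bit is clear.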
*/ 4933 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4934 vsz, vsz, temp); 4935 4936 /* Zero the inactive elements. */ 4937 gen_set_label(over); 4938 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 4939 } 4940 4941 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4942 int msz, int esz, int nreg) 4943 { 4944 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 4945 { { { gen_helper_sve_st1bb_r, 4946 gen_helper_sve_st1bh_r, 4947 gen_helper_sve_st1bs_r, 4948 gen_helper_sve_st1bd_r }, 4949 { NULL, 4950 gen_helper_sve_st1hh_le_r, 4951 gen_helper_sve_st1hs_le_r, 4952 gen_helper_sve_st1hd_le_r }, 4953 { NULL, NULL, 4954 gen_helper_sve_st1ss_le_r, 4955 gen_helper_sve_st1sd_le_r }, 4956 { NULL, NULL, NULL, 4957 gen_helper_sve_st1dd_le_r } }, 4958 { { gen_helper_sve_st1bb_r, 4959 gen_helper_sve_st1bh_r, 4960 gen_helper_sve_st1bs_r, 4961 gen_helper_sve_st1bd_r }, 4962 { NULL, 4963 gen_helper_sve_st1hh_be_r, 4964 gen_helper_sve_st1hs_be_r, 4965 gen_helper_sve_st1hd_be_r }, 4966 { NULL, NULL, 4967 gen_helper_sve_st1ss_be_r, 4968 gen_helper_sve_st1sd_be_r }, 4969 { NULL, NULL, NULL, 4970 gen_helper_sve_st1dd_be_r } } }, 4971 4972 { { { gen_helper_sve_st1bb_r_mte, 4973 gen_helper_sve_st1bh_r_mte, 4974 gen_helper_sve_st1bs_r_mte, 4975 gen_helper_sve_st1bd_r_mte }, 4976 { NULL, 4977 gen_helper_sve_st1hh_le_r_mte, 4978 gen_helper_sve_st1hs_le_r_mte, 4979 gen_helper_sve_st1hd_le_r_mte }, 4980 { NULL, NULL, 4981 gen_helper_sve_st1ss_le_r_mte, 4982 gen_helper_sve_st1sd_le_r_mte }, 4983 { NULL, NULL, NULL, 4984 gen_helper_sve_st1dd_le_r_mte } }, 4985 { { gen_helper_sve_st1bb_r_mte, 4986 gen_helper_sve_st1bh_r_mte, 4987 gen_helper_sve_st1bs_r_mte, 4988 gen_helper_sve_st1bd_r_mte }, 4989 { NULL, 4990 gen_helper_sve_st1hh_be_r_mte, 4991 gen_helper_sve_st1hs_be_r_mte, 4992 gen_helper_sve_st1hd_be_r_mte }, 4993 { NULL, NULL, 4994 gen_helper_sve_st1ss_be_r_mte, 4995 gen_helper_sve_st1sd_be_r_mte }, 4996 { NULL, NULL, NULL, 4997 gen_helper_sve_st1dd_be_r_mte } } }, 4998 }; 4999 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5000 { { { gen_helper_sve_st2bb_r, 5001 gen_helper_sve_st2hh_le_r, 5002 gen_helper_sve_st2ss_le_r, 5003 gen_helper_sve_st2dd_le_r }, 5004 { gen_helper_sve_st3bb_r, 5005 gen_helper_sve_st3hh_le_r, 5006 gen_helper_sve_st3ss_le_r, 5007 gen_helper_sve_st3dd_le_r }, 5008 { gen_helper_sve_st4bb_r, 5009 gen_helper_sve_st4hh_le_r, 5010 gen_helper_sve_st4ss_le_r, 5011 gen_helper_sve_st4dd_le_r } }, 5012 { { gen_helper_sve_st2bb_r, 5013 gen_helper_sve_st2hh_be_r, 5014 gen_helper_sve_st2ss_be_r, 5015 gen_helper_sve_st2dd_be_r }, 5016 { gen_helper_sve_st3bb_r, 5017 gen_helper_sve_st3hh_be_r, 5018 gen_helper_sve_st3ss_be_r, 5019 gen_helper_sve_st3dd_be_r }, 5020 { gen_helper_sve_st4bb_r, 5021 gen_helper_sve_st4hh_be_r, 5022 gen_helper_sve_st4ss_be_r, 5023 gen_helper_sve_st4dd_be_r } } }, 5024 { { { gen_helper_sve_st2bb_r_mte, 5025 gen_helper_sve_st2hh_le_r_mte, 5026 gen_helper_sve_st2ss_le_r_mte, 5027 gen_helper_sve_st2dd_le_r_mte }, 5028 { gen_helper_sve_st3bb_r_mte, 5029 gen_helper_sve_st3hh_le_r_mte, 5030 gen_helper_sve_st3ss_le_r_mte, 5031 gen_helper_sve_st3dd_le_r_mte }, 5032 { gen_helper_sve_st4bb_r_mte, 5033 gen_helper_sve_st4hh_le_r_mte, 5034 gen_helper_sve_st4ss_le_r_mte, 5035 gen_helper_sve_st4dd_le_r_mte } }, 5036 { { gen_helper_sve_st2bb_r_mte, 5037 gen_helper_sve_st2hh_be_r_mte, 5038 gen_helper_sve_st2ss_be_r_mte, 5039 gen_helper_sve_st2dd_be_r_mte }, 5040 { gen_helper_sve_st3bb_r_mte, 5041 gen_helper_sve_st3hh_be_r_mte, 5042 
gen_helper_sve_st3ss_be_r_mte, 5043 gen_helper_sve_st3dd_be_r_mte }, 5044 { gen_helper_sve_st4bb_r_mte, 5045 gen_helper_sve_st4hh_be_r_mte, 5046 gen_helper_sve_st4ss_be_r_mte, 5047 gen_helper_sve_st4dd_be_r_mte } } }, 5048 }; 5049 gen_helper_gvec_mem *fn; 5050 int be = s->be_data == MO_BE; 5051 5052 if (nreg == 0) { 5053 /* ST1 */ 5054 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5055 } else { 5056 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5057 assert(msz == esz); 5058 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5059 } 5060 assert(fn != NULL); 5061 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn); 5062 } 5063 5064 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5065 { 5066 if (!dc_isar_feature(aa64_sve, s)) { 5067 return false; 5068 } 5069 if (a->rm == 31 || a->msz > a->esz) { 5070 return false; 5071 } 5072 if (sve_access_check(s)) { 5073 TCGv_i64 addr = tcg_temp_new_i64(); 5074 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5075 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5076 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5077 } 5078 return true; 5079 } 5080 5081 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5082 { 5083 if (!dc_isar_feature(aa64_sve, s)) { 5084 return false; 5085 } 5086 if (a->msz > a->esz) { 5087 return false; 5088 } 5089 if (sve_access_check(s)) { 5090 int vsz = vec_full_reg_size(s); 5091 int elements = vsz >> a->esz; 5092 TCGv_i64 addr = tcg_temp_new_i64(); 5093 5094 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5095 (a->imm * elements * (a->nreg + 1)) << a->msz); 5096 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5097 } 5098 return true; 5099 } 5100 5101 /* 5102 *** SVE gather loads / scatter stores 5103 */ 5104 5105 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5106 int scale, TCGv_i64 scalar, int msz, bool is_write, 5107 gen_helper_gvec_mem_scatter *fn) 5108 { 5109 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5110 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5111 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5112 uint32_t desc; 5113 5114 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 5115 tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); 5116 tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); 5117 5118 desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); 5119 fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5120 } 5121 5122 /* Indexed by [mte][be][ff][xs][u][msz]. 
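 * Here ff selects the first-fault form, xs the offset extension
 * (0 = the 32-bit vector offsets are zero-extended, 1 = sign-extended),
 * u = 1 selects zero- rather than sign-extension of the loaded element,
 * and msz is log2 of the memory element size.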
*/ 5123 static gen_helper_gvec_mem_scatter * const 5124 gather_load_fn32[2][2][2][2][2][3] = { 5125 { /* MTE Inactive */ 5126 { /* Little-endian */ 5127 { { { gen_helper_sve_ldbss_zsu, 5128 gen_helper_sve_ldhss_le_zsu, 5129 NULL, }, 5130 { gen_helper_sve_ldbsu_zsu, 5131 gen_helper_sve_ldhsu_le_zsu, 5132 gen_helper_sve_ldss_le_zsu, } }, 5133 { { gen_helper_sve_ldbss_zss, 5134 gen_helper_sve_ldhss_le_zss, 5135 NULL, }, 5136 { gen_helper_sve_ldbsu_zss, 5137 gen_helper_sve_ldhsu_le_zss, 5138 gen_helper_sve_ldss_le_zss, } } }, 5139 5140 /* First-fault */ 5141 { { { gen_helper_sve_ldffbss_zsu, 5142 gen_helper_sve_ldffhss_le_zsu, 5143 NULL, }, 5144 { gen_helper_sve_ldffbsu_zsu, 5145 gen_helper_sve_ldffhsu_le_zsu, 5146 gen_helper_sve_ldffss_le_zsu, } }, 5147 { { gen_helper_sve_ldffbss_zss, 5148 gen_helper_sve_ldffhss_le_zss, 5149 NULL, }, 5150 { gen_helper_sve_ldffbsu_zss, 5151 gen_helper_sve_ldffhsu_le_zss, 5152 gen_helper_sve_ldffss_le_zss, } } } }, 5153 5154 { /* Big-endian */ 5155 { { { gen_helper_sve_ldbss_zsu, 5156 gen_helper_sve_ldhss_be_zsu, 5157 NULL, }, 5158 { gen_helper_sve_ldbsu_zsu, 5159 gen_helper_sve_ldhsu_be_zsu, 5160 gen_helper_sve_ldss_be_zsu, } }, 5161 { { gen_helper_sve_ldbss_zss, 5162 gen_helper_sve_ldhss_be_zss, 5163 NULL, }, 5164 { gen_helper_sve_ldbsu_zss, 5165 gen_helper_sve_ldhsu_be_zss, 5166 gen_helper_sve_ldss_be_zss, } } }, 5167 5168 /* First-fault */ 5169 { { { gen_helper_sve_ldffbss_zsu, 5170 gen_helper_sve_ldffhss_be_zsu, 5171 NULL, }, 5172 { gen_helper_sve_ldffbsu_zsu, 5173 gen_helper_sve_ldffhsu_be_zsu, 5174 gen_helper_sve_ldffss_be_zsu, } }, 5175 { { gen_helper_sve_ldffbss_zss, 5176 gen_helper_sve_ldffhss_be_zss, 5177 NULL, }, 5178 { gen_helper_sve_ldffbsu_zss, 5179 gen_helper_sve_ldffhsu_be_zss, 5180 gen_helper_sve_ldffss_be_zss, } } } } }, 5181 { /* MTE Active */ 5182 { /* Little-endian */ 5183 { { { gen_helper_sve_ldbss_zsu_mte, 5184 gen_helper_sve_ldhss_le_zsu_mte, 5185 NULL, }, 5186 { gen_helper_sve_ldbsu_zsu_mte, 5187 gen_helper_sve_ldhsu_le_zsu_mte, 5188 gen_helper_sve_ldss_le_zsu_mte, } }, 5189 { { gen_helper_sve_ldbss_zss_mte, 5190 gen_helper_sve_ldhss_le_zss_mte, 5191 NULL, }, 5192 { gen_helper_sve_ldbsu_zss_mte, 5193 gen_helper_sve_ldhsu_le_zss_mte, 5194 gen_helper_sve_ldss_le_zss_mte, } } }, 5195 5196 /* First-fault */ 5197 { { { gen_helper_sve_ldffbss_zsu_mte, 5198 gen_helper_sve_ldffhss_le_zsu_mte, 5199 NULL, }, 5200 { gen_helper_sve_ldffbsu_zsu_mte, 5201 gen_helper_sve_ldffhsu_le_zsu_mte, 5202 gen_helper_sve_ldffss_le_zsu_mte, } }, 5203 { { gen_helper_sve_ldffbss_zss_mte, 5204 gen_helper_sve_ldffhss_le_zss_mte, 5205 NULL, }, 5206 { gen_helper_sve_ldffbsu_zss_mte, 5207 gen_helper_sve_ldffhsu_le_zss_mte, 5208 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5209 5210 { /* Big-endian */ 5211 { { { gen_helper_sve_ldbss_zsu_mte, 5212 gen_helper_sve_ldhss_be_zsu_mte, 5213 NULL, }, 5214 { gen_helper_sve_ldbsu_zsu_mte, 5215 gen_helper_sve_ldhsu_be_zsu_mte, 5216 gen_helper_sve_ldss_be_zsu_mte, } }, 5217 { { gen_helper_sve_ldbss_zss_mte, 5218 gen_helper_sve_ldhss_be_zss_mte, 5219 NULL, }, 5220 { gen_helper_sve_ldbsu_zss_mte, 5221 gen_helper_sve_ldhsu_be_zss_mte, 5222 gen_helper_sve_ldss_be_zss_mte, } } }, 5223 5224 /* First-fault */ 5225 { { { gen_helper_sve_ldffbss_zsu_mte, 5226 gen_helper_sve_ldffhss_be_zsu_mte, 5227 NULL, }, 5228 { gen_helper_sve_ldffbsu_zsu_mte, 5229 gen_helper_sve_ldffhsu_be_zsu_mte, 5230 gen_helper_sve_ldffss_be_zsu_mte, } }, 5231 { { gen_helper_sve_ldffbss_zss_mte, 5232 gen_helper_sve_ldffhss_be_zss_mte, 5233 NULL, }, 5234 { 
gen_helper_sve_ldffbsu_zss_mte, 5235 gen_helper_sve_ldffhsu_be_zss_mte, 5236 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 5237 }; 5238 5239 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5240 static gen_helper_gvec_mem_scatter * const 5241 gather_load_fn64[2][2][2][3][2][4] = { 5242 { /* MTE Inactive */ 5243 { /* Little-endian */ 5244 { { { gen_helper_sve_ldbds_zsu, 5245 gen_helper_sve_ldhds_le_zsu, 5246 gen_helper_sve_ldsds_le_zsu, 5247 NULL, }, 5248 { gen_helper_sve_ldbdu_zsu, 5249 gen_helper_sve_ldhdu_le_zsu, 5250 gen_helper_sve_ldsdu_le_zsu, 5251 gen_helper_sve_lddd_le_zsu, } }, 5252 { { gen_helper_sve_ldbds_zss, 5253 gen_helper_sve_ldhds_le_zss, 5254 gen_helper_sve_ldsds_le_zss, 5255 NULL, }, 5256 { gen_helper_sve_ldbdu_zss, 5257 gen_helper_sve_ldhdu_le_zss, 5258 gen_helper_sve_ldsdu_le_zss, 5259 gen_helper_sve_lddd_le_zss, } }, 5260 { { gen_helper_sve_ldbds_zd, 5261 gen_helper_sve_ldhds_le_zd, 5262 gen_helper_sve_ldsds_le_zd, 5263 NULL, }, 5264 { gen_helper_sve_ldbdu_zd, 5265 gen_helper_sve_ldhdu_le_zd, 5266 gen_helper_sve_ldsdu_le_zd, 5267 gen_helper_sve_lddd_le_zd, } } }, 5268 5269 /* First-fault */ 5270 { { { gen_helper_sve_ldffbds_zsu, 5271 gen_helper_sve_ldffhds_le_zsu, 5272 gen_helper_sve_ldffsds_le_zsu, 5273 NULL, }, 5274 { gen_helper_sve_ldffbdu_zsu, 5275 gen_helper_sve_ldffhdu_le_zsu, 5276 gen_helper_sve_ldffsdu_le_zsu, 5277 gen_helper_sve_ldffdd_le_zsu, } }, 5278 { { gen_helper_sve_ldffbds_zss, 5279 gen_helper_sve_ldffhds_le_zss, 5280 gen_helper_sve_ldffsds_le_zss, 5281 NULL, }, 5282 { gen_helper_sve_ldffbdu_zss, 5283 gen_helper_sve_ldffhdu_le_zss, 5284 gen_helper_sve_ldffsdu_le_zss, 5285 gen_helper_sve_ldffdd_le_zss, } }, 5286 { { gen_helper_sve_ldffbds_zd, 5287 gen_helper_sve_ldffhds_le_zd, 5288 gen_helper_sve_ldffsds_le_zd, 5289 NULL, }, 5290 { gen_helper_sve_ldffbdu_zd, 5291 gen_helper_sve_ldffhdu_le_zd, 5292 gen_helper_sve_ldffsdu_le_zd, 5293 gen_helper_sve_ldffdd_le_zd, } } } }, 5294 { /* Big-endian */ 5295 { { { gen_helper_sve_ldbds_zsu, 5296 gen_helper_sve_ldhds_be_zsu, 5297 gen_helper_sve_ldsds_be_zsu, 5298 NULL, }, 5299 { gen_helper_sve_ldbdu_zsu, 5300 gen_helper_sve_ldhdu_be_zsu, 5301 gen_helper_sve_ldsdu_be_zsu, 5302 gen_helper_sve_lddd_be_zsu, } }, 5303 { { gen_helper_sve_ldbds_zss, 5304 gen_helper_sve_ldhds_be_zss, 5305 gen_helper_sve_ldsds_be_zss, 5306 NULL, }, 5307 { gen_helper_sve_ldbdu_zss, 5308 gen_helper_sve_ldhdu_be_zss, 5309 gen_helper_sve_ldsdu_be_zss, 5310 gen_helper_sve_lddd_be_zss, } }, 5311 { { gen_helper_sve_ldbds_zd, 5312 gen_helper_sve_ldhds_be_zd, 5313 gen_helper_sve_ldsds_be_zd, 5314 NULL, }, 5315 { gen_helper_sve_ldbdu_zd, 5316 gen_helper_sve_ldhdu_be_zd, 5317 gen_helper_sve_ldsdu_be_zd, 5318 gen_helper_sve_lddd_be_zd, } } }, 5319 5320 /* First-fault */ 5321 { { { gen_helper_sve_ldffbds_zsu, 5322 gen_helper_sve_ldffhds_be_zsu, 5323 gen_helper_sve_ldffsds_be_zsu, 5324 NULL, }, 5325 { gen_helper_sve_ldffbdu_zsu, 5326 gen_helper_sve_ldffhdu_be_zsu, 5327 gen_helper_sve_ldffsdu_be_zsu, 5328 gen_helper_sve_ldffdd_be_zsu, } }, 5329 { { gen_helper_sve_ldffbds_zss, 5330 gen_helper_sve_ldffhds_be_zss, 5331 gen_helper_sve_ldffsds_be_zss, 5332 NULL, }, 5333 { gen_helper_sve_ldffbdu_zss, 5334 gen_helper_sve_ldffhdu_be_zss, 5335 gen_helper_sve_ldffsdu_be_zss, 5336 gen_helper_sve_ldffdd_be_zss, } }, 5337 { { gen_helper_sve_ldffbds_zd, 5338 gen_helper_sve_ldffhds_be_zd, 5339 gen_helper_sve_ldffsds_be_zd, 5340 NULL, }, 5341 { gen_helper_sve_ldffbdu_zd, 5342 gen_helper_sve_ldffhdu_be_zd, 5343 gen_helper_sve_ldffsdu_be_zd, 5344 
gen_helper_sve_ldffdd_be_zd, } } } } }, 5345 { /* MTE Active */ 5346 { /* Little-endian */ 5347 { { { gen_helper_sve_ldbds_zsu_mte, 5348 gen_helper_sve_ldhds_le_zsu_mte, 5349 gen_helper_sve_ldsds_le_zsu_mte, 5350 NULL, }, 5351 { gen_helper_sve_ldbdu_zsu_mte, 5352 gen_helper_sve_ldhdu_le_zsu_mte, 5353 gen_helper_sve_ldsdu_le_zsu_mte, 5354 gen_helper_sve_lddd_le_zsu_mte, } }, 5355 { { gen_helper_sve_ldbds_zss_mte, 5356 gen_helper_sve_ldhds_le_zss_mte, 5357 gen_helper_sve_ldsds_le_zss_mte, 5358 NULL, }, 5359 { gen_helper_sve_ldbdu_zss_mte, 5360 gen_helper_sve_ldhdu_le_zss_mte, 5361 gen_helper_sve_ldsdu_le_zss_mte, 5362 gen_helper_sve_lddd_le_zss_mte, } }, 5363 { { gen_helper_sve_ldbds_zd_mte, 5364 gen_helper_sve_ldhds_le_zd_mte, 5365 gen_helper_sve_ldsds_le_zd_mte, 5366 NULL, }, 5367 { gen_helper_sve_ldbdu_zd_mte, 5368 gen_helper_sve_ldhdu_le_zd_mte, 5369 gen_helper_sve_ldsdu_le_zd_mte, 5370 gen_helper_sve_lddd_le_zd_mte, } } }, 5371 5372 /* First-fault */ 5373 { { { gen_helper_sve_ldffbds_zsu_mte, 5374 gen_helper_sve_ldffhds_le_zsu_mte, 5375 gen_helper_sve_ldffsds_le_zsu_mte, 5376 NULL, }, 5377 { gen_helper_sve_ldffbdu_zsu_mte, 5378 gen_helper_sve_ldffhdu_le_zsu_mte, 5379 gen_helper_sve_ldffsdu_le_zsu_mte, 5380 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5381 { { gen_helper_sve_ldffbds_zss_mte, 5382 gen_helper_sve_ldffhds_le_zss_mte, 5383 gen_helper_sve_ldffsds_le_zss_mte, 5384 NULL, }, 5385 { gen_helper_sve_ldffbdu_zss_mte, 5386 gen_helper_sve_ldffhdu_le_zss_mte, 5387 gen_helper_sve_ldffsdu_le_zss_mte, 5388 gen_helper_sve_ldffdd_le_zss_mte, } }, 5389 { { gen_helper_sve_ldffbds_zd_mte, 5390 gen_helper_sve_ldffhds_le_zd_mte, 5391 gen_helper_sve_ldffsds_le_zd_mte, 5392 NULL, }, 5393 { gen_helper_sve_ldffbdu_zd_mte, 5394 gen_helper_sve_ldffhdu_le_zd_mte, 5395 gen_helper_sve_ldffsdu_le_zd_mte, 5396 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5397 { /* Big-endian */ 5398 { { { gen_helper_sve_ldbds_zsu_mte, 5399 gen_helper_sve_ldhds_be_zsu_mte, 5400 gen_helper_sve_ldsds_be_zsu_mte, 5401 NULL, }, 5402 { gen_helper_sve_ldbdu_zsu_mte, 5403 gen_helper_sve_ldhdu_be_zsu_mte, 5404 gen_helper_sve_ldsdu_be_zsu_mte, 5405 gen_helper_sve_lddd_be_zsu_mte, } }, 5406 { { gen_helper_sve_ldbds_zss_mte, 5407 gen_helper_sve_ldhds_be_zss_mte, 5408 gen_helper_sve_ldsds_be_zss_mte, 5409 NULL, }, 5410 { gen_helper_sve_ldbdu_zss_mte, 5411 gen_helper_sve_ldhdu_be_zss_mte, 5412 gen_helper_sve_ldsdu_be_zss_mte, 5413 gen_helper_sve_lddd_be_zss_mte, } }, 5414 { { gen_helper_sve_ldbds_zd_mte, 5415 gen_helper_sve_ldhds_be_zd_mte, 5416 gen_helper_sve_ldsds_be_zd_mte, 5417 NULL, }, 5418 { gen_helper_sve_ldbdu_zd_mte, 5419 gen_helper_sve_ldhdu_be_zd_mte, 5420 gen_helper_sve_ldsdu_be_zd_mte, 5421 gen_helper_sve_lddd_be_zd_mte, } } }, 5422 5423 /* First-fault */ 5424 { { { gen_helper_sve_ldffbds_zsu_mte, 5425 gen_helper_sve_ldffhds_be_zsu_mte, 5426 gen_helper_sve_ldffsds_be_zsu_mte, 5427 NULL, }, 5428 { gen_helper_sve_ldffbdu_zsu_mte, 5429 gen_helper_sve_ldffhdu_be_zsu_mte, 5430 gen_helper_sve_ldffsdu_be_zsu_mte, 5431 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5432 { { gen_helper_sve_ldffbds_zss_mte, 5433 gen_helper_sve_ldffhds_be_zss_mte, 5434 gen_helper_sve_ldffsds_be_zss_mte, 5435 NULL, }, 5436 { gen_helper_sve_ldffbdu_zss_mte, 5437 gen_helper_sve_ldffhdu_be_zss_mte, 5438 gen_helper_sve_ldffsdu_be_zss_mte, 5439 gen_helper_sve_ldffdd_be_zss_mte, } }, 5440 { { gen_helper_sve_ldffbds_zd_mte, 5441 gen_helper_sve_ldffhds_be_zd_mte, 5442 gen_helper_sve_ldffsds_be_zd_mte, 5443 NULL, }, 5444 { gen_helper_sve_ldffbdu_zd_mte, 5445 
gen_helper_sve_ldffhdu_be_zd_mte, 5446 gen_helper_sve_ldffsdu_be_zd_mte, 5447 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 5448 }; 5449 5450 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 5451 { 5452 gen_helper_gvec_mem_scatter *fn = NULL; 5453 bool be = s->be_data == MO_BE; 5454 bool mte = s->mte_active[0]; 5455 5456 if (!dc_isar_feature(aa64_sve, s)) { 5457 return false; 5458 } 5459 s->is_nonstreaming = true; 5460 if (!sve_access_check(s)) { 5461 return true; 5462 } 5463 5464 switch (a->esz) { 5465 case MO_32: 5466 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 5467 break; 5468 case MO_64: 5469 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 5470 break; 5471 } 5472 assert(fn != NULL); 5473 5474 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5475 cpu_reg_sp(s, a->rn), a->msz, false, fn); 5476 return true; 5477 } 5478 5479 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 5480 { 5481 gen_helper_gvec_mem_scatter *fn = NULL; 5482 bool be = s->be_data == MO_BE; 5483 bool mte = s->mte_active[0]; 5484 5485 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { 5486 return false; 5487 } 5488 if (!dc_isar_feature(aa64_sve, s)) { 5489 return false; 5490 } 5491 s->is_nonstreaming = true; 5492 if (!sve_access_check(s)) { 5493 return true; 5494 } 5495 5496 switch (a->esz) { 5497 case MO_32: 5498 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 5499 break; 5500 case MO_64: 5501 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 5502 break; 5503 } 5504 assert(fn != NULL); 5505 5506 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 5507 * by loading the immediate into the scalar parameter. 5508 */ 5509 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5510 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 5511 return true; 5512 } 5513 5514 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 5515 { 5516 gen_helper_gvec_mem_scatter *fn = NULL; 5517 bool be = s->be_data == MO_BE; 5518 bool mte = s->mte_active[0]; 5519 5520 if (a->esz < a->msz + !a->u) { 5521 return false; 5522 } 5523 if (!dc_isar_feature(aa64_sve2, s)) { 5524 return false; 5525 } 5526 s->is_nonstreaming = true; 5527 if (!sve_access_check(s)) { 5528 return true; 5529 } 5530 5531 switch (a->esz) { 5532 case MO_32: 5533 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 5534 break; 5535 case MO_64: 5536 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 5537 break; 5538 } 5539 assert(fn != NULL); 5540 5541 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5542 cpu_reg(s, a->rm), a->msz, false, fn); 5543 return true; 5544 } 5545 5546 /* Indexed by [mte][be][xs][msz]. 
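 * As for the gather tables above, xs selects the offset extension and
 * msz is log2 of the memory element size; there is no ff or u index
 * because stores have neither first-fault forms nor a choice of data
 * extension.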
*/ 5547 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { 5548 { /* MTE Inactive */ 5549 { /* Little-endian */ 5550 { gen_helper_sve_stbs_zsu, 5551 gen_helper_sve_sths_le_zsu, 5552 gen_helper_sve_stss_le_zsu, }, 5553 { gen_helper_sve_stbs_zss, 5554 gen_helper_sve_sths_le_zss, 5555 gen_helper_sve_stss_le_zss, } }, 5556 { /* Big-endian */ 5557 { gen_helper_sve_stbs_zsu, 5558 gen_helper_sve_sths_be_zsu, 5559 gen_helper_sve_stss_be_zsu, }, 5560 { gen_helper_sve_stbs_zss, 5561 gen_helper_sve_sths_be_zss, 5562 gen_helper_sve_stss_be_zss, } } }, 5563 { /* MTE Active */ 5564 { /* Little-endian */ 5565 { gen_helper_sve_stbs_zsu_mte, 5566 gen_helper_sve_sths_le_zsu_mte, 5567 gen_helper_sve_stss_le_zsu_mte, }, 5568 { gen_helper_sve_stbs_zss_mte, 5569 gen_helper_sve_sths_le_zss_mte, 5570 gen_helper_sve_stss_le_zss_mte, } }, 5571 { /* Big-endian */ 5572 { gen_helper_sve_stbs_zsu_mte, 5573 gen_helper_sve_sths_be_zsu_mte, 5574 gen_helper_sve_stss_be_zsu_mte, }, 5575 { gen_helper_sve_stbs_zss_mte, 5576 gen_helper_sve_sths_be_zss_mte, 5577 gen_helper_sve_stss_be_zss_mte, } } }, 5578 }; 5579 5580 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5581 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { 5582 { /* MTE Inactive */ 5583 { /* Little-endian */ 5584 { gen_helper_sve_stbd_zsu, 5585 gen_helper_sve_sthd_le_zsu, 5586 gen_helper_sve_stsd_le_zsu, 5587 gen_helper_sve_stdd_le_zsu, }, 5588 { gen_helper_sve_stbd_zss, 5589 gen_helper_sve_sthd_le_zss, 5590 gen_helper_sve_stsd_le_zss, 5591 gen_helper_sve_stdd_le_zss, }, 5592 { gen_helper_sve_stbd_zd, 5593 gen_helper_sve_sthd_le_zd, 5594 gen_helper_sve_stsd_le_zd, 5595 gen_helper_sve_stdd_le_zd, } }, 5596 { /* Big-endian */ 5597 { gen_helper_sve_stbd_zsu, 5598 gen_helper_sve_sthd_be_zsu, 5599 gen_helper_sve_stsd_be_zsu, 5600 gen_helper_sve_stdd_be_zsu, }, 5601 { gen_helper_sve_stbd_zss, 5602 gen_helper_sve_sthd_be_zss, 5603 gen_helper_sve_stsd_be_zss, 5604 gen_helper_sve_stdd_be_zss, }, 5605 { gen_helper_sve_stbd_zd, 5606 gen_helper_sve_sthd_be_zd, 5607 gen_helper_sve_stsd_be_zd, 5608 gen_helper_sve_stdd_be_zd, } } }, 5609 { /* MTE Active */ 5610 { /* Little-endian */ 5611 { gen_helper_sve_stbd_zsu_mte, 5612 gen_helper_sve_sthd_le_zsu_mte, 5613 gen_helper_sve_stsd_le_zsu_mte, 5614 gen_helper_sve_stdd_le_zsu_mte, }, 5615 { gen_helper_sve_stbd_zss_mte, 5616 gen_helper_sve_sthd_le_zss_mte, 5617 gen_helper_sve_stsd_le_zss_mte, 5618 gen_helper_sve_stdd_le_zss_mte, }, 5619 { gen_helper_sve_stbd_zd_mte, 5620 gen_helper_sve_sthd_le_zd_mte, 5621 gen_helper_sve_stsd_le_zd_mte, 5622 gen_helper_sve_stdd_le_zd_mte, } }, 5623 { /* Big-endian */ 5624 { gen_helper_sve_stbd_zsu_mte, 5625 gen_helper_sve_sthd_be_zsu_mte, 5626 gen_helper_sve_stsd_be_zsu_mte, 5627 gen_helper_sve_stdd_be_zsu_mte, }, 5628 { gen_helper_sve_stbd_zss_mte, 5629 gen_helper_sve_sthd_be_zss_mte, 5630 gen_helper_sve_stsd_be_zss_mte, 5631 gen_helper_sve_stdd_be_zss_mte, }, 5632 { gen_helper_sve_stbd_zd_mte, 5633 gen_helper_sve_sthd_be_zd_mte, 5634 gen_helper_sve_stsd_be_zd_mte, 5635 gen_helper_sve_stdd_be_zd_mte, } } }, 5636 }; 5637 5638 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) 5639 { 5640 gen_helper_gvec_mem_scatter *fn; 5641 bool be = s->be_data == MO_BE; 5642 bool mte = s->mte_active[0]; 5643 5644 if (a->esz < a->msz || (a->msz == 0 && a->scale)) { 5645 return false; 5646 } 5647 if (!dc_isar_feature(aa64_sve, s)) { 5648 return false; 5649 } 5650 s->is_nonstreaming = true; 5651 if (!sve_access_check(s)) { 5652 return true;

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}

static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    s->is_nonstreaming = true;
    (void)sve_access_check(s);
    return true;
}

/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */
5784 */ 5785 5786 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 5787 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 5788 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 5789 5790 /* 5791 * SVE2 Integer Multiply - Unpredicated 5792 */ 5793 5794 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 5795 5796 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 5797 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 5798 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 5799 }; 5800 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5801 smulh_zzz_fns[a->esz], a, 0) 5802 5803 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 5804 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 5805 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 5806 }; 5807 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5808 umulh_zzz_fns[a->esz], a, 0) 5809 5810 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5811 gen_helper_gvec_pmul_b, a, 0) 5812 5813 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { 5814 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 5815 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 5816 }; 5817 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5818 sqdmulh_zzz_fns[a->esz], a, 0) 5819 5820 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 5821 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 5822 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 5823 }; 5824 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5825 sqrdmulh_zzz_fns[a->esz], a, 0) 5826 5827 /* 5828 * SVE2 Integer - Predicated 5829 */ 5830 5831 static gen_helper_gvec_4 * const sadlp_fns[4] = { 5832 NULL, gen_helper_sve2_sadalp_zpzz_h, 5833 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 5834 }; 5835 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5836 sadlp_fns[a->esz], a, 0) 5837 5838 static gen_helper_gvec_4 * const uadlp_fns[4] = { 5839 NULL, gen_helper_sve2_uadalp_zpzz_h, 5840 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 5841 }; 5842 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5843 uadlp_fns[a->esz], a, 0) 5844 5845 /* 5846 * SVE2 integer unary operations (predicated) 5847 */ 5848 5849 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 5850 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 5851 5852 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 5853 a->esz == 2 ? 

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)

DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)

/*
 * SVE2 Widening Integer Arithmetic
 */
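
/*
 * For the B/T pairs below, the integer 'data' argument packs the
 * top/bottom selectors: bit 0 selects the top half of the first source
 * and bit 1 the top half of the second.  E.g. SADDLT passes 3
 * (top + top), SADDLBT passes 2 (bottom + top), and SSUBLTB passes 1
 * (top - bottom).
 */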

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)

static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };

    if (a->esz == 0) {
        if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
            return false;
        }
        s->is_nonstreaming = true;
    } else if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)

static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}

static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
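
/*
 * For the SHLL expansions above and do_shll_tb below, the gvec 'imm'
 * packs (shl << 1) | top: bit 0 selects the bottom or top half of each
 * source element and the remaining bits give the left-shift count.
 * E.g. for MO_16 (halfbits == 8) the bottom form shifts left by 8 and
 * then right by (8 - shl), widening and shifting in one pass.
 */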

static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sshll_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
};
static const GVecGen2i sshll_ops[3] = {
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_h,
      .vece = MO_16 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_s,
      .vece = MO_32 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_d,
      .vece = MO_64 }
};
TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)

static const TCGOpcode ushll_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
};
static const GVecGen2i ushll_ops[3] = {
    { .fni8 = gen_ushll16_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_h,
      .vece = MO_16 },
    { .fni8 = gen_ushll32_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_s,
      .vece = MO_32 },
    { .fni8 = gen_ushll64_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_d,
      .vece = MO_64 },
};
TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)

static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bgrp_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)

static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)

TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)

static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
                              const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}
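
/*
 * In the narrowing expansions that follow, the B (bottom) forms write
 * each narrowed result to the even half-width elements with the odd
 * elements zeroed, while the T (top) forms use .load_dest and a bitsel
 * to deposit results into the odd elements, preserving the even ones.
 */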

static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
}

static const GVecGen2 sqxtnb_ops[3] = {
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)

static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const GVecGen2 sqxtnt_ops[3] = {
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)

static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
}

static const GVecGen2 uqxtnb_ops[3] = {
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)

static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const GVecGen2 uqxtnt_ops[3] = {
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)

static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
}

static const GVecGen2 sqxtunb_ops[3] = {
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)

static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)

static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
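
/*
 * The same bottom/top split applies to the shift-narrow expansions
 * below: SHRNB shifts right and masks each result into the even
 * half-width elements, while SHRNT shifts the result up into the odd
 * elements and merges it with the existing destination.
 */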

static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
}

static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)

static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)

static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
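
/*
 * The rounding narrowing shifts provide only .fno: with no inline
 * expansion supplied, the gvec framework should always fall back to
 * the out-of-line helper.  The same pattern recurs for the rounding
 * saturating forms below.
 */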

static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
}

static const TCGOpcode sqshrunb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunb_ops[3] = {
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)

static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode sqshrunt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunt_ops[3] = {
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)

static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)

static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
}

static const TCGOpcode sqshrnb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnb_ops[3] = {
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)

static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode sqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnt_ops[3] = {
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)

static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)

static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
}

static const TCGOpcode uqshrnb_vec_list[] = {
    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnb_ops[3] = {
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)

static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)

static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)

#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
    static gen_helper_gvec_3 * const name##_fns[4] = {                    \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)

static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */

TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)

static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)

TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, 0)
TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesimc, a->rd, a->rd, 0)

TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, 0)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aesd, a, 0)

TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)

TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)

static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL, gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
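
/*
 * The widening FMLAL/FMLSL group below shares a single pair of '_s'
 * helpers: the data argument packs 'sub' in bit 0, 'sel' in bit 1,
 * and, for the indexed form, the element index above them, so one
 * helper covers add/subtract and top/bottom variants.
 */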

static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, tcg_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, tcg_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)

static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, tcg_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);

    /* Apply to either copy the source, or write zeros. */
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
    return true;
}
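
/*
 * The clamp expansions below compute d = MIN(MAX(a, n), m) per element,
 * signed for SCLAMP and unsigned for UCLAMP, with inline scalar and
 * vector variants plus out-of-line helpers for the fallback path.
 */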

static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}

static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_sclamp_i32,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_sclamp_i64,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)

static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}

static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_uclamp_i32,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_uclamp_i64,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)