/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    /*
     * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the
     * trans function will check for esz < 0), so we can return any
     * value we like from here in that case as long as we avoid UB.
     */
    int esz = tszimm_esz(s, x);
    if (esz < 0) {
        return esz;
    }
    return (16 << esz) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    /* As with tszimm_shr(), value will be unused if esz < 0 */
    int esz = tszimm_esz(s, x);
    if (esz < 0) {
        return esz;
    }
    return x - (8 << esz);
}
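/*
 * Informative example of the tsz:imm3 encoding handled above (not used
 * by the code): the element size is the index of the most significant
 * set bit of tsz, so tsz = 0b0001 selects bytes, 0b001x halfwords,
 * 0b01xx words and 0b1xxx doublewords.  For bytes, x = tsz:imm3 runs
 * from 8 to 15, so tszimm_shr() yields 16 - x (shifts of 8 down to 1)
 * and tszimm_shl() yields x - 8 (shifts of 0 up to 7).  tsz = 0 has no
 * set bit and clz32(0) == 32, hence the -1 result from tszimm_esz().
 */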
/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}
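/*
 * Note the return-value convention used by this and all similar
 * expanders below: returning true means the instruction was decoded,
 * even when sve_access_check() fails, because the access check itself
 * generates the exception.  Returning false (e.g. for a NULL helper
 * selected by an invalid element size) diagnoses an unallocated
 * encoding.
 */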
static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
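/*
 * The FP status selection above recurs throughout this file:
 * half-precision operations use FPST_FPCR_F16, which honours the
 * FPCR.FZ16 flush-to-zero control independently of the FZ control
 * used for single and double precision.
 */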
/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}

/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus float_status. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, tcg_env, dofs);
    tcg_gen_addi_ptr(gptr, tcg_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));

    do_pred_flags(t);
}
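/*
 * For reference, the architectural PredTest sets N if the first
 * active element of the result is true, Z if no active element is
 * true, and C if the last active element is not true; V is zero.
 * The helpers return those flags packed so that do_pred_flags()
 * above can recover N from bit 31 and Z and C from bits 1 and 0.
 */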
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};

static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a)
TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
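/*
 * For reference, with k the select operand, the BSL family of insns
 * computes:
 *   BSL:   d = (n & k) | (m & ~k)
 *   BSL1N: d = (~n & k) | (m & ~k)
 *   BSL2N: d = (n & k) | (~m & ~k)
 *   NBSL:  d = ~((n & k) | (m & ~k))
 * The expansions below implement these directly when the host has no
 * native bitsel operation.
 */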
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {             \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,         \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                       \
               name##_zpzz_fns[a->esz], a, 0)
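/*
 * As an illustration, DO_ZPZZ(AND_zpzz, aa64_sve, sve_and) expands to
 * a per-element-size helper table plus its trans function:
 *
 *   static gen_helper_gvec_4 * const sve_and_zpzz_fns[4] = {
 *       gen_helper_sve_and_zpzz_b, gen_helper_sve_and_zpzz_h,
 *       gen_helper_sve_and_zpzz_s, gen_helper_sve_and_zpzz_d,
 *   };
 *   TRANS_FEAT(AND_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz,
 *              sve_and_zpzz_fns[a->esz], a, 0)
 */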
DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {          \
        gen_helper_##name##_b, gen_helper_##name##_h,           \
        gen_helper_##name##_s, gen_helper_##name##_d,           \
    };                                                          \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL, gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL, gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL, gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL, gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);

    write_fp_dreg(s, a->rd, temp);
    return true;
}
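/*
 * E.g. UADDV D0, P0, Z0.B sums the active byte elements of Z0 into a
 * 64-bit scalar; the helper computes the value and write_fp_dreg()
 * stores it to D0, zeroing the high bits of the Vreg as usual.
 */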
#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {              \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,           \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,           \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}
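/*
 * Concretely, for byte elements: LSR Zd.B, Pg/M, Zd.B, #8 must zero
 * the active elements, which do_movz_zpz() above provides, while
 * ASR Zd.B, Pg/M, Zd.B, #8 gives the same result as a shift by 7,
 * so the immediate is simply clamped.
 */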
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {             \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,            \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /*
         * Shift by element size is architecturally valid.  For
         * arithmetic right-shift, it's the same as by one less.
         * Otherwise it is a zeroing operation.
         */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {              \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,   \
        gen_helper_sve_##name##_zzw_s, NULL                             \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                    \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])

/*
 *** SVE Index Generation Group
 */

static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);
    }
    return true;
}

TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
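/*
 * For example, INDEX Z0.S, #1, #2 writes 1, 3, 5, ... to the word
 * elements of Z0: element i receives start + i * incr.
 */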
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}
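/*
 * E.g. with a 256-bit vector length, vec_full_reg_size() is 32, so
 * ADDVL X0, SP, #2 computes SP + 64 and RDVL X0, #1 returns 32;
 * the PL variants scale by the predicate size, an eighth of that.
 */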
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, tcg_env, nofs);
        tcg_gen_ld_i64(pm, tcg_env, mofs);
        tcg_gen_ld_i64(pg, tcg_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, tcg_env, dofs);

        do_predtest1(pd, pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
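/*
 * The psz == 8 fast path above applies whenever the (gvec-rounded)
 * predicate fits in a single 64-bit word, i.e. for vector lengths up
 * to 512 bits; longer VLs take the gvec path and run PredTest over
 * several words.
 */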
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}
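/*
 * SEL maps directly onto bitsel with the governing predicate as the
 * selector: Pd = (Pg & Pn) | (~Pg & Pm).  SEL has no flag-setting
 * form, hence the a->s check above.
 */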
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, tcg_env, nofs);
            tcg_gen_ld_i64(pg, tcg_env, gofs);
            do_predtest1(pn, pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
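/*
 * Worked example: with a 256-bit vector and esz == MO_32 there are 8
 * elements, so POW2 yields 8, VL7 yields 7, VL16 yields 0 (the bound
 * exceeds the element count), MUL3 yields 6 and ALL yields 8.
 */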
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, tcg_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, tcg_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, tcg_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, tcg_env, ofs + i);
        }
    }

 done:
    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
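/*
 * Worked example: PTRUE P0.S, VL7 with a 256-bit vector gives
 * numelem = 7 and setsz = 28 predicate bits, so word is
 * pred_esz_masks[MO_32] = 0x1111111111111111ull and lastword keeps
 * only its low 28 bits.  fullsz is 32 predicate bits here, so the
 * whole register is written by the single-store fast path.
 */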
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));

    do_pred_flags(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}

/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
    }
}
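/*
 * The signed cases use the classic xor trick: for the addition,
 * overflow occurred iff the operands had the same sign and the result
 * differs from them, i.e. (res ^ val) & ~(reg ^ val) has its sign bit
 * set.  E.g. INT64_MAX + 1 wraps to INT64_MIN, that expression is
 * negative, and the movcond selects the INT64_MAX bound instead.
 */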
/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}
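/*
 * E.g. CNTD X0, VL4 with a 512-bit vector: there are 8 doubleword
 * elements, the VL4 pattern selects 4 of them, and X0 is set to
 * 4 * imm (the MUL multiplier, 1 when omitted in assembly).
 */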
static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
}

TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)

static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
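/*
 * These immediates reuse the AArch64 logical-immediate encoding, so
 * only repeating bitmask patterns are representable: e.g.
 * DUPM Z0.D, #0x5555555555555555 is encodable (a repeating 2-bit
 * pattern), whereas an arbitrary constant like 0x1234 is not and
 * must be materialized some other way.
 */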
2024 */ 2025 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2026 TCGv_i64 val) 2027 { 2028 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2029 static gen_cpy * const fns[4] = { 2030 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2031 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2032 }; 2033 unsigned vsz = vec_full_reg_size(s); 2034 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2035 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2036 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2037 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2038 2039 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 2040 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn)); 2041 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2042 2043 fns[esz](t_zd, t_zn, t_pg, val, desc); 2044 } 2045 2046 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2047 { 2048 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2049 return false; 2050 } 2051 if (sve_access_check(s)) { 2052 /* Decode the VFP immediate. */ 2053 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2054 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2055 } 2056 return true; 2057 } 2058 2059 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2060 { 2061 if (!dc_isar_feature(aa64_sve, s)) { 2062 return false; 2063 } 2064 if (sve_access_check(s)) { 2065 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2066 } 2067 return true; 2068 } 2069 2070 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2071 { 2072 static gen_helper_gvec_2i * const fns[4] = { 2073 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2074 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2075 }; 2076 2077 if (!dc_isar_feature(aa64_sve, s)) { 2078 return false; 2079 } 2080 if (sve_access_check(s)) { 2081 unsigned vsz = vec_full_reg_size(s); 2082 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2083 pred_full_reg_offset(s, a->pg), 2084 tcg_constant_i64(a->imm), 2085 vsz, vsz, 0, fns[a->esz]); 2086 } 2087 return true; 2088 } 2089 2090 /* 2091 *** SVE Permute Extract Group 2092 */ 2093 2094 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2095 { 2096 if (!sve_access_check(s)) { 2097 return true; 2098 } 2099 2100 unsigned vsz = vec_full_reg_size(s); 2101 unsigned n_ofs = imm >= vsz ? 0 : imm; 2102 unsigned n_siz = vsz - n_ofs; 2103 unsigned d = vec_full_reg_offset(s, rd); 2104 unsigned n = vec_full_reg_offset(s, rn); 2105 unsigned m = vec_full_reg_offset(s, rm); 2106 2107 /* Use host vector move insns if we have appropriate sizes 2108 * and no unfortunate overlap. 
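* EXT concatenates Zn[n_ofs..vsz) with Zm[0..n_ofs), so when both
* pieces are valid gvec sizes it reduces to two plain moves; e.g.
* vsz = 32, imm = 8: d[0..24) = n[8..32), then d[24..32) = m[0..8).
* The m != d test keeps the first move from clobbering Zm before it
* is read, and d == n is allowed only when the in-place copy cannot
* overlap (n_siz <= n_ofs).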
2109 */ 2110 if (m != d 2111 && n_ofs == size_for_gvec(n_ofs) 2112 && n_siz == size_for_gvec(n_siz) 2113 && (d != n || n_siz <= n_ofs)) { 2114 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2115 if (n_ofs != 0) { 2116 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2117 } 2118 } else { 2119 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2120 } 2121 return true; 2122 } 2123 2124 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2125 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2126 2127 /* 2128 *** SVE Permute - Unpredicated Group 2129 */ 2130 2131 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2132 { 2133 if (!dc_isar_feature(aa64_sve, s)) { 2134 return false; 2135 } 2136 if (sve_access_check(s)) { 2137 unsigned vsz = vec_full_reg_size(s); 2138 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2139 vsz, vsz, cpu_reg_sp(s, a->rn)); 2140 } 2141 return true; 2142 } 2143 2144 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2145 { 2146 if (!dc_isar_feature(aa64_sve, s)) { 2147 return false; 2148 } 2149 if ((a->imm & 0x1f) == 0) { 2150 return false; 2151 } 2152 if (sve_access_check(s)) { 2153 unsigned vsz = vec_full_reg_size(s); 2154 unsigned dofs = vec_full_reg_offset(s, a->rd); 2155 unsigned esz, index; 2156 2157 esz = ctz32(a->imm); 2158 index = a->imm >> (esz + 1); 2159 2160 if ((index << esz) < vsz) { 2161 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2162 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2163 } else { 2164 /* 2165 * While dup_mem handles 128-bit elements, dup_imm does not. 2166 * Thankfully element size doesn't matter for splatting zero. 2167 */ 2168 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2169 } 2170 } 2171 return true; 2172 } 2173 2174 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2175 { 2176 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2177 static gen_insr * const fns[4] = { 2178 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2179 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2180 }; 2181 unsigned vsz = vec_full_reg_size(s); 2182 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2183 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2184 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2185 2186 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd)); 2187 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2188 2189 fns[a->esz](t_zd, t_zn, val, desc); 2190 } 2191 2192 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2193 { 2194 if (!dc_isar_feature(aa64_sve, s)) { 2195 return false; 2196 } 2197 if (sve_access_check(s)) { 2198 TCGv_i64 t = tcg_temp_new_i64(); 2199 tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2200 do_insr_i64(s, a, t); 2201 } 2202 return true; 2203 } 2204 2205 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2206 { 2207 if (!dc_isar_feature(aa64_sve, s)) { 2208 return false; 2209 } 2210 if (sve_access_check(s)) { 2211 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2212 } 2213 return true; 2214 } 2215 2216 static gen_helper_gvec_2 * const rev_fns[4] = { 2217 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2218 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2219 }; 2220 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2221 2222 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2223 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2224 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2225 }; 2226 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 
0) 2227 2228 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2229 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2230 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2231 }; 2232 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2233 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2234 2235 static gen_helper_gvec_3 * const tbx_fns[4] = { 2236 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2237 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2238 }; 2239 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2240 2241 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2242 { 2243 static gen_helper_gvec_2 * const fns[4][2] = { 2244 { NULL, NULL }, 2245 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2246 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2247 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2248 }; 2249 2250 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2251 return false; 2252 } 2253 if (sve_access_check(s)) { 2254 unsigned vsz = vec_full_reg_size(s); 2255 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2256 vec_full_reg_offset(s, a->rn) 2257 + (a->h ? vsz / 2 : 0), 2258 vsz, vsz, 0, fns[a->esz][a->u]); 2259 } 2260 return true; 2261 } 2262 2263 /* 2264 *** SVE Permute - Predicates Group 2265 */ 2266 2267 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2268 gen_helper_gvec_3 *fn) 2269 { 2270 if (!sve_access_check(s)) { 2271 return true; 2272 } 2273 2274 unsigned vsz = pred_full_reg_size(s); 2275 2276 TCGv_ptr t_d = tcg_temp_new_ptr(); 2277 TCGv_ptr t_n = tcg_temp_new_ptr(); 2278 TCGv_ptr t_m = tcg_temp_new_ptr(); 2279 uint32_t desc = 0; 2280 2281 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2282 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2283 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2284 2285 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2286 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2287 tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm)); 2288 2289 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2290 return true; 2291 } 2292 2293 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2294 gen_helper_gvec_2 *fn) 2295 { 2296 if (!sve_access_check(s)) { 2297 return true; 2298 } 2299 2300 unsigned vsz = pred_full_reg_size(s); 2301 TCGv_ptr t_d = tcg_temp_new_ptr(); 2302 TCGv_ptr t_n = tcg_temp_new_ptr(); 2303 uint32_t desc = 0; 2304 2305 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2306 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2307 2308 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2309 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2310 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2311 2312 fn(t_d, t_n, tcg_constant_i32(desc)); 2313 return true; 2314 } 2315 2316 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2317 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2318 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2319 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2320 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2321 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2322 2323 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2324 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2325 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2326 2327 /* 2328 *** SVE 
Permute - Interleaving Group 2329 */ 2330 2331 static gen_helper_gvec_3 * const zip_fns[4] = { 2332 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2333 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2334 }; 2335 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2336 zip_fns[a->esz], a, 0) 2337 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2338 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2339 2340 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2341 gen_helper_sve2_zip_q, a, 0) 2342 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2343 gen_helper_sve2_zip_q, a, 2344 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2345 2346 static gen_helper_gvec_3 * const uzp_fns[4] = { 2347 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2348 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2349 }; 2350 2351 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2352 uzp_fns[a->esz], a, 0) 2353 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2354 uzp_fns[a->esz], a, 1 << a->esz) 2355 2356 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2357 gen_helper_sve2_uzp_q, a, 0) 2358 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2359 gen_helper_sve2_uzp_q, a, 16) 2360 2361 static gen_helper_gvec_3 * const trn_fns[4] = { 2362 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2363 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2364 }; 2365 2366 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2367 trn_fns[a->esz], a, 0) 2368 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2369 trn_fns[a->esz], a, 1 << a->esz) 2370 2371 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2372 gen_helper_sve2_trn_q, a, 0) 2373 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2374 gen_helper_sve2_trn_q, a, 16) 2375 2376 /* 2377 *** SVE Permute Vector - Predicated Group 2378 */ 2379 2380 static gen_helper_gvec_3 * const compact_fns[4] = { 2381 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2382 }; 2383 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2384 compact_fns[a->esz], a, 0) 2385 2386 /* Call the helper that computes the ARM LastActiveElement pseudocode 2387 * function, scaled by the element size. This includes the not found 2388 * indication; e.g. not found for esz=3 is -8. 2389 */ 2390 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2391 { 2392 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2393 * round up, as we do elsewhere, because we need the exact size. 2394 */ 2395 TCGv_ptr t_p = tcg_temp_new_ptr(); 2396 unsigned desc = 0; 2397 2398 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2399 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2400 2401 tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg)); 2402 2403 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2404 } 2405 2406 /* Increment LAST to the offset of the next element in the vector, 2407 * wrapping around to 0. 2408 */ 2409 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2410 { 2411 unsigned vsz = vec_full_reg_size(s); 2412 2413 tcg_gen_addi_i32(last, last, 1 << esz); 2414 if (is_power_of_2(vsz)) { 2415 tcg_gen_andi_i32(last, last, vsz - 1); 2416 } else { 2417 TCGv_i32 max = tcg_constant_i32(vsz); 2418 TCGv_i32 zero = tcg_constant_i32(0); 2419 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2420 } 2421 } 2422 2423 /* If LAST < 0, set LAST to the offset of the last element in the vector. 
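* The not-found indication from find_last_active is exactly
* -(1 << esz), so for a power-of-2 VL the mask alone wraps it to
* the correct offset: e.g. esz = 3, vsz = 16 gives -8 & 15 = 8.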
*/ 2424 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2425 { 2426 unsigned vsz = vec_full_reg_size(s); 2427 2428 if (is_power_of_2(vsz)) { 2429 tcg_gen_andi_i32(last, last, vsz - 1); 2430 } else { 2431 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2432 TCGv_i32 zero = tcg_constant_i32(0); 2433 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2434 } 2435 } 2436 2437 /* Load an unsigned element of ESZ from BASE+OFS. */ 2438 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2439 { 2440 TCGv_i64 r = tcg_temp_new_i64(); 2441 2442 switch (esz) { 2443 case 0: 2444 tcg_gen_ld8u_i64(r, base, ofs); 2445 break; 2446 case 1: 2447 tcg_gen_ld16u_i64(r, base, ofs); 2448 break; 2449 case 2: 2450 tcg_gen_ld32u_i64(r, base, ofs); 2451 break; 2452 case 3: 2453 tcg_gen_ld_i64(r, base, ofs); 2454 break; 2455 default: 2456 g_assert_not_reached(); 2457 } 2458 return r; 2459 } 2460 2461 /* Load an unsigned element of ESZ from RM[LAST]. */ 2462 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2463 int rm, int esz) 2464 { 2465 TCGv_ptr p = tcg_temp_new_ptr(); 2466 2467 /* Convert offset into vector into offset into ENV. 2468 * The final adjustment for the vector register base 2469 * is added via constant offset to the load. 2470 */ 2471 #if HOST_BIG_ENDIAN 2472 /* Adjust for element ordering. See vec_reg_offset. */ 2473 if (esz < 3) { 2474 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2475 } 2476 #endif 2477 tcg_gen_ext_i32_ptr(p, last); 2478 tcg_gen_add_ptr(p, p, tcg_env); 2479 2480 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2481 } 2482 2483 /* Compute CLAST for a Zreg. */ 2484 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2485 { 2486 TCGv_i32 last; 2487 TCGLabel *over; 2488 TCGv_i64 ele; 2489 unsigned vsz, esz = a->esz; 2490 2491 if (!sve_access_check(s)) { 2492 return true; 2493 } 2494 2495 last = tcg_temp_new_i32(); 2496 over = gen_new_label(); 2497 2498 find_last_active(s, last, esz, a->pg); 2499 2500 /* There is of course no movcond for a 2048-bit vector, 2501 * so we must branch over the actual store. 2502 */ 2503 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2504 2505 if (!before) { 2506 incr_last_active(s, last, esz); 2507 } 2508 2509 ele = load_last_active(s, last, a->rm, esz); 2510 2511 vsz = vec_full_reg_size(s); 2512 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2513 2514 /* If this insn used MOVPRFX, we may need a second move. */ 2515 if (a->rd != a->rn) { 2516 TCGLabel *done = gen_new_label(); 2517 tcg_gen_br(done); 2518 2519 gen_set_label(over); 2520 do_mov_z(s, a->rd, a->rn); 2521 2522 gen_set_label(done); 2523 } else { 2524 gen_set_label(over); 2525 } 2526 return true; 2527 } 2528 2529 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2530 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2531 2532 /* Compute CLAST for a scalar. */ 2533 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2534 bool before, TCGv_i64 reg_val) 2535 { 2536 TCGv_i32 last = tcg_temp_new_i32(); 2537 TCGv_i64 ele, cmp; 2538 2539 find_last_active(s, last, esz, pg); 2540 2541 /* Extend the original value of last prior to incrementing. 
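* The sign of the pre-increment value is what still records whether
* any element was active once the offset has been incremented and
* possibly wrapped; it is tested by the movcond below.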
*/ 2542 cmp = tcg_temp_new_i64(); 2543 tcg_gen_ext_i32_i64(cmp, last); 2544 2545 if (!before) { 2546 incr_last_active(s, last, esz); 2547 } 2548 2549 /* The conceit here is that while last < 0 indicates not found, after 2550 * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address 2551 * from which we can load garbage. We then discard the garbage with 2552 * a conditional move. 2553 */ 2554 ele = load_last_active(s, last, rm, esz); 2555 2556 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2557 ele, reg_val); 2558 } 2559 2560 /* Compute CLAST for a Vreg. */ 2561 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2562 { 2563 if (sve_access_check(s)) { 2564 int esz = a->esz; 2565 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2566 TCGv_i64 reg = load_esz(tcg_env, ofs, esz); 2567 2568 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2569 write_fp_dreg(s, a->rd, reg); 2570 } 2571 return true; 2572 } 2573 2574 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2575 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2576 2577 /* Compute CLAST for a Xreg. */ 2578 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2579 { 2580 TCGv_i64 reg; 2581 2582 if (!sve_access_check(s)) { 2583 return true; 2584 } 2585 2586 reg = cpu_reg(s, a->rd); 2587 switch (a->esz) { 2588 case 0: 2589 tcg_gen_ext8u_i64(reg, reg); 2590 break; 2591 case 1: 2592 tcg_gen_ext16u_i64(reg, reg); 2593 break; 2594 case 2: 2595 tcg_gen_ext32u_i64(reg, reg); 2596 break; 2597 case 3: 2598 break; 2599 default: 2600 g_assert_not_reached(); 2601 } 2602 2603 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2604 return true; 2605 } 2606 2607 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2608 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2609 2610 /* Compute LAST for a scalar. */ 2611 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2612 int pg, int rm, bool before) 2613 { 2614 TCGv_i32 last = tcg_temp_new_i32(); 2615 2616 find_last_active(s, last, esz, pg); 2617 if (before) { 2618 wrap_last_active(s, last, esz); 2619 } else { 2620 incr_last_active(s, last, esz); 2621 } 2622 2623 return load_last_active(s, last, rm, esz); 2624 } 2625 2626 /* Compute LAST for a Vreg. */ 2627 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2628 { 2629 if (sve_access_check(s)) { 2630 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2631 write_fp_dreg(s, a->rd, val); 2632 } 2633 return true; 2634 } 2635 2636 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2637 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2638 2639 /* Compute LAST for a Xreg. 
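* As with the Vreg form, no active elements needs no special case:
* the wrap or increment turns the -(1 << esz) indication into a
* valid offset, so LASTB reads the final element and LASTA element 0.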
*/ 2640 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2641 { 2642 if (sve_access_check(s)) { 2643 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2644 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2645 } 2646 return true; 2647 } 2648 2649 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2650 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2651 2652 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2653 { 2654 if (!dc_isar_feature(aa64_sve, s)) { 2655 return false; 2656 } 2657 if (sve_access_check(s)) { 2658 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2659 } 2660 return true; 2661 } 2662 2663 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2664 { 2665 if (!dc_isar_feature(aa64_sve, s)) { 2666 return false; 2667 } 2668 if (sve_access_check(s)) { 2669 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2670 TCGv_i64 t = load_esz(tcg_env, ofs, a->esz); 2671 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2672 } 2673 return true; 2674 } 2675 2676 static gen_helper_gvec_3 * const revb_fns[4] = { 2677 NULL, gen_helper_sve_revb_h, 2678 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2679 }; 2680 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2681 2682 static gen_helper_gvec_3 * const revh_fns[4] = { 2683 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2684 }; 2685 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2686 2687 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2688 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2689 2690 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2691 2692 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2693 gen_helper_sve_splice, a, a->esz) 2694 2695 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2696 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2697 2698 /* 2699 *** SVE Integer Compare - Vectors Group 2700 */ 2701 2702 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2703 gen_helper_gvec_flags_4 *gen_fn) 2704 { 2705 TCGv_ptr pd, zn, zm, pg; 2706 unsigned vsz; 2707 TCGv_i32 t; 2708 2709 if (gen_fn == NULL) { 2710 return false; 2711 } 2712 if (!sve_access_check(s)) { 2713 return true; 2714 } 2715 2716 vsz = vec_full_reg_size(s); 2717 t = tcg_temp_new_i32(); 2718 pd = tcg_temp_new_ptr(); 2719 zn = tcg_temp_new_ptr(); 2720 zm = tcg_temp_new_ptr(); 2721 pg = tcg_temp_new_ptr(); 2722 2723 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2724 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2725 tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm)); 2726 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2727 2728 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2729 2730 do_pred_flags(t); 2731 return true; 2732 } 2733 2734 #define DO_PPZZ(NAME, name) \ 2735 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2736 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2737 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2738 }; \ 2739 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2740 a, name##_ppzz_fns[a->esz]) 2741 2742 DO_PPZZ(CMPEQ, cmpeq) 2743 DO_PPZZ(CMPNE, cmpne) 2744 DO_PPZZ(CMPGT, cmpgt) 2745 DO_PPZZ(CMPGE, cmpge) 2746 DO_PPZZ(CMPHI, cmphi) 2747 DO_PPZZ(CMPHS, cmphs) 2748 2749 #undef DO_PPZZ 2750 2751 #define DO_PPZW(NAME, name) \ 2752 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2753 
gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2754 gen_helper_sve_##name##_ppzw_s, NULL \ 2755 }; \ 2756 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2757 a, name##_ppzw_fns[a->esz]) 2758 2759 DO_PPZW(CMPEQ, cmpeq) 2760 DO_PPZW(CMPNE, cmpne) 2761 DO_PPZW(CMPGT, cmpgt) 2762 DO_PPZW(CMPGE, cmpge) 2763 DO_PPZW(CMPHI, cmphi) 2764 DO_PPZW(CMPHS, cmphs) 2765 DO_PPZW(CMPLT, cmplt) 2766 DO_PPZW(CMPLE, cmple) 2767 DO_PPZW(CMPLO, cmplo) 2768 DO_PPZW(CMPLS, cmpls) 2769 2770 #undef DO_PPZW 2771 2772 /* 2773 *** SVE Integer Compare - Immediate Groups 2774 */ 2775 2776 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2777 gen_helper_gvec_flags_3 *gen_fn) 2778 { 2779 TCGv_ptr pd, zn, pg; 2780 unsigned vsz; 2781 TCGv_i32 t; 2782 2783 if (gen_fn == NULL) { 2784 return false; 2785 } 2786 if (!sve_access_check(s)) { 2787 return true; 2788 } 2789 2790 vsz = vec_full_reg_size(s); 2791 t = tcg_temp_new_i32(); 2792 pd = tcg_temp_new_ptr(); 2793 zn = tcg_temp_new_ptr(); 2794 pg = tcg_temp_new_ptr(); 2795 2796 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2797 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2798 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2799 2800 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 2801 2802 do_pred_flags(t); 2803 return true; 2804 } 2805 2806 #define DO_PPZI(NAME, name) \ 2807 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 2808 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 2809 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 2810 }; \ 2811 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 2812 name##_ppzi_fns[a->esz]) 2813 2814 DO_PPZI(CMPEQ, cmpeq) 2815 DO_PPZI(CMPNE, cmpne) 2816 DO_PPZI(CMPGT, cmpgt) 2817 DO_PPZI(CMPGE, cmpge) 2818 DO_PPZI(CMPHI, cmphi) 2819 DO_PPZI(CMPHS, cmphs) 2820 DO_PPZI(CMPLT, cmplt) 2821 DO_PPZI(CMPLE, cmple) 2822 DO_PPZI(CMPLO, cmplo) 2823 DO_PPZI(CMPLS, cmpls) 2824 2825 #undef DO_PPZI 2826 2827 /* 2828 *** SVE Partition Break Group 2829 */ 2830 2831 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2832 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2833 { 2834 if (!sve_access_check(s)) { 2835 return true; 2836 } 2837 2838 unsigned vsz = pred_full_reg_size(s); 2839 2840 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2841 TCGv_ptr d = tcg_temp_new_ptr(); 2842 TCGv_ptr n = tcg_temp_new_ptr(); 2843 TCGv_ptr m = tcg_temp_new_ptr(); 2844 TCGv_ptr g = tcg_temp_new_ptr(); 2845 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2846 2847 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 2848 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 2849 tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm)); 2850 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 2851 2852 if (a->s) { 2853 TCGv_i32 t = tcg_temp_new_i32(); 2854 fn_s(t, d, n, m, g, desc); 2855 do_pred_flags(t); 2856 } else { 2857 fn(d, n, m, g, desc); 2858 } 2859 return true; 2860 } 2861 2862 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2863 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 2864 { 2865 if (!sve_access_check(s)) { 2866 return true; 2867 } 2868 2869 unsigned vsz = pred_full_reg_size(s); 2870 2871 /* Predicate sizes may be smaller and cannot use simd_desc. 
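* A predicate register is only vsz/8 bytes, smaller than simd_desc()
* can encode, so the exact byte size travels in the PREDDESC.OPRSZ
* field instead.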
*/ 2872 TCGv_ptr d = tcg_temp_new_ptr(); 2873 TCGv_ptr n = tcg_temp_new_ptr(); 2874 TCGv_ptr g = tcg_temp_new_ptr(); 2875 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2876 2877 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 2878 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 2879 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 2880 2881 if (a->s) { 2882 TCGv_i32 t = tcg_temp_new_i32(); 2883 fn_s(t, d, n, g, desc); 2884 do_pred_flags(t); 2885 } else { 2886 fn(d, n, g, desc); 2887 } 2888 return true; 2889 } 2890 2891 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 2892 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 2893 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 2894 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 2895 2896 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 2897 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 2898 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 2899 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 2900 2901 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 2902 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 2903 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 2904 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 2905 2906 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 2907 gen_helper_sve_brkn, gen_helper_sve_brkns) 2908 2909 /* 2910 *** SVE Predicate Count Group 2911 */ 2912 2913 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 2914 { 2915 unsigned psz = pred_full_reg_size(s); 2916 2917 if (psz <= 8) { 2918 uint64_t psz_mask; 2919 2920 tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn)); 2921 if (pn != pg) { 2922 TCGv_i64 g = tcg_temp_new_i64(); 2923 tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg)); 2924 tcg_gen_and_i64(val, val, g); 2925 } 2926 2927 /* Reduce the pred_esz_masks value simply to reduce the 2928 * size of the code generated here. 2929 */ 2930 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 2931 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 2932 2933 tcg_gen_ctpop_i64(val, val); 2934 } else { 2935 TCGv_ptr t_pn = tcg_temp_new_ptr(); 2936 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2937 unsigned desc = 0; 2938 2939 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 2940 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2941 2942 tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn)); 2943 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2944 2945 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 2946 } 2947 } 2948 2949 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 2950 { 2951 if (!dc_isar_feature(aa64_sve, s)) { 2952 return false; 2953 } 2954 if (sve_access_check(s)) { 2955 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 2956 } 2957 return true; 2958 } 2959 2960 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 2961 { 2962 if (!dc_isar_feature(aa64_sve, s)) { 2963 return false; 2964 } 2965 if (sve_access_check(s)) { 2966 TCGv_i64 reg = cpu_reg(s, a->rd); 2967 TCGv_i64 val = tcg_temp_new_i64(); 2968 2969 do_cntp(s, val, a->esz, a->pg, a->pg); 2970 if (a->d) { 2971 tcg_gen_sub_i64(reg, reg, val); 2972 } else { 2973 tcg_gen_add_i64(reg, reg, val); 2974 } 2975 } 2976 return true; 2977 } 2978 2979 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 2980 { 2981 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2982 return false; 2983 } 2984 if (sve_access_check(s)) { 2985 unsigned vsz = vec_full_reg_size(s); 2986 TCGv_i64 val = tcg_temp_new_i64(); 2987 GVecGen2sFn *gvec_fn = a->d ? 
tcg_gen_gvec_subs : tcg_gen_gvec_adds; 2988 2989 do_cntp(s, val, a->esz, a->pg, a->pg); 2990 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 2991 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 2992 } 2993 return true; 2994 } 2995 2996 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 2997 { 2998 if (!dc_isar_feature(aa64_sve, s)) { 2999 return false; 3000 } 3001 if (sve_access_check(s)) { 3002 TCGv_i64 reg = cpu_reg(s, a->rd); 3003 TCGv_i64 val = tcg_temp_new_i64(); 3004 3005 do_cntp(s, val, a->esz, a->pg, a->pg); 3006 do_sat_addsub_32(reg, val, a->u, a->d); 3007 } 3008 return true; 3009 } 3010 3011 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3012 { 3013 if (!dc_isar_feature(aa64_sve, s)) { 3014 return false; 3015 } 3016 if (sve_access_check(s)) { 3017 TCGv_i64 reg = cpu_reg(s, a->rd); 3018 TCGv_i64 val = tcg_temp_new_i64(); 3019 3020 do_cntp(s, val, a->esz, a->pg, a->pg); 3021 do_sat_addsub_64(reg, val, a->u, a->d); 3022 } 3023 return true; 3024 } 3025 3026 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3027 { 3028 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3029 return false; 3030 } 3031 if (sve_access_check(s)) { 3032 TCGv_i64 val = tcg_temp_new_i64(); 3033 do_cntp(s, val, a->esz, a->pg, a->pg); 3034 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3035 } 3036 return true; 3037 } 3038 3039 /* 3040 *** SVE Integer Compare Scalars Group 3041 */ 3042 3043 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3044 { 3045 if (!dc_isar_feature(aa64_sve, s)) { 3046 return false; 3047 } 3048 if (!sve_access_check(s)) { 3049 return true; 3050 } 3051 3052 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3053 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3054 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3055 TCGv_i64 cmp = tcg_temp_new_i64(); 3056 3057 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3058 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3059 3060 /* VF = !NF & !CF. */ 3061 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3062 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3063 3064 /* Both NF and VF actually look at bit 31. */ 3065 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3066 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3067 return true; 3068 } 3069 3070 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3071 { 3072 TCGv_i64 op0, op1, t0, t1, tmax; 3073 TCGv_i32 t2; 3074 TCGv_ptr ptr; 3075 unsigned vsz = vec_full_reg_size(s); 3076 unsigned desc = 0; 3077 TCGCond cond; 3078 uint64_t maxval; 3079 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3080 bool eq = a->eq == a->lt; 3081 3082 /* The greater-than conditions are all SVE2. */ 3083 if (a->lt 3084 ? !dc_isar_feature(aa64_sve, s) 3085 : !dc_isar_feature(aa64_sve2, s)) { 3086 return false; 3087 } 3088 if (!sve_access_check(s)) { 3089 return true; 3090 } 3091 3092 op0 = read_cpu_reg(s, a->rn, 1); 3093 op1 = read_cpu_reg(s, a->rm, 1); 3094 3095 if (!a->sf) { 3096 if (a->u) { 3097 tcg_gen_ext32u_i64(op0, op0); 3098 tcg_gen_ext32u_i64(op1, op1); 3099 } else { 3100 tcg_gen_ext32s_i64(op0, op0); 3101 tcg_gen_ext32s_i64(op1, op1); 3102 } 3103 } 3104 3105 /* For the helper, compress the different conditions into a computation 3106 * of how many iterations for which the condition is true. 3107 */ 3108 t0 = tcg_temp_new_i64(); 3109 t1 = tcg_temp_new_i64(); 3110 3111 if (a->lt) { 3112 tcg_gen_sub_i64(t0, op1, op0); 3113 if (a->u) { 3114 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3115 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3116 } else { 3117 maxval = a->sf ? 
INT64_MAX : INT32_MAX; 3118 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3119 } 3120 } else { 3121 tcg_gen_sub_i64(t0, op0, op1); 3122 if (a->u) { 3123 maxval = 0; 3124 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3125 } else { 3126 maxval = a->sf ? INT64_MIN : INT32_MIN; 3127 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3128 } 3129 } 3130 3131 tmax = tcg_constant_i64(vsz >> a->esz); 3132 if (eq) { 3133 /* Equality means one more iteration. */ 3134 tcg_gen_addi_i64(t0, t0, 1); 3135 3136 /* 3137 * For the less-than while, if op1 is maxval (and the only time 3138 * the addition above could overflow), then we produce an all-true 3139 * predicate by setting the count to the vector length. This is 3140 * because the pseudocode is described as an increment + compare 3141 * loop, and the maximum integer would always compare true. 3142 * Similarly, the greater-than while has the same issue with the 3143 * minimum integer due to the decrement + compare loop. 3144 */ 3145 tcg_gen_movi_i64(t1, maxval); 3146 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3147 } 3148 3149 /* Bound to the maximum. */ 3150 tcg_gen_umin_i64(t0, t0, tmax); 3151 3152 /* Set the count to zero if the condition is false. */ 3153 tcg_gen_movi_i64(t1, 0); 3154 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3155 3156 /* Since we're bounded, pass as a 32-bit type. */ 3157 t2 = tcg_temp_new_i32(); 3158 tcg_gen_extrl_i64_i32(t2, t0); 3159 3160 /* Scale elements to bits. */ 3161 tcg_gen_shli_i32(t2, t2, a->esz); 3162 3163 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3164 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3165 3166 ptr = tcg_temp_new_ptr(); 3167 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3168 3169 if (a->lt) { 3170 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3171 } else { 3172 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3173 } 3174 do_pred_flags(t2); 3175 return true; 3176 } 3177 3178 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3179 { 3180 TCGv_i64 op0, op1, diff, t1, tmax; 3181 TCGv_i32 t2; 3182 TCGv_ptr ptr; 3183 unsigned vsz = vec_full_reg_size(s); 3184 unsigned desc = 0; 3185 3186 if (!dc_isar_feature(aa64_sve2, s)) { 3187 return false; 3188 } 3189 if (!sve_access_check(s)) { 3190 return true; 3191 } 3192 3193 op0 = read_cpu_reg(s, a->rn, 1); 3194 op1 = read_cpu_reg(s, a->rm, 1); 3195 3196 tmax = tcg_constant_i64(vsz); 3197 diff = tcg_temp_new_i64(); 3198 3199 if (a->rw) { 3200 /* WHILERW */ 3201 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3202 t1 = tcg_temp_new_i64(); 3203 tcg_gen_sub_i64(diff, op0, op1); 3204 tcg_gen_sub_i64(t1, op1, op0); 3205 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3206 /* Round down to a multiple of ESIZE. */ 3207 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3208 /* If op1 == op0, diff == 0, and the condition is always true. */ 3209 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3210 } else { 3211 /* WHILEWR */ 3212 tcg_gen_sub_i64(diff, op1, op0); 3213 /* Round down to a multiple of ESIZE. */ 3214 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3215 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3216 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3217 } 3218 3219 /* Bound to the maximum. */ 3220 tcg_gen_umin_i64(diff, diff, tmax); 3221 3222 /* Since we're bounded, pass as a 32-bit type. 
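* diff has already been clamped to tmax = vsz, at most 256 bytes,
* so truncating to 32 bits cannot lose information.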
*/ 3223 t2 = tcg_temp_new_i32(); 3224 tcg_gen_extrl_i64_i32(t2, diff); 3225 3226 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3227 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3228 3229 ptr = tcg_temp_new_ptr(); 3230 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3231 3232 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3233 do_pred_flags(t2); 3234 return true; 3235 } 3236 3237 /* 3238 *** SVE Integer Wide Immediate - Unpredicated Group 3239 */ 3240 3241 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3242 { 3243 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3244 return false; 3245 } 3246 if (sve_access_check(s)) { 3247 unsigned vsz = vec_full_reg_size(s); 3248 int dofs = vec_full_reg_offset(s, a->rd); 3249 uint64_t imm; 3250 3251 /* Decode the VFP immediate. */ 3252 imm = vfp_expand_imm(a->esz, a->imm); 3253 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3254 } 3255 return true; 3256 } 3257 3258 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3259 { 3260 if (!dc_isar_feature(aa64_sve, s)) { 3261 return false; 3262 } 3263 if (sve_access_check(s)) { 3264 unsigned vsz = vec_full_reg_size(s); 3265 int dofs = vec_full_reg_offset(s, a->rd); 3266 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3267 } 3268 return true; 3269 } 3270 3271 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3272 3273 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3274 { 3275 a->imm = -a->imm; 3276 return trans_ADD_zzi(s, a); 3277 } 3278 3279 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3280 { 3281 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3282 static const GVecGen2s op[4] = { 3283 { .fni8 = tcg_gen_vec_sub8_i64, 3284 .fniv = tcg_gen_sub_vec, 3285 .fno = gen_helper_sve_subri_b, 3286 .opt_opc = vecop_list, 3287 .vece = MO_8, 3288 .scalar_first = true }, 3289 { .fni8 = tcg_gen_vec_sub16_i64, 3290 .fniv = tcg_gen_sub_vec, 3291 .fno = gen_helper_sve_subri_h, 3292 .opt_opc = vecop_list, 3293 .vece = MO_16, 3294 .scalar_first = true }, 3295 { .fni4 = tcg_gen_sub_i32, 3296 .fniv = tcg_gen_sub_vec, 3297 .fno = gen_helper_sve_subri_s, 3298 .opt_opc = vecop_list, 3299 .vece = MO_32, 3300 .scalar_first = true }, 3301 { .fni8 = tcg_gen_sub_i64, 3302 .fniv = tcg_gen_sub_vec, 3303 .fno = gen_helper_sve_subri_d, 3304 .opt_opc = vecop_list, 3305 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3306 .vece = MO_64, 3307 .scalar_first = true } 3308 }; 3309 3310 if (!dc_isar_feature(aa64_sve, s)) { 3311 return false; 3312 } 3313 if (sve_access_check(s)) { 3314 unsigned vsz = vec_full_reg_size(s); 3315 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3316 vec_full_reg_offset(s, a->rn), 3317 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3318 } 3319 return true; 3320 } 3321 3322 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3323 3324 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3325 { 3326 if (sve_access_check(s)) { 3327 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3328 tcg_constant_i64(a->imm), u, d); 3329 } 3330 return true; 3331 } 3332 3333 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3334 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3335 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3336 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3337 3338 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3339 { 3340 if (sve_access_check(s)) { 3341 unsigned vsz = vec_full_reg_size(s); 
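/* The immediate is materialized as a runtime i64 operand rather
 * than packed into the descriptor; the desc data field is simply
 * 0 here. */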
3342 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3343 vec_full_reg_offset(s, a->rn), 3344 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3345 } 3346 return true; 3347 } 3348 3349 #define DO_ZZI(NAME, name) \ 3350 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3351 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3352 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3353 }; \ 3354 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3355 3356 DO_ZZI(SMAX, smax) 3357 DO_ZZI(UMAX, umax) 3358 DO_ZZI(SMIN, smin) 3359 DO_ZZI(UMIN, umin) 3360 3361 #undef DO_ZZI 3362 3363 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3364 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3365 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3366 }; 3367 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3368 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3369 3370 /* 3371 * SVE Multiply - Indexed 3372 */ 3373 3374 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3375 gen_helper_gvec_sdot_idx_b, a) 3376 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3377 gen_helper_gvec_sdot_idx_h, a) 3378 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3379 gen_helper_gvec_udot_idx_b, a) 3380 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3381 gen_helper_gvec_udot_idx_h, a) 3382 3383 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3384 gen_helper_gvec_sudot_idx_b, a) 3385 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3386 gen_helper_gvec_usdot_idx_b, a) 3387 3388 #define DO_SVE2_RRX(NAME, FUNC) \ 3389 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3390 a->rd, a->rn, a->rm, a->index) 3391 3392 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3393 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3394 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3395 3396 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3397 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3398 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3399 3400 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3401 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3402 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3403 3404 #undef DO_SVE2_RRX 3405 3406 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3407 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3408 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3409 3410 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3411 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3412 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3413 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3414 3415 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3416 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3417 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3418 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3419 3420 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3421 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3422 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3423 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3424 3425 #undef DO_SVE2_RRX_TB 3426 3427 #define DO_SVE2_RRXR(NAME, FUNC) \ 3428 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3429 3430 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3431 DO_SVE2_RRXR(MLA_zzxz_s, 
gen_helper_gvec_mla_idx_s) 3432 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3433 3434 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3435 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3436 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3437 3438 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3439 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3440 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3441 3442 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3443 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3444 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3445 3446 #undef DO_SVE2_RRXR 3447 3448 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3449 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3450 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3451 3452 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3453 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3454 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3455 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3456 3457 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3458 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3459 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3460 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3461 3462 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3463 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3464 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3465 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3466 3467 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3468 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3469 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3470 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3471 3472 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3473 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3474 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3475 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3476 3477 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3478 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3479 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3480 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3481 3482 #undef DO_SVE2_RRXR_TB 3483 3484 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3485 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3486 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3487 3488 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3489 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3490 3491 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3492 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3493 3494 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3495 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3496 3497 #undef DO_SVE2_RRXR_ROT 3498 3499 /* 3500 *** SVE Floating Point Multiply-Add Indexed Group 3501 */ 3502 3503 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3504 { 3505 static gen_helper_gvec_4_ptr * const fns[4] = { 3506 NULL, 3507 
gen_helper_gvec_fmla_idx_h, 3508 gen_helper_gvec_fmla_idx_s, 3509 gen_helper_gvec_fmla_idx_d, 3510 }; 3511 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3512 (a->index << 1) | sub, 3513 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3514 } 3515 3516 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3517 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3518 3519 /* 3520 *** SVE Floating Point Multiply Indexed Group 3521 */ 3522 3523 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3524 NULL, gen_helper_gvec_fmul_idx_h, 3525 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3526 }; 3527 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3528 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3529 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3530 3531 /* 3532 *** SVE Floating Point Fast Reduction Group 3533 */ 3534 3535 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3536 TCGv_ptr, TCGv_i32); 3537 3538 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3539 gen_helper_fp_reduce *fn) 3540 { 3541 unsigned vsz, p2vsz; 3542 TCGv_i32 t_desc; 3543 TCGv_ptr t_zn, t_pg, status; 3544 TCGv_i64 temp; 3545 3546 if (fn == NULL) { 3547 return false; 3548 } 3549 if (!sve_access_check(s)) { 3550 return true; 3551 } 3552 3553 vsz = vec_full_reg_size(s); 3554 p2vsz = pow2ceil(vsz); 3555 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3556 temp = tcg_temp_new_i64(); 3557 t_zn = tcg_temp_new_ptr(); 3558 t_pg = tcg_temp_new_ptr(); 3559 3560 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3561 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3562 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3563 3564 fn(temp, t_zn, t_pg, status, t_desc); 3565 3566 write_fp_dreg(s, a->rd, temp); 3567 return true; 3568 } 3569 3570 #define DO_VPZ(NAME, name) \ 3571 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3572 NULL, gen_helper_sve_##name##_h, \ 3573 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3574 }; \ 3575 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3576 3577 DO_VPZ(FADDV, faddv) 3578 DO_VPZ(FMINNMV, fminnmv) 3579 DO_VPZ(FMAXNMV, fmaxnmv) 3580 DO_VPZ(FMINV, fminv) 3581 DO_VPZ(FMAXV, fmaxv) 3582 3583 #undef DO_VPZ 3584 3585 /* 3586 *** SVE Floating Point Unary Operations - Unpredicated Group 3587 */ 3588 3589 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3590 NULL, gen_helper_gvec_frecpe_h, 3591 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3592 }; 3593 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3594 3595 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3596 NULL, gen_helper_gvec_frsqrte_h, 3597 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 3598 }; 3599 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3600 3601 /* 3602 *** SVE Floating Point Compare with Zero Group 3603 */ 3604 3605 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3606 gen_helper_gvec_3_ptr *fn) 3607 { 3608 if (fn == NULL) { 3609 return false; 3610 } 3611 if (sve_access_check(s)) { 3612 unsigned vsz = vec_full_reg_size(s); 3613 TCGv_ptr status = 3614 fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3615 3616 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 3617 vec_full_reg_offset(s, a->rn), 3618 pred_full_reg_offset(s, a->pg), 3619 status, vsz, vsz, 0, fn); 3620 } 3621 return true; 3622 } 3623 3624 #define DO_PPZ(NAME, name) \ 3625 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 3626 NULL, gen_helper_sve_##name##_h, \ 3627 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3628 }; \ 3629 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 3630 3631 DO_PPZ(FCMGE_ppz0, fcmge0) 3632 DO_PPZ(FCMGT_ppz0, fcmgt0) 3633 DO_PPZ(FCMLE_ppz0, fcmle0) 3634 DO_PPZ(FCMLT_ppz0, fcmlt0) 3635 DO_PPZ(FCMEQ_ppz0, fcmeq0) 3636 DO_PPZ(FCMNE_ppz0, fcmne0) 3637 3638 #undef DO_PPZ 3639 3640 /* 3641 *** SVE floating-point trig multiply-add coefficient 3642 */ 3643 3644 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 3645 NULL, gen_helper_sve_ftmad_h, 3646 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 3647 }; 3648 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 3649 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, 3650 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3651 3652 /* 3653 *** SVE Floating Point Accumulating Reduction Group 3654 */ 3655 3656 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 3657 { 3658 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 3659 TCGv_ptr, TCGv_ptr, TCGv_i32); 3660 static fadda_fn * const fns[3] = { 3661 gen_helper_sve_fadda_h, 3662 gen_helper_sve_fadda_s, 3663 gen_helper_sve_fadda_d, 3664 }; 3665 unsigned vsz = vec_full_reg_size(s); 3666 TCGv_ptr t_rm, t_pg, t_fpst; 3667 TCGv_i64 t_val; 3668 TCGv_i32 t_desc; 3669 3670 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3671 return false; 3672 } 3673 s->is_nonstreaming = true; 3674 if (!sve_access_check(s)) { 3675 return true; 3676 } 3677 3678 t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 3679 t_rm = tcg_temp_new_ptr(); 3680 t_pg = tcg_temp_new_ptr(); 3681 tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); 3682 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3683 t_fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3684 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3685 3686 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 3687 3688 write_fp_dreg(s, a->rd, t_val); 3689 return true; 3690 } 3691 3692 /* 3693 *** SVE Floating Point Arithmetic - Unpredicated Group 3694 */ 3695 3696 #define DO_FP3(NAME, name) \ 3697 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 3698 NULL, gen_helper_gvec_##name##_h, \ 3699 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 3700 }; \ 3701 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 3702 3703 DO_FP3(FADD_zzz, fadd) 3704 DO_FP3(FSUB_zzz, fsub) 3705 DO_FP3(FMUL_zzz, fmul) 3706 DO_FP3(FRECPS, recps) 3707 DO_FP3(FRSQRTS, rsqrts) 3708 3709 #undef DO_FP3 3710 3711 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 3712 NULL, gen_helper_gvec_ftsmul_h, 3713 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 3714 }; 3715 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 3716 ftsmul_fns[a->esz], a, 0) 3717 3718 /* 3719 *** SVE Floating Point Arithmetic - Predicated Group 3720 */ 3721 3722 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 3723 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 3724 NULL, gen_helper_##name##_h, \ 3725 gen_helper_##name##_s, gen_helper_##name##_d \ 3726 }; \ 3727 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 3728 3729 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 3730 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 3731 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 3732 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) 3733 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) 3734 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 3735 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 3736 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) 3737 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 3738 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 3739 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 3740 3741 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 3742 TCGv_i64, TCGv_ptr, TCGv_i32); 3743 3744 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 3745 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 3746 { 3747 unsigned vsz = vec_full_reg_size(s); 3748 TCGv_ptr t_zd, t_zn, t_pg, status; 3749 TCGv_i32 desc; 3750 3751 t_zd = tcg_temp_new_ptr(); 3752 t_zn = tcg_temp_new_ptr(); 3753 t_pg = tcg_temp_new_ptr(); 3754 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); 3755 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); 3756 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3757 3758 status = fpstatus_ptr(is_fp16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3759 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3760 fn(t_zd, t_zn, t_pg, scalar, status, desc); 3761 } 3762 3763 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 3764 gen_helper_sve_fp2scalar *fn) 3765 { 3766 if (fn == NULL) { 3767 return false; 3768 } 3769 if (sve_access_check(s)) { 3770 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 3771 tcg_constant_i64(imm), fn); 3772 } 3773 return true; 3774 } 3775 3776 #define DO_FP_IMM(NAME, name, const0, const1) \ 3777 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 3778 NULL, gen_helper_sve_##name##_h, \ 3779 gen_helper_sve_##name##_s, \ 3780 gen_helper_sve_##name##_d \ 3781 }; \ 3782 static uint64_t const name##_const[4][2] = { \ 3783 { -1, -1 }, \ 3784 { float16_##const0, float16_##const1 }, \ 3785 { float32_##const0, float32_##const1 }, \ 3786 { float64_##const0, float64_##const1 }, \ 3787 }; \ 3788 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 3789 name##_const[a->esz][a->imm], name##_fns[a->esz]) 3790 3791 DO_FP_IMM(FADD, fadds, half, one) 3792 DO_FP_IMM(FSUB, fsubs, half, one) 3793 DO_FP_IMM(FMUL, fmuls, half, two) 3794 DO_FP_IMM(FSUBR, fsubrs, half, one) 3795 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 3796 DO_FP_IMM(FMINNM, fminnms, zero, one) 3797 DO_FP_IMM(FMAX, fmaxs, zero, one) 3798 DO_FP_IMM(FMIN, fmins, zero, one) 3799 3800 #undef DO_FP_IMM 3801 3802 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 3803 gen_helper_gvec_4_ptr *fn) 3804 { 3805 if (fn == NULL) { 3806 return false; 3807 } 3808 if (sve_access_check(s)) { 3809 unsigned vsz = vec_full_reg_size(s); 3810 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3811 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 3812 vec_full_reg_offset(s, a->rn), 3813 vec_full_reg_offset(s, a->rm), 3814 pred_full_reg_offset(s, a->pg), 3815 status, vsz, vsz, 0, fn); 3816 } 3817 return true; 3818 } 3819 3820 #define DO_FPCMP(NAME, name) \ 3821 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 3822 NULL, gen_helper_sve_##name##_h, \ 3823 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3824 }; \ 3825 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 3826 3827 DO_FPCMP(FCMGE, fcmge) 3828 DO_FPCMP(FCMGT, fcmgt) 3829 DO_FPCMP(FCMEQ, fcmeq) 3830 DO_FPCMP(FCMNE, fcmne) 3831 DO_FPCMP(FCMUO, fcmuo) 3832 DO_FPCMP(FACGE, facge) 3833 DO_FPCMP(FACGT, facgt) 3834 3835 #undef DO_FPCMP 3836 3837 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 3838 NULL, gen_helper_sve_fcadd_h, 3839 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 3840 }; 3841 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 3842 a->rd, a->rn, a->rm, a->pg, a->rot, 3843 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3844 3845 #define DO_FMLA(NAME, name) \ 3846 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 3847 NULL, gen_helper_sve_##name##_h, \ 3848 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3849 }; \ 3850 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 3851 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 3852 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3853 3854 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 3855 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 3856 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 3857 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 3858 3859 #undef DO_FMLA 3860 3861 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 3862 NULL, gen_helper_sve_fcmla_zpzzz_h, 3863 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 3864 }; 3865 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 3866 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 3867 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3868 3869 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 3870 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 3871 }; 3872 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 3873 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 3874 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3875 3876 /* 3877 *** SVE Floating Point Unary Operations Predicated Group 3878 */ 3879 3880 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 3881 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 3882 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3883 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 3884 3885 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 3886 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 3887 3888 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 3889 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 3890 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3891 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 3892 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3893 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 3894 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3895 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 3896 3897 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 3898 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 3899 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 3900 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 3901 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3902 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 3903 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3904 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 3905 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3906 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 3907 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3908 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 3909 3910 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 3911 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 3912 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 3913 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 3914 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3915 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 3916 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3917 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 3918 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3919 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 3920 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3921 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 3922 3923 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 3924 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 3925 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 3926 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 3927 3928 static gen_helper_gvec_3_ptr * const frint_fns[] = { 3929 NULL, 3930 gen_helper_sve_frint_h, 3931 gen_helper_sve_frint_s, 3932 gen_helper_sve_frint_d 3933 }; 3934 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 3935 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const frintx_fns[] = {
    NULL,
    gen_helper_sve_frintx_h,
    gen_helper_sve_frintx_s,
    gen_helper_sve_frintx_d
};
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    tmode = gen_set_rmode(mode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    gen_restore_rmode(tmode, status);
    return true;
}

TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           FPROUNDING_TIEEVEN, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           FPROUNDING_POSINF, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           FPROUNDING_NEGINF, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           FPROUNDING_ZERO, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           FPROUNDING_TIEAWAY, frint_fns[a->esz])

static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL, gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL, gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ?
FPST_FPCR_F16 : FPST_FPCR) 3997 3998 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 3999 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 4000 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4001 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 4002 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4003 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 4004 4005 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4006 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 4007 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4008 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 4009 4010 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4011 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 4012 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4013 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4014 4015 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4016 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4017 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4018 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4019 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4020 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4021 4022 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4023 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4024 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4025 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4026 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4027 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4028 4029 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4030 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4031 4032 /* 4033 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4034 */ 4035 4036 /* Subroutine loading a vector register at VOFS of LEN bytes. 4037 * The load should begin at the address Rn + IMM. 4038 */ 4039 4040 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4041 int len, int rn, int imm) 4042 { 4043 int len_align = QEMU_ALIGN_DOWN(len, 16); 4044 int len_remain = len % 16; 4045 int nparts = len / 16 + ctpop8(len_remain); 4046 int midx = get_mem_index(s); 4047 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4048 TCGv_i128 t16; 4049 4050 dirty_addr = tcg_temp_new_i64(); 4051 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4052 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4053 4054 /* 4055 * Note that unpredicated load/store of vector/predicate registers 4056 * are defined as a stream of bytes, which equates to little-endian 4057 * operations on larger quantities. 4058 * Attempt to keep code expansion to a minimum by limiting the 4059 * amount of unrolling done. 
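     * With at most 4 parts, the 16-byte quadword loads are expanded
     * inline; beyond that we fall back to a TCG-level loop.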
4060 */ 4061 if (nparts <= 4) { 4062 int i; 4063 4064 t0 = tcg_temp_new_i64(); 4065 t1 = tcg_temp_new_i64(); 4066 t16 = tcg_temp_new_i128(); 4067 4068 for (i = 0; i < len_align; i += 16) { 4069 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4070 MO_LE | MO_128 | MO_ATOM_NONE); 4071 tcg_gen_extr_i128_i64(t0, t1, t16); 4072 tcg_gen_st_i64(t0, base, vofs + i); 4073 tcg_gen_st_i64(t1, base, vofs + i + 8); 4074 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4075 } 4076 } else { 4077 TCGLabel *loop = gen_new_label(); 4078 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4079 4080 tcg_gen_movi_ptr(i, 0); 4081 gen_set_label(loop); 4082 4083 t16 = tcg_temp_new_i128(); 4084 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4085 MO_LE | MO_128 | MO_ATOM_NONE); 4086 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4087 4088 tp = tcg_temp_new_ptr(); 4089 tcg_gen_add_ptr(tp, base, i); 4090 tcg_gen_addi_ptr(i, i, 16); 4091 4092 t0 = tcg_temp_new_i64(); 4093 t1 = tcg_temp_new_i64(); 4094 tcg_gen_extr_i128_i64(t0, t1, t16); 4095 4096 tcg_gen_st_i64(t0, tp, vofs); 4097 tcg_gen_st_i64(t1, tp, vofs + 8); 4098 4099 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4100 } 4101 4102 /* 4103 * Predicate register loads can be any multiple of 2. 4104 * Note that we still store the entire 64-bit unit into tcg_env. 4105 */ 4106 if (len_remain >= 8) { 4107 t0 = tcg_temp_new_i64(); 4108 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4109 tcg_gen_st_i64(t0, base, vofs + len_align); 4110 len_remain -= 8; 4111 len_align += 8; 4112 if (len_remain) { 4113 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4114 } 4115 } 4116 if (len_remain) { 4117 t0 = tcg_temp_new_i64(); 4118 switch (len_remain) { 4119 case 2: 4120 case 4: 4121 case 8: 4122 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4123 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4124 break; 4125 4126 case 6: 4127 t1 = tcg_temp_new_i64(); 4128 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4129 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4130 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4131 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4132 break; 4133 4134 default: 4135 g_assert_not_reached(); 4136 } 4137 tcg_gen_st_i64(t0, base, vofs + len_align); 4138 } 4139 } 4140 4141 /* Similarly for stores. */ 4142 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4143 int len, int rn, int imm) 4144 { 4145 int len_align = QEMU_ALIGN_DOWN(len, 16); 4146 int len_remain = len % 16; 4147 int nparts = len / 16 + ctpop8(len_remain); 4148 int midx = get_mem_index(s); 4149 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4150 TCGv_i128 t16; 4151 4152 dirty_addr = tcg_temp_new_i64(); 4153 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4154 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4155 4156 /* Note that unpredicated load/store of vector/predicate registers 4157 * are defined as a stream of bytes, which equates to little-endian 4158 * operations on larger quantities. There is no nice way to force 4159 * a little-endian store for aarch64_be-linux-user out of line. 4160 * 4161 * Attempt to keep code expansion to a minimum by limiting the 4162 * amount of unrolling done. 
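     * As for gen_sve_ldr above: at most 4 parts are expanded inline,
     * and larger vectors are stored via a TCG-level loop.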
4163 */ 4164 if (nparts <= 4) { 4165 int i; 4166 4167 t0 = tcg_temp_new_i64(); 4168 t1 = tcg_temp_new_i64(); 4169 t16 = tcg_temp_new_i128(); 4170 for (i = 0; i < len_align; i += 16) { 4171 tcg_gen_ld_i64(t0, base, vofs + i); 4172 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4173 tcg_gen_concat_i64_i128(t16, t0, t1); 4174 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4175 MO_LE | MO_128 | MO_ATOM_NONE); 4176 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4177 } 4178 } else { 4179 TCGLabel *loop = gen_new_label(); 4180 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4181 4182 tcg_gen_movi_ptr(i, 0); 4183 gen_set_label(loop); 4184 4185 t0 = tcg_temp_new_i64(); 4186 t1 = tcg_temp_new_i64(); 4187 tp = tcg_temp_new_ptr(); 4188 tcg_gen_add_ptr(tp, base, i); 4189 tcg_gen_ld_i64(t0, tp, vofs); 4190 tcg_gen_ld_i64(t1, tp, vofs + 8); 4191 tcg_gen_addi_ptr(i, i, 16); 4192 4193 t16 = tcg_temp_new_i128(); 4194 tcg_gen_concat_i64_i128(t16, t0, t1); 4195 4196 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4197 MO_LE | MO_128 | MO_ATOM_NONE); 4198 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4199 4200 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4201 } 4202 4203 /* Predicate register stores can be any multiple of 2. */ 4204 if (len_remain >= 8) { 4205 t0 = tcg_temp_new_i64(); 4206 tcg_gen_ld_i64(t0, base, vofs + len_align); 4207 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4208 len_remain -= 8; 4209 len_align += 8; 4210 if (len_remain) { 4211 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4212 } 4213 } 4214 if (len_remain) { 4215 t0 = tcg_temp_new_i64(); 4216 tcg_gen_ld_i64(t0, base, vofs + len_align); 4217 4218 switch (len_remain) { 4219 case 2: 4220 case 4: 4221 case 8: 4222 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4223 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4224 break; 4225 4226 case 6: 4227 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4228 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4229 tcg_gen_shri_i64(t0, t0, 32); 4230 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4231 break; 4232 4233 default: 4234 g_assert_not_reached(); 4235 } 4236 } 4237 } 4238 4239 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4240 { 4241 if (!dc_isar_feature(aa64_sve, s)) { 4242 return false; 4243 } 4244 if (sve_access_check(s)) { 4245 int size = vec_full_reg_size(s); 4246 int off = vec_full_reg_offset(s, a->rd); 4247 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4248 } 4249 return true; 4250 } 4251 4252 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4253 { 4254 if (!dc_isar_feature(aa64_sve, s)) { 4255 return false; 4256 } 4257 if (sve_access_check(s)) { 4258 int size = pred_full_reg_size(s); 4259 int off = pred_full_reg_offset(s, a->rd); 4260 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4261 } 4262 return true; 4263 } 4264 4265 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4266 { 4267 if (!dc_isar_feature(aa64_sve, s)) { 4268 return false; 4269 } 4270 if (sve_access_check(s)) { 4271 int size = vec_full_reg_size(s); 4272 int off = vec_full_reg_offset(s, a->rd); 4273 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4274 } 4275 return true; 4276 } 4277 4278 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4279 { 4280 if (!dc_isar_feature(aa64_sve, s)) { 4281 return false; 4282 } 4283 if (sve_access_check(s)) { 4284 int size = pred_full_reg_size(s); 4285 int off = pred_full_reg_offset(s, a->rd); 4286 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4287 } 4288 return true; 4289 } 4290 4291 /* 4292 
*** SVE Memory - Contiguous Load Group 4293 */ 4294 4295 /* The memory mode of the dtype. */ 4296 static const MemOp dtype_mop[16] = { 4297 MO_UB, MO_UB, MO_UB, MO_UB, 4298 MO_SL, MO_UW, MO_UW, MO_UW, 4299 MO_SW, MO_SW, MO_UL, MO_UL, 4300 MO_SB, MO_SB, MO_SB, MO_UQ 4301 }; 4302 4303 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4304 4305 /* The vector element size of dtype. */ 4306 static const uint8_t dtype_esz[16] = { 4307 0, 1, 2, 3, 4308 3, 1, 2, 3, 4309 3, 2, 2, 3, 4310 3, 2, 1, 3 4311 }; 4312 4313 uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, 4314 uint32_t msz, bool is_write, uint32_t data) 4315 { 4316 uint32_t sizem1; 4317 uint32_t desc = 0; 4318 4319 /* Assert all of the data fits, with or without MTE enabled. */ 4320 assert(nregs >= 1 && nregs <= 4); 4321 sizem1 = (nregs << msz) - 1; 4322 assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); 4323 assert(data < 1u << SVE_MTEDESC_SHIFT); 4324 4325 if (s->mte_active[0]) { 4326 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4327 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4328 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4329 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4330 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); 4331 desc <<= SVE_MTEDESC_SHIFT; 4332 } 4333 return simd_desc(vsz, vsz, desc | data); 4334 } 4335 4336 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4337 int dtype, uint32_t nregs, bool is_write, 4338 gen_helper_gvec_mem *fn) 4339 { 4340 TCGv_ptr t_pg; 4341 uint32_t desc; 4342 4343 if (!s->mte_active[0]) { 4344 addr = clean_data_tbi(s, addr); 4345 } 4346 4347 /* 4348 * For e.g. LD4, there are not enough arguments to pass all 4 4349 * registers as pointers, so encode the regno into the data field. 4350 * For consistency, do this even for LD1. 
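     * (The helpers recover zt via simd_data(desc) and address the
     * remaining registers, if any, consecutively modulo 32; see
     * sve_helper.c.)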
4351 */ 4352 desc = make_svemte_desc(s, vec_full_reg_size(s), nregs, 4353 dtype_msz(dtype), is_write, zt); 4354 t_pg = tcg_temp_new_ptr(); 4355 4356 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4357 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4358 } 4359 4360 /* Indexed by [mte][be][dtype][nreg] */ 4361 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4362 { /* mte inactive, little-endian */ 4363 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4364 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4365 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4366 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4367 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4368 4369 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4370 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4371 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4372 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4373 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4374 4375 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4376 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4377 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4378 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4379 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4380 4381 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4382 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4383 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4384 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4385 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4386 4387 /* mte inactive, big-endian */ 4388 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4389 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4390 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4391 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4392 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4393 4394 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4395 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4396 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4397 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4398 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4399 4400 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4401 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4402 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4403 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4404 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4405 4406 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4407 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4408 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4409 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4410 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4411 4412 { /* mte active, little-endian */ 4413 { { gen_helper_sve_ld1bb_r_mte, 4414 gen_helper_sve_ld2bb_r_mte, 4415 gen_helper_sve_ld3bb_r_mte, 4416 gen_helper_sve_ld4bb_r_mte }, 4417 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4418 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4419 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4420 4421 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4422 { gen_helper_sve_ld1hh_le_r_mte, 4423 gen_helper_sve_ld2hh_le_r_mte, 4424 gen_helper_sve_ld3hh_le_r_mte, 4425 gen_helper_sve_ld4hh_le_r_mte }, 4426 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4427 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4428 4429 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4430 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4431 { 
gen_helper_sve_ld1ss_le_r_mte, 4432 gen_helper_sve_ld2ss_le_r_mte, 4433 gen_helper_sve_ld3ss_le_r_mte, 4434 gen_helper_sve_ld4ss_le_r_mte }, 4435 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4436 4437 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4438 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4439 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4440 { gen_helper_sve_ld1dd_le_r_mte, 4441 gen_helper_sve_ld2dd_le_r_mte, 4442 gen_helper_sve_ld3dd_le_r_mte, 4443 gen_helper_sve_ld4dd_le_r_mte } }, 4444 4445 /* mte active, big-endian */ 4446 { { gen_helper_sve_ld1bb_r_mte, 4447 gen_helper_sve_ld2bb_r_mte, 4448 gen_helper_sve_ld3bb_r_mte, 4449 gen_helper_sve_ld4bb_r_mte }, 4450 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4451 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4452 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4453 4454 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4455 { gen_helper_sve_ld1hh_be_r_mte, 4456 gen_helper_sve_ld2hh_be_r_mte, 4457 gen_helper_sve_ld3hh_be_r_mte, 4458 gen_helper_sve_ld4hh_be_r_mte }, 4459 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4460 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4461 4462 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4463 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4464 { gen_helper_sve_ld1ss_be_r_mte, 4465 gen_helper_sve_ld2ss_be_r_mte, 4466 gen_helper_sve_ld3ss_be_r_mte, 4467 gen_helper_sve_ld4ss_be_r_mte }, 4468 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4469 4470 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4471 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4472 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4473 { gen_helper_sve_ld1dd_be_r_mte, 4474 gen_helper_sve_ld2dd_be_r_mte, 4475 gen_helper_sve_ld3dd_be_r_mte, 4476 gen_helper_sve_ld4dd_be_r_mte } } }, 4477 }; 4478 4479 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4480 TCGv_i64 addr, int dtype, int nreg) 4481 { 4482 gen_helper_gvec_mem *fn 4483 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4484 4485 /* 4486 * While there are holes in the table, they are not 4487 * accessible via the instruction encoding. 
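     * The multi-register forms exist only for msz == esz, i.e. for
     * dtypes 0, 5, 10 and 15, matching the rows populated above.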
4488 */ 4489 assert(fn != NULL); 4490 do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn); 4491 } 4492 4493 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4494 { 4495 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4496 return false; 4497 } 4498 if (sve_access_check(s)) { 4499 TCGv_i64 addr = tcg_temp_new_i64(); 4500 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4501 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4502 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4503 } 4504 return true; 4505 } 4506 4507 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4508 { 4509 if (!dc_isar_feature(aa64_sve, s)) { 4510 return false; 4511 } 4512 if (sve_access_check(s)) { 4513 int vsz = vec_full_reg_size(s); 4514 int elements = vsz >> dtype_esz[a->dtype]; 4515 TCGv_i64 addr = tcg_temp_new_i64(); 4516 4517 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4518 (a->imm * elements * (a->nreg + 1)) 4519 << dtype_msz(a->dtype)); 4520 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4521 } 4522 return true; 4523 } 4524 4525 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4526 { 4527 static gen_helper_gvec_mem * const fns[2][2][16] = { 4528 { /* mte inactive, little-endian */ 4529 { gen_helper_sve_ldff1bb_r, 4530 gen_helper_sve_ldff1bhu_r, 4531 gen_helper_sve_ldff1bsu_r, 4532 gen_helper_sve_ldff1bdu_r, 4533 4534 gen_helper_sve_ldff1sds_le_r, 4535 gen_helper_sve_ldff1hh_le_r, 4536 gen_helper_sve_ldff1hsu_le_r, 4537 gen_helper_sve_ldff1hdu_le_r, 4538 4539 gen_helper_sve_ldff1hds_le_r, 4540 gen_helper_sve_ldff1hss_le_r, 4541 gen_helper_sve_ldff1ss_le_r, 4542 gen_helper_sve_ldff1sdu_le_r, 4543 4544 gen_helper_sve_ldff1bds_r, 4545 gen_helper_sve_ldff1bss_r, 4546 gen_helper_sve_ldff1bhs_r, 4547 gen_helper_sve_ldff1dd_le_r }, 4548 4549 /* mte inactive, big-endian */ 4550 { gen_helper_sve_ldff1bb_r, 4551 gen_helper_sve_ldff1bhu_r, 4552 gen_helper_sve_ldff1bsu_r, 4553 gen_helper_sve_ldff1bdu_r, 4554 4555 gen_helper_sve_ldff1sds_be_r, 4556 gen_helper_sve_ldff1hh_be_r, 4557 gen_helper_sve_ldff1hsu_be_r, 4558 gen_helper_sve_ldff1hdu_be_r, 4559 4560 gen_helper_sve_ldff1hds_be_r, 4561 gen_helper_sve_ldff1hss_be_r, 4562 gen_helper_sve_ldff1ss_be_r, 4563 gen_helper_sve_ldff1sdu_be_r, 4564 4565 gen_helper_sve_ldff1bds_r, 4566 gen_helper_sve_ldff1bss_r, 4567 gen_helper_sve_ldff1bhs_r, 4568 gen_helper_sve_ldff1dd_be_r } }, 4569 4570 { /* mte active, little-endian */ 4571 { gen_helper_sve_ldff1bb_r_mte, 4572 gen_helper_sve_ldff1bhu_r_mte, 4573 gen_helper_sve_ldff1bsu_r_mte, 4574 gen_helper_sve_ldff1bdu_r_mte, 4575 4576 gen_helper_sve_ldff1sds_le_r_mte, 4577 gen_helper_sve_ldff1hh_le_r_mte, 4578 gen_helper_sve_ldff1hsu_le_r_mte, 4579 gen_helper_sve_ldff1hdu_le_r_mte, 4580 4581 gen_helper_sve_ldff1hds_le_r_mte, 4582 gen_helper_sve_ldff1hss_le_r_mte, 4583 gen_helper_sve_ldff1ss_le_r_mte, 4584 gen_helper_sve_ldff1sdu_le_r_mte, 4585 4586 gen_helper_sve_ldff1bds_r_mte, 4587 gen_helper_sve_ldff1bss_r_mte, 4588 gen_helper_sve_ldff1bhs_r_mte, 4589 gen_helper_sve_ldff1dd_le_r_mte }, 4590 4591 /* mte active, big-endian */ 4592 { gen_helper_sve_ldff1bb_r_mte, 4593 gen_helper_sve_ldff1bhu_r_mte, 4594 gen_helper_sve_ldff1bsu_r_mte, 4595 gen_helper_sve_ldff1bdu_r_mte, 4596 4597 gen_helper_sve_ldff1sds_be_r_mte, 4598 gen_helper_sve_ldff1hh_be_r_mte, 4599 gen_helper_sve_ldff1hsu_be_r_mte, 4600 gen_helper_sve_ldff1hdu_be_r_mte, 4601 4602 gen_helper_sve_ldff1hds_be_r_mte, 4603 gen_helper_sve_ldff1hss_be_r_mte, 4604 gen_helper_sve_ldff1ss_be_r_mte, 4605 
gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = tcg_temp_new_i64();
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off =
(a->imm * elements) << dtype_msz(a->dtype); 4723 TCGv_i64 addr = tcg_temp_new_i64(); 4724 4725 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4726 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4727 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4728 } 4729 return true; 4730 } 4731 4732 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4733 { 4734 unsigned vsz = vec_full_reg_size(s); 4735 TCGv_ptr t_pg; 4736 int poff; 4737 uint32_t desc; 4738 4739 /* Load the first quadword using the normal predicated load helpers. */ 4740 if (!s->mte_active[0]) { 4741 addr = clean_data_tbi(s, addr); 4742 } 4743 4744 poff = pred_full_reg_offset(s, pg); 4745 if (vsz > 16) { 4746 /* 4747 * Zero-extend the first 16 bits of the predicate into a temporary. 4748 * This avoids triggering an assert making sure we don't have bits 4749 * set within a predicate beyond VQ, but we have lowered VQ to 1 4750 * for this load operation. 4751 */ 4752 TCGv_i64 tmp = tcg_temp_new_i64(); 4753 #if HOST_BIG_ENDIAN 4754 poff += 6; 4755 #endif 4756 tcg_gen_ld16u_i64(tmp, tcg_env, poff); 4757 4758 poff = offsetof(CPUARMState, vfp.preg_tmp); 4759 tcg_gen_st_i64(tmp, tcg_env, poff); 4760 } 4761 4762 t_pg = tcg_temp_new_ptr(); 4763 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4764 4765 gen_helper_gvec_mem *fn 4766 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4767 desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); 4768 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4769 4770 /* Replicate that first quadword. */ 4771 if (vsz > 16) { 4772 int doff = vec_full_reg_offset(s, zt); 4773 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4774 } 4775 } 4776 4777 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 4778 { 4779 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4780 return false; 4781 } 4782 if (sve_access_check(s)) { 4783 int msz = dtype_msz(a->dtype); 4784 TCGv_i64 addr = tcg_temp_new_i64(); 4785 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 4786 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4787 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4788 } 4789 return true; 4790 } 4791 4792 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 4793 { 4794 if (!dc_isar_feature(aa64_sve, s)) { 4795 return false; 4796 } 4797 if (sve_access_check(s)) { 4798 TCGv_i64 addr = tcg_temp_new_i64(); 4799 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 4800 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4801 } 4802 return true; 4803 } 4804 4805 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4806 { 4807 unsigned vsz = vec_full_reg_size(s); 4808 unsigned vsz_r32; 4809 TCGv_ptr t_pg; 4810 int poff, doff; 4811 uint32_t desc; 4812 4813 if (vsz < 32) { 4814 /* 4815 * Note that this UNDEFINED check comes after CheckSVEEnabled() 4816 * in the ARM pseudocode, which is the sve_access_check() done 4817 * in our caller. We should not now return false from the caller. 4818 */ 4819 unallocated_encoding(s); 4820 return; 4821 } 4822 4823 /* Load the first octaword using the normal predicated load helpers. */ 4824 if (!s->mte_active[0]) { 4825 addr = clean_data_tbi(s, addr); 4826 } 4827 4828 poff = pred_full_reg_offset(s, pg); 4829 if (vsz > 32) { 4830 /* 4831 * Zero-extend the first 32 bits of the predicate into a temporary. 4832 * This avoids triggering an assert making sure we don't have bits 4833 * set within a predicate beyond VQ, but we have lowered VQ to 2 4834 * for this load operation. 
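         * At VQ == 2 the predicate has exactly 32 significant bits,
         * one per byte of the 32-byte octaword being loaded.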
4835 */ 4836 TCGv_i64 tmp = tcg_temp_new_i64(); 4837 #if HOST_BIG_ENDIAN 4838 poff += 4; 4839 #endif 4840 tcg_gen_ld32u_i64(tmp, tcg_env, poff); 4841 4842 poff = offsetof(CPUARMState, vfp.preg_tmp); 4843 tcg_gen_st_i64(tmp, tcg_env, poff); 4844 } 4845 4846 t_pg = tcg_temp_new_ptr(); 4847 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4848 4849 gen_helper_gvec_mem *fn 4850 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4851 desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); 4852 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4853 4854 /* 4855 * Replicate that first octaword. 4856 * The replication happens in units of 32; if the full vector size 4857 * is not a multiple of 32, the final bits are zeroed. 4858 */ 4859 doff = vec_full_reg_offset(s, zt); 4860 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 4861 if (vsz >= 64) { 4862 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 4863 } 4864 vsz -= vsz_r32; 4865 if (vsz) { 4866 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 4867 } 4868 } 4869 4870 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 4871 { 4872 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4873 return false; 4874 } 4875 if (a->rm == 31) { 4876 return false; 4877 } 4878 s->is_nonstreaming = true; 4879 if (sve_access_check(s)) { 4880 TCGv_i64 addr = tcg_temp_new_i64(); 4881 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4882 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4883 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4884 } 4885 return true; 4886 } 4887 4888 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 4889 { 4890 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4891 return false; 4892 } 4893 s->is_nonstreaming = true; 4894 if (sve_access_check(s)) { 4895 TCGv_i64 addr = tcg_temp_new_i64(); 4896 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 4897 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4898 } 4899 return true; 4900 } 4901 4902 /* Load and broadcast element. */ 4903 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 4904 { 4905 unsigned vsz = vec_full_reg_size(s); 4906 unsigned psz = pred_full_reg_size(s); 4907 unsigned esz = dtype_esz[a->dtype]; 4908 unsigned msz = dtype_msz(a->dtype); 4909 TCGLabel *over; 4910 TCGv_i64 temp, clean_addr; 4911 MemOp memop; 4912 4913 if (!dc_isar_feature(aa64_sve, s)) { 4914 return false; 4915 } 4916 if (!sve_access_check(s)) { 4917 return true; 4918 } 4919 4920 over = gen_new_label(); 4921 4922 /* If the guarding predicate has no bits set, no load occurs. */ 4923 if (psz <= 8) { 4924 /* Reduce the pred_esz_masks value simply to reduce the 4925 * size of the code generated here. 4926 */ 4927 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 4928 temp = tcg_temp_new_i64(); 4929 tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg)); 4930 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 4931 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 4932 } else { 4933 TCGv_i32 t32 = tcg_temp_new_i32(); 4934 find_last_active(s, t32, esz, a->pg); 4935 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 4936 } 4937 4938 /* Load the data. */ 4939 temp = tcg_temp_new_i64(); 4940 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 4941 4942 memop = finalize_memop(s, dtype_mop[a->dtype]); 4943 clean_addr = gen_mte_check1(s, temp, false, true, memop); 4944 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); 4945 4946 /* Broadcast to *all* elements. 
*/ 4947 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4948 vsz, vsz, temp); 4949 4950 /* Zero the inactive elements. */ 4951 gen_set_label(over); 4952 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 4953 } 4954 4955 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4956 int msz, int esz, int nreg) 4957 { 4958 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 4959 { { { gen_helper_sve_st1bb_r, 4960 gen_helper_sve_st1bh_r, 4961 gen_helper_sve_st1bs_r, 4962 gen_helper_sve_st1bd_r }, 4963 { NULL, 4964 gen_helper_sve_st1hh_le_r, 4965 gen_helper_sve_st1hs_le_r, 4966 gen_helper_sve_st1hd_le_r }, 4967 { NULL, NULL, 4968 gen_helper_sve_st1ss_le_r, 4969 gen_helper_sve_st1sd_le_r }, 4970 { NULL, NULL, NULL, 4971 gen_helper_sve_st1dd_le_r } }, 4972 { { gen_helper_sve_st1bb_r, 4973 gen_helper_sve_st1bh_r, 4974 gen_helper_sve_st1bs_r, 4975 gen_helper_sve_st1bd_r }, 4976 { NULL, 4977 gen_helper_sve_st1hh_be_r, 4978 gen_helper_sve_st1hs_be_r, 4979 gen_helper_sve_st1hd_be_r }, 4980 { NULL, NULL, 4981 gen_helper_sve_st1ss_be_r, 4982 gen_helper_sve_st1sd_be_r }, 4983 { NULL, NULL, NULL, 4984 gen_helper_sve_st1dd_be_r } } }, 4985 4986 { { { gen_helper_sve_st1bb_r_mte, 4987 gen_helper_sve_st1bh_r_mte, 4988 gen_helper_sve_st1bs_r_mte, 4989 gen_helper_sve_st1bd_r_mte }, 4990 { NULL, 4991 gen_helper_sve_st1hh_le_r_mte, 4992 gen_helper_sve_st1hs_le_r_mte, 4993 gen_helper_sve_st1hd_le_r_mte }, 4994 { NULL, NULL, 4995 gen_helper_sve_st1ss_le_r_mte, 4996 gen_helper_sve_st1sd_le_r_mte }, 4997 { NULL, NULL, NULL, 4998 gen_helper_sve_st1dd_le_r_mte } }, 4999 { { gen_helper_sve_st1bb_r_mte, 5000 gen_helper_sve_st1bh_r_mte, 5001 gen_helper_sve_st1bs_r_mte, 5002 gen_helper_sve_st1bd_r_mte }, 5003 { NULL, 5004 gen_helper_sve_st1hh_be_r_mte, 5005 gen_helper_sve_st1hs_be_r_mte, 5006 gen_helper_sve_st1hd_be_r_mte }, 5007 { NULL, NULL, 5008 gen_helper_sve_st1ss_be_r_mte, 5009 gen_helper_sve_st1sd_be_r_mte }, 5010 { NULL, NULL, NULL, 5011 gen_helper_sve_st1dd_be_r_mte } } }, 5012 }; 5013 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5014 { { { gen_helper_sve_st2bb_r, 5015 gen_helper_sve_st2hh_le_r, 5016 gen_helper_sve_st2ss_le_r, 5017 gen_helper_sve_st2dd_le_r }, 5018 { gen_helper_sve_st3bb_r, 5019 gen_helper_sve_st3hh_le_r, 5020 gen_helper_sve_st3ss_le_r, 5021 gen_helper_sve_st3dd_le_r }, 5022 { gen_helper_sve_st4bb_r, 5023 gen_helper_sve_st4hh_le_r, 5024 gen_helper_sve_st4ss_le_r, 5025 gen_helper_sve_st4dd_le_r } }, 5026 { { gen_helper_sve_st2bb_r, 5027 gen_helper_sve_st2hh_be_r, 5028 gen_helper_sve_st2ss_be_r, 5029 gen_helper_sve_st2dd_be_r }, 5030 { gen_helper_sve_st3bb_r, 5031 gen_helper_sve_st3hh_be_r, 5032 gen_helper_sve_st3ss_be_r, 5033 gen_helper_sve_st3dd_be_r }, 5034 { gen_helper_sve_st4bb_r, 5035 gen_helper_sve_st4hh_be_r, 5036 gen_helper_sve_st4ss_be_r, 5037 gen_helper_sve_st4dd_be_r } } }, 5038 { { { gen_helper_sve_st2bb_r_mte, 5039 gen_helper_sve_st2hh_le_r_mte, 5040 gen_helper_sve_st2ss_le_r_mte, 5041 gen_helper_sve_st2dd_le_r_mte }, 5042 { gen_helper_sve_st3bb_r_mte, 5043 gen_helper_sve_st3hh_le_r_mte, 5044 gen_helper_sve_st3ss_le_r_mte, 5045 gen_helper_sve_st3dd_le_r_mte }, 5046 { gen_helper_sve_st4bb_r_mte, 5047 gen_helper_sve_st4hh_le_r_mte, 5048 gen_helper_sve_st4ss_le_r_mte, 5049 gen_helper_sve_st4dd_le_r_mte } }, 5050 { { gen_helper_sve_st2bb_r_mte, 5051 gen_helper_sve_st2hh_be_r_mte, 5052 gen_helper_sve_st2ss_be_r_mte, 5053 gen_helper_sve_st2dd_be_r_mte }, 5054 { gen_helper_sve_st3bb_r_mte, 5055 gen_helper_sve_st3hh_be_r_mte, 5056 
gen_helper_sve_st3ss_be_r_mte, 5057 gen_helper_sve_st3dd_be_r_mte }, 5058 { gen_helper_sve_st4bb_r_mte, 5059 gen_helper_sve_st4hh_be_r_mte, 5060 gen_helper_sve_st4ss_be_r_mte, 5061 gen_helper_sve_st4dd_be_r_mte } } }, 5062 }; 5063 gen_helper_gvec_mem *fn; 5064 int be = s->be_data == MO_BE; 5065 5066 if (nreg == 0) { 5067 /* ST1 */ 5068 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5069 } else { 5070 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5071 assert(msz == esz); 5072 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5073 } 5074 assert(fn != NULL); 5075 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn); 5076 } 5077 5078 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5079 { 5080 if (!dc_isar_feature(aa64_sve, s)) { 5081 return false; 5082 } 5083 if (a->rm == 31 || a->msz > a->esz) { 5084 return false; 5085 } 5086 if (sve_access_check(s)) { 5087 TCGv_i64 addr = tcg_temp_new_i64(); 5088 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5089 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5090 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5091 } 5092 return true; 5093 } 5094 5095 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5096 { 5097 if (!dc_isar_feature(aa64_sve, s)) { 5098 return false; 5099 } 5100 if (a->msz > a->esz) { 5101 return false; 5102 } 5103 if (sve_access_check(s)) { 5104 int vsz = vec_full_reg_size(s); 5105 int elements = vsz >> a->esz; 5106 TCGv_i64 addr = tcg_temp_new_i64(); 5107 5108 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5109 (a->imm * elements * (a->nreg + 1)) << a->msz); 5110 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5111 } 5112 return true; 5113 } 5114 5115 /* 5116 *** SVE gather loads / scatter stores 5117 */ 5118 5119 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5120 int scale, TCGv_i64 scalar, int msz, bool is_write, 5121 gen_helper_gvec_mem_scatter *fn) 5122 { 5123 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5124 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5125 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5126 uint32_t desc; 5127 5128 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 5129 tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); 5130 tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); 5131 5132 desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); 5133 fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5134 } 5135 5136 /* Indexed by [mte][be][ff][xs][u][msz]. 
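 * ff selects the first-fault form, xs the offset extension (0 = zsu,
 * 1 = zss), and u zero- rather than sign-extension of the loaded element.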
*/ 5137 static gen_helper_gvec_mem_scatter * const 5138 gather_load_fn32[2][2][2][2][2][3] = { 5139 { /* MTE Inactive */ 5140 { /* Little-endian */ 5141 { { { gen_helper_sve_ldbss_zsu, 5142 gen_helper_sve_ldhss_le_zsu, 5143 NULL, }, 5144 { gen_helper_sve_ldbsu_zsu, 5145 gen_helper_sve_ldhsu_le_zsu, 5146 gen_helper_sve_ldss_le_zsu, } }, 5147 { { gen_helper_sve_ldbss_zss, 5148 gen_helper_sve_ldhss_le_zss, 5149 NULL, }, 5150 { gen_helper_sve_ldbsu_zss, 5151 gen_helper_sve_ldhsu_le_zss, 5152 gen_helper_sve_ldss_le_zss, } } }, 5153 5154 /* First-fault */ 5155 { { { gen_helper_sve_ldffbss_zsu, 5156 gen_helper_sve_ldffhss_le_zsu, 5157 NULL, }, 5158 { gen_helper_sve_ldffbsu_zsu, 5159 gen_helper_sve_ldffhsu_le_zsu, 5160 gen_helper_sve_ldffss_le_zsu, } }, 5161 { { gen_helper_sve_ldffbss_zss, 5162 gen_helper_sve_ldffhss_le_zss, 5163 NULL, }, 5164 { gen_helper_sve_ldffbsu_zss, 5165 gen_helper_sve_ldffhsu_le_zss, 5166 gen_helper_sve_ldffss_le_zss, } } } }, 5167 5168 { /* Big-endian */ 5169 { { { gen_helper_sve_ldbss_zsu, 5170 gen_helper_sve_ldhss_be_zsu, 5171 NULL, }, 5172 { gen_helper_sve_ldbsu_zsu, 5173 gen_helper_sve_ldhsu_be_zsu, 5174 gen_helper_sve_ldss_be_zsu, } }, 5175 { { gen_helper_sve_ldbss_zss, 5176 gen_helper_sve_ldhss_be_zss, 5177 NULL, }, 5178 { gen_helper_sve_ldbsu_zss, 5179 gen_helper_sve_ldhsu_be_zss, 5180 gen_helper_sve_ldss_be_zss, } } }, 5181 5182 /* First-fault */ 5183 { { { gen_helper_sve_ldffbss_zsu, 5184 gen_helper_sve_ldffhss_be_zsu, 5185 NULL, }, 5186 { gen_helper_sve_ldffbsu_zsu, 5187 gen_helper_sve_ldffhsu_be_zsu, 5188 gen_helper_sve_ldffss_be_zsu, } }, 5189 { { gen_helper_sve_ldffbss_zss, 5190 gen_helper_sve_ldffhss_be_zss, 5191 NULL, }, 5192 { gen_helper_sve_ldffbsu_zss, 5193 gen_helper_sve_ldffhsu_be_zss, 5194 gen_helper_sve_ldffss_be_zss, } } } } }, 5195 { /* MTE Active */ 5196 { /* Little-endian */ 5197 { { { gen_helper_sve_ldbss_zsu_mte, 5198 gen_helper_sve_ldhss_le_zsu_mte, 5199 NULL, }, 5200 { gen_helper_sve_ldbsu_zsu_mte, 5201 gen_helper_sve_ldhsu_le_zsu_mte, 5202 gen_helper_sve_ldss_le_zsu_mte, } }, 5203 { { gen_helper_sve_ldbss_zss_mte, 5204 gen_helper_sve_ldhss_le_zss_mte, 5205 NULL, }, 5206 { gen_helper_sve_ldbsu_zss_mte, 5207 gen_helper_sve_ldhsu_le_zss_mte, 5208 gen_helper_sve_ldss_le_zss_mte, } } }, 5209 5210 /* First-fault */ 5211 { { { gen_helper_sve_ldffbss_zsu_mte, 5212 gen_helper_sve_ldffhss_le_zsu_mte, 5213 NULL, }, 5214 { gen_helper_sve_ldffbsu_zsu_mte, 5215 gen_helper_sve_ldffhsu_le_zsu_mte, 5216 gen_helper_sve_ldffss_le_zsu_mte, } }, 5217 { { gen_helper_sve_ldffbss_zss_mte, 5218 gen_helper_sve_ldffhss_le_zss_mte, 5219 NULL, }, 5220 { gen_helper_sve_ldffbsu_zss_mte, 5221 gen_helper_sve_ldffhsu_le_zss_mte, 5222 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5223 5224 { /* Big-endian */ 5225 { { { gen_helper_sve_ldbss_zsu_mte, 5226 gen_helper_sve_ldhss_be_zsu_mte, 5227 NULL, }, 5228 { gen_helper_sve_ldbsu_zsu_mte, 5229 gen_helper_sve_ldhsu_be_zsu_mte, 5230 gen_helper_sve_ldss_be_zsu_mte, } }, 5231 { { gen_helper_sve_ldbss_zss_mte, 5232 gen_helper_sve_ldhss_be_zss_mte, 5233 NULL, }, 5234 { gen_helper_sve_ldbsu_zss_mte, 5235 gen_helper_sve_ldhsu_be_zss_mte, 5236 gen_helper_sve_ldss_be_zss_mte, } } }, 5237 5238 /* First-fault */ 5239 { { { gen_helper_sve_ldffbss_zsu_mte, 5240 gen_helper_sve_ldffhss_be_zsu_mte, 5241 NULL, }, 5242 { gen_helper_sve_ldffbsu_zsu_mte, 5243 gen_helper_sve_ldffhsu_be_zsu_mte, 5244 gen_helper_sve_ldffss_be_zsu_mte, } }, 5245 { { gen_helper_sve_ldffbss_zss_mte, 5246 gen_helper_sve_ldffhss_be_zss_mte, 5247 NULL, }, 5248 { 
gen_helper_sve_ldffbsu_zss_mte, 5249 gen_helper_sve_ldffhsu_be_zss_mte, 5250 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 5251 }; 5252 5253 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5254 static gen_helper_gvec_mem_scatter * const 5255 gather_load_fn64[2][2][2][3][2][4] = { 5256 { /* MTE Inactive */ 5257 { /* Little-endian */ 5258 { { { gen_helper_sve_ldbds_zsu, 5259 gen_helper_sve_ldhds_le_zsu, 5260 gen_helper_sve_ldsds_le_zsu, 5261 NULL, }, 5262 { gen_helper_sve_ldbdu_zsu, 5263 gen_helper_sve_ldhdu_le_zsu, 5264 gen_helper_sve_ldsdu_le_zsu, 5265 gen_helper_sve_lddd_le_zsu, } }, 5266 { { gen_helper_sve_ldbds_zss, 5267 gen_helper_sve_ldhds_le_zss, 5268 gen_helper_sve_ldsds_le_zss, 5269 NULL, }, 5270 { gen_helper_sve_ldbdu_zss, 5271 gen_helper_sve_ldhdu_le_zss, 5272 gen_helper_sve_ldsdu_le_zss, 5273 gen_helper_sve_lddd_le_zss, } }, 5274 { { gen_helper_sve_ldbds_zd, 5275 gen_helper_sve_ldhds_le_zd, 5276 gen_helper_sve_ldsds_le_zd, 5277 NULL, }, 5278 { gen_helper_sve_ldbdu_zd, 5279 gen_helper_sve_ldhdu_le_zd, 5280 gen_helper_sve_ldsdu_le_zd, 5281 gen_helper_sve_lddd_le_zd, } } }, 5282 5283 /* First-fault */ 5284 { { { gen_helper_sve_ldffbds_zsu, 5285 gen_helper_sve_ldffhds_le_zsu, 5286 gen_helper_sve_ldffsds_le_zsu, 5287 NULL, }, 5288 { gen_helper_sve_ldffbdu_zsu, 5289 gen_helper_sve_ldffhdu_le_zsu, 5290 gen_helper_sve_ldffsdu_le_zsu, 5291 gen_helper_sve_ldffdd_le_zsu, } }, 5292 { { gen_helper_sve_ldffbds_zss, 5293 gen_helper_sve_ldffhds_le_zss, 5294 gen_helper_sve_ldffsds_le_zss, 5295 NULL, }, 5296 { gen_helper_sve_ldffbdu_zss, 5297 gen_helper_sve_ldffhdu_le_zss, 5298 gen_helper_sve_ldffsdu_le_zss, 5299 gen_helper_sve_ldffdd_le_zss, } }, 5300 { { gen_helper_sve_ldffbds_zd, 5301 gen_helper_sve_ldffhds_le_zd, 5302 gen_helper_sve_ldffsds_le_zd, 5303 NULL, }, 5304 { gen_helper_sve_ldffbdu_zd, 5305 gen_helper_sve_ldffhdu_le_zd, 5306 gen_helper_sve_ldffsdu_le_zd, 5307 gen_helper_sve_ldffdd_le_zd, } } } }, 5308 { /* Big-endian */ 5309 { { { gen_helper_sve_ldbds_zsu, 5310 gen_helper_sve_ldhds_be_zsu, 5311 gen_helper_sve_ldsds_be_zsu, 5312 NULL, }, 5313 { gen_helper_sve_ldbdu_zsu, 5314 gen_helper_sve_ldhdu_be_zsu, 5315 gen_helper_sve_ldsdu_be_zsu, 5316 gen_helper_sve_lddd_be_zsu, } }, 5317 { { gen_helper_sve_ldbds_zss, 5318 gen_helper_sve_ldhds_be_zss, 5319 gen_helper_sve_ldsds_be_zss, 5320 NULL, }, 5321 { gen_helper_sve_ldbdu_zss, 5322 gen_helper_sve_ldhdu_be_zss, 5323 gen_helper_sve_ldsdu_be_zss, 5324 gen_helper_sve_lddd_be_zss, } }, 5325 { { gen_helper_sve_ldbds_zd, 5326 gen_helper_sve_ldhds_be_zd, 5327 gen_helper_sve_ldsds_be_zd, 5328 NULL, }, 5329 { gen_helper_sve_ldbdu_zd, 5330 gen_helper_sve_ldhdu_be_zd, 5331 gen_helper_sve_ldsdu_be_zd, 5332 gen_helper_sve_lddd_be_zd, } } }, 5333 5334 /* First-fault */ 5335 { { { gen_helper_sve_ldffbds_zsu, 5336 gen_helper_sve_ldffhds_be_zsu, 5337 gen_helper_sve_ldffsds_be_zsu, 5338 NULL, }, 5339 { gen_helper_sve_ldffbdu_zsu, 5340 gen_helper_sve_ldffhdu_be_zsu, 5341 gen_helper_sve_ldffsdu_be_zsu, 5342 gen_helper_sve_ldffdd_be_zsu, } }, 5343 { { gen_helper_sve_ldffbds_zss, 5344 gen_helper_sve_ldffhds_be_zss, 5345 gen_helper_sve_ldffsds_be_zss, 5346 NULL, }, 5347 { gen_helper_sve_ldffbdu_zss, 5348 gen_helper_sve_ldffhdu_be_zss, 5349 gen_helper_sve_ldffsdu_be_zss, 5350 gen_helper_sve_ldffdd_be_zss, } }, 5351 { { gen_helper_sve_ldffbds_zd, 5352 gen_helper_sve_ldffhds_be_zd, 5353 gen_helper_sve_ldffsds_be_zd, 5354 NULL, }, 5355 { gen_helper_sve_ldffbdu_zd, 5356 gen_helper_sve_ldffhdu_be_zd, 5357 gen_helper_sve_ldffsdu_be_zd, 5358 
gen_helper_sve_ldffdd_be_zd, } } } } }, 5359 { /* MTE Active */ 5360 { /* Little-endian */ 5361 { { { gen_helper_sve_ldbds_zsu_mte, 5362 gen_helper_sve_ldhds_le_zsu_mte, 5363 gen_helper_sve_ldsds_le_zsu_mte, 5364 NULL, }, 5365 { gen_helper_sve_ldbdu_zsu_mte, 5366 gen_helper_sve_ldhdu_le_zsu_mte, 5367 gen_helper_sve_ldsdu_le_zsu_mte, 5368 gen_helper_sve_lddd_le_zsu_mte, } }, 5369 { { gen_helper_sve_ldbds_zss_mte, 5370 gen_helper_sve_ldhds_le_zss_mte, 5371 gen_helper_sve_ldsds_le_zss_mte, 5372 NULL, }, 5373 { gen_helper_sve_ldbdu_zss_mte, 5374 gen_helper_sve_ldhdu_le_zss_mte, 5375 gen_helper_sve_ldsdu_le_zss_mte, 5376 gen_helper_sve_lddd_le_zss_mte, } }, 5377 { { gen_helper_sve_ldbds_zd_mte, 5378 gen_helper_sve_ldhds_le_zd_mte, 5379 gen_helper_sve_ldsds_le_zd_mte, 5380 NULL, }, 5381 { gen_helper_sve_ldbdu_zd_mte, 5382 gen_helper_sve_ldhdu_le_zd_mte, 5383 gen_helper_sve_ldsdu_le_zd_mte, 5384 gen_helper_sve_lddd_le_zd_mte, } } }, 5385 5386 /* First-fault */ 5387 { { { gen_helper_sve_ldffbds_zsu_mte, 5388 gen_helper_sve_ldffhds_le_zsu_mte, 5389 gen_helper_sve_ldffsds_le_zsu_mte, 5390 NULL, }, 5391 { gen_helper_sve_ldffbdu_zsu_mte, 5392 gen_helper_sve_ldffhdu_le_zsu_mte, 5393 gen_helper_sve_ldffsdu_le_zsu_mte, 5394 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5395 { { gen_helper_sve_ldffbds_zss_mte, 5396 gen_helper_sve_ldffhds_le_zss_mte, 5397 gen_helper_sve_ldffsds_le_zss_mte, 5398 NULL, }, 5399 { gen_helper_sve_ldffbdu_zss_mte, 5400 gen_helper_sve_ldffhdu_le_zss_mte, 5401 gen_helper_sve_ldffsdu_le_zss_mte, 5402 gen_helper_sve_ldffdd_le_zss_mte, } }, 5403 { { gen_helper_sve_ldffbds_zd_mte, 5404 gen_helper_sve_ldffhds_le_zd_mte, 5405 gen_helper_sve_ldffsds_le_zd_mte, 5406 NULL, }, 5407 { gen_helper_sve_ldffbdu_zd_mte, 5408 gen_helper_sve_ldffhdu_le_zd_mte, 5409 gen_helper_sve_ldffsdu_le_zd_mte, 5410 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5411 { /* Big-endian */ 5412 { { { gen_helper_sve_ldbds_zsu_mte, 5413 gen_helper_sve_ldhds_be_zsu_mte, 5414 gen_helper_sve_ldsds_be_zsu_mte, 5415 NULL, }, 5416 { gen_helper_sve_ldbdu_zsu_mte, 5417 gen_helper_sve_ldhdu_be_zsu_mte, 5418 gen_helper_sve_ldsdu_be_zsu_mte, 5419 gen_helper_sve_lddd_be_zsu_mte, } }, 5420 { { gen_helper_sve_ldbds_zss_mte, 5421 gen_helper_sve_ldhds_be_zss_mte, 5422 gen_helper_sve_ldsds_be_zss_mte, 5423 NULL, }, 5424 { gen_helper_sve_ldbdu_zss_mte, 5425 gen_helper_sve_ldhdu_be_zss_mte, 5426 gen_helper_sve_ldsdu_be_zss_mte, 5427 gen_helper_sve_lddd_be_zss_mte, } }, 5428 { { gen_helper_sve_ldbds_zd_mte, 5429 gen_helper_sve_ldhds_be_zd_mte, 5430 gen_helper_sve_ldsds_be_zd_mte, 5431 NULL, }, 5432 { gen_helper_sve_ldbdu_zd_mte, 5433 gen_helper_sve_ldhdu_be_zd_mte, 5434 gen_helper_sve_ldsdu_be_zd_mte, 5435 gen_helper_sve_lddd_be_zd_mte, } } }, 5436 5437 /* First-fault */ 5438 { { { gen_helper_sve_ldffbds_zsu_mte, 5439 gen_helper_sve_ldffhds_be_zsu_mte, 5440 gen_helper_sve_ldffsds_be_zsu_mte, 5441 NULL, }, 5442 { gen_helper_sve_ldffbdu_zsu_mte, 5443 gen_helper_sve_ldffhdu_be_zsu_mte, 5444 gen_helper_sve_ldffsdu_be_zsu_mte, 5445 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5446 { { gen_helper_sve_ldffbds_zss_mte, 5447 gen_helper_sve_ldffhds_be_zss_mte, 5448 gen_helper_sve_ldffsds_be_zss_mte, 5449 NULL, }, 5450 { gen_helper_sve_ldffbdu_zss_mte, 5451 gen_helper_sve_ldffhdu_be_zss_mte, 5452 gen_helper_sve_ldffsdu_be_zss_mte, 5453 gen_helper_sve_ldffdd_be_zss_mte, } }, 5454 { { gen_helper_sve_ldffbds_zd_mte, 5455 gen_helper_sve_ldffhds_be_zd_mte, 5456 gen_helper_sve_ldffsds_be_zd_mte, 5457 NULL, }, 5458 { gen_helper_sve_ldffbdu_zd_mte, 5459 
gen_helper_sve_ldffhdu_be_zd_mte, 5460 gen_helper_sve_ldffsdu_be_zd_mte, 5461 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 5462 }; 5463 5464 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 5465 { 5466 gen_helper_gvec_mem_scatter *fn = NULL; 5467 bool be = s->be_data == MO_BE; 5468 bool mte = s->mte_active[0]; 5469 5470 if (!dc_isar_feature(aa64_sve, s)) { 5471 return false; 5472 } 5473 s->is_nonstreaming = true; 5474 if (!sve_access_check(s)) { 5475 return true; 5476 } 5477 5478 switch (a->esz) { 5479 case MO_32: 5480 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 5481 break; 5482 case MO_64: 5483 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 5484 break; 5485 } 5486 assert(fn != NULL); 5487 5488 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5489 cpu_reg_sp(s, a->rn), a->msz, false, fn); 5490 return true; 5491 } 5492 5493 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 5494 { 5495 gen_helper_gvec_mem_scatter *fn = NULL; 5496 bool be = s->be_data == MO_BE; 5497 bool mte = s->mte_active[0]; 5498 5499 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { 5500 return false; 5501 } 5502 if (!dc_isar_feature(aa64_sve, s)) { 5503 return false; 5504 } 5505 s->is_nonstreaming = true; 5506 if (!sve_access_check(s)) { 5507 return true; 5508 } 5509 5510 switch (a->esz) { 5511 case MO_32: 5512 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 5513 break; 5514 case MO_64: 5515 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 5516 break; 5517 } 5518 assert(fn != NULL); 5519 5520 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 5521 * by loading the immediate into the scalar parameter. 5522 */ 5523 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5524 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 5525 return true; 5526 } 5527 5528 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 5529 { 5530 gen_helper_gvec_mem_scatter *fn = NULL; 5531 bool be = s->be_data == MO_BE; 5532 bool mte = s->mte_active[0]; 5533 5534 if (a->esz < a->msz + !a->u) { 5535 return false; 5536 } 5537 if (!dc_isar_feature(aa64_sve2, s)) { 5538 return false; 5539 } 5540 s->is_nonstreaming = true; 5541 if (!sve_access_check(s)) { 5542 return true; 5543 } 5544 5545 switch (a->esz) { 5546 case MO_32: 5547 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 5548 break; 5549 case MO_64: 5550 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 5551 break; 5552 } 5553 assert(fn != NULL); 5554 5555 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5556 cpu_reg(s, a->rm), a->msz, false, fn); 5557 return true; 5558 } 5559 5560 /* Indexed by [mte][be][xs][msz]. 
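 * Stores neither fault ahead nor extend the data, so there are no
 * ff or u dimensions here.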
*/
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
5667 } 5668 switch (a->esz) { 5669 case MO_32: 5670 fn = scatter_store_fn32[mte][be][a->xs][a->msz]; 5671 break; 5672 case MO_64: 5673 fn = scatter_store_fn64[mte][be][a->xs][a->msz]; 5674 break; 5675 default: 5676 g_assert_not_reached(); 5677 } 5678 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5679 cpu_reg_sp(s, a->rn), a->msz, true, fn); 5680 return true; 5681 } 5682 5683 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) 5684 { 5685 gen_helper_gvec_mem_scatter *fn = NULL; 5686 bool be = s->be_data == MO_BE; 5687 bool mte = s->mte_active[0]; 5688 5689 if (a->esz < a->msz) { 5690 return false; 5691 } 5692 if (!dc_isar_feature(aa64_sve, s)) { 5693 return false; 5694 } 5695 s->is_nonstreaming = true; 5696 if (!sve_access_check(s)) { 5697 return true; 5698 } 5699 5700 switch (a->esz) { 5701 case MO_32: 5702 fn = scatter_store_fn32[mte][be][0][a->msz]; 5703 break; 5704 case MO_64: 5705 fn = scatter_store_fn64[mte][be][2][a->msz]; 5706 break; 5707 } 5708 assert(fn != NULL); 5709 5710 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) 5711 * by loading the immediate into the scalar parameter. 5712 */ 5713 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5714 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn); 5715 return true; 5716 } 5717 5718 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) 5719 { 5720 gen_helper_gvec_mem_scatter *fn; 5721 bool be = s->be_data == MO_BE; 5722 bool mte = s->mte_active[0]; 5723 5724 if (a->esz < a->msz) { 5725 return false; 5726 } 5727 if (!dc_isar_feature(aa64_sve2, s)) { 5728 return false; 5729 } 5730 s->is_nonstreaming = true; 5731 if (!sve_access_check(s)) { 5732 return true; 5733 } 5734 5735 switch (a->esz) { 5736 case MO_32: 5737 fn = scatter_store_fn32[mte][be][0][a->msz]; 5738 break; 5739 case MO_64: 5740 fn = scatter_store_fn64[mte][be][2][a->msz]; 5741 break; 5742 default: 5743 g_assert_not_reached(); 5744 } 5745 5746 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5747 cpu_reg(s, a->rm), a->msz, true, fn); 5748 return true; 5749 } 5750 5751 /* 5752 * Prefetches 5753 */ 5754 5755 static bool trans_PRF(DisasContext *s, arg_PRF *a) 5756 { 5757 if (!dc_isar_feature(aa64_sve, s)) { 5758 return false; 5759 } 5760 /* Prefetch is a nop within QEMU. */ 5761 (void)sve_access_check(s); 5762 return true; 5763 } 5764 5765 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) 5766 { 5767 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 5768 return false; 5769 } 5770 /* Prefetch is a nop within QEMU. */ 5771 (void)sve_access_check(s); 5772 return true; 5773 } 5774 5775 static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) 5776 { 5777 if (!dc_isar_feature(aa64_sve, s)) { 5778 return false; 5779 } 5780 /* Prefetch is a nop within QEMU. */ 5781 s->is_nonstreaming = true; 5782 (void)sve_access_check(s); 5783 return true; 5784 } 5785 5786 /* 5787 * Move Prefix 5788 * 5789 * TODO: The implementation so far could handle predicated merging movprfx. 5790 * The helper functions as written take an extra source register to 5791 * use in the operation, but the result is only written when predication 5792 * succeeds. For unpredicated movprfx, we need to rearrange the helpers 5793 * to allow the final write back to the destination to be unconditional. 5794 * For predicated zeroing movprfx, we need to rearrange the helpers to 5795 * allow the final write back to zero inactives. 5796 * 5797 * In the meantime, just emit the moves. 
5798 */ 5799 5800 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 5801 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 5802 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 5803 5804 /* 5805 * SVE2 Integer Multiply - Unpredicated 5806 */ 5807 5808 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 5809 5810 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 5811 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 5812 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 5813 }; 5814 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5815 smulh_zzz_fns[a->esz], a, 0) 5816 5817 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 5818 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 5819 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 5820 }; 5821 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5822 umulh_zzz_fns[a->esz], a, 0) 5823 5824 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5825 gen_helper_gvec_pmul_b, a, 0) 5826 5827 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { 5828 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 5829 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 5830 }; 5831 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5832 sqdmulh_zzz_fns[a->esz], a, 0) 5833 5834 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 5835 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 5836 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 5837 }; 5838 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5839 sqrdmulh_zzz_fns[a->esz], a, 0) 5840 5841 /* 5842 * SVE2 Integer - Predicated 5843 */ 5844 5845 static gen_helper_gvec_4 * const sadlp_fns[4] = { 5846 NULL, gen_helper_sve2_sadalp_zpzz_h, 5847 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 5848 }; 5849 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5850 sadlp_fns[a->esz], a, 0) 5851 5852 static gen_helper_gvec_4 * const uadlp_fns[4] = { 5853 NULL, gen_helper_sve2_uadalp_zpzz_h, 5854 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 5855 }; 5856 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5857 uadlp_fns[a->esz], a, 0) 5858 5859 /* 5860 * SVE2 integer unary operations (predicated) 5861 */ 5862 5863 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 5864 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 5865 5866 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 5867 a->esz == 2 ? 
gen_helper_sve2_ursqrte_s : NULL, a, 0) 5868 5869 static gen_helper_gvec_3 * const sqabs_fns[4] = { 5870 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 5871 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 5872 }; 5873 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) 5874 5875 static gen_helper_gvec_3 * const sqneg_fns[4] = { 5876 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 5877 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 5878 }; 5879 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) 5880 5881 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) 5882 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) 5883 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) 5884 5885 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) 5886 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) 5887 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) 5888 5889 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) 5890 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) 5891 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) 5892 5893 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) 5894 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) 5895 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) 5896 5897 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) 5898 DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp) 5899 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) 5900 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) 5901 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) 5902 5903 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) 5904 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) 5905 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) 5906 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) 5907 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) 5908 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) 5909 5910 /* 5911 * SVE2 Widening Integer Arithmetic 5912 */ 5913 5914 static gen_helper_gvec_3 * const saddl_fns[4] = { 5915 NULL, gen_helper_sve2_saddl_h, 5916 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d, 5917 }; 5918 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5919 saddl_fns[a->esz], a, 0) 5920 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5921 saddl_fns[a->esz], a, 3) 5922 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 5923 saddl_fns[a->esz], a, 2) 5924 5925 static gen_helper_gvec_3 * const ssubl_fns[4] = { 5926 NULL, gen_helper_sve2_ssubl_h, 5927 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d, 5928 }; 5929 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5930 ssubl_fns[a->esz], a, 0) 5931 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5932 ssubl_fns[a->esz], a, 3) 5933 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 5934 ssubl_fns[a->esz], a, 2) 5935 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz, 5936 ssubl_fns[a->esz], a, 1) 5937 5938 static gen_helper_gvec_3 * const sabdl_fns[4] = { 5939 NULL, gen_helper_sve2_sabdl_h, 5940 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d, 5941 }; 5942 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5943 sabdl_fns[a->esz], a, 0) 5944 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5945 sabdl_fns[a->esz], a, 3) 5946 5947 static gen_helper_gvec_3 * const uaddl_fns[4] = { 5948 NULL, gen_helper_sve2_uaddl_h, 5949 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d, 5950 }; 5951 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5952 uaddl_fns[a->esz], a, 0) 5953 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5954 uaddl_fns[a->esz], a, 3) 5955 5956 static gen_helper_gvec_3 * const usubl_fns[4] = { 5957 NULL, gen_helper_sve2_usubl_h, 5958 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d, 5959 }; 5960 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5961 usubl_fns[a->esz], a, 0) 5962 TRANS_FEAT(USUBLT, aa64_sve2, 
gen_gvec_ool_arg_zzz, 5963 usubl_fns[a->esz], a, 3) 5964 5965 static gen_helper_gvec_3 * const uabdl_fns[4] = { 5966 NULL, gen_helper_sve2_uabdl_h, 5967 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d, 5968 }; 5969 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5970 uabdl_fns[a->esz], a, 0) 5971 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5972 uabdl_fns[a->esz], a, 3) 5973 5974 static gen_helper_gvec_3 * const sqdmull_fns[4] = { 5975 NULL, gen_helper_sve2_sqdmull_zzz_h, 5976 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d, 5977 }; 5978 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5979 sqdmull_fns[a->esz], a, 0) 5980 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5981 sqdmull_fns[a->esz], a, 3) 5982 5983 static gen_helper_gvec_3 * const smull_fns[4] = { 5984 NULL, gen_helper_sve2_smull_zzz_h, 5985 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d, 5986 }; 5987 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5988 smull_fns[a->esz], a, 0) 5989 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5990 smull_fns[a->esz], a, 3) 5991 5992 static gen_helper_gvec_3 * const umull_fns[4] = { 5993 NULL, gen_helper_sve2_umull_zzz_h, 5994 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d, 5995 }; 5996 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5997 umull_fns[a->esz], a, 0) 5998 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5999 umull_fns[a->esz], a, 3) 6000 6001 static gen_helper_gvec_3 * const eoril_fns[4] = { 6002 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6003 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 6004 }; 6005 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2) 6006 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1) 6007 6008 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6009 { 6010 static gen_helper_gvec_3 * const fns[4] = { 6011 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6012 NULL, gen_helper_sve2_pmull_d, 6013 }; 6014 6015 if (a->esz == 0) { 6016 if (!dc_isar_feature(aa64_sve2_pmull128, s)) { 6017 return false; 6018 } 6019 s->is_nonstreaming = true; 6020 } else if (!dc_isar_feature(aa64_sve, s)) { 6021 return false; 6022 } 6023 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); 6024 } 6025 6026 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false) 6027 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true) 6028 6029 static gen_helper_gvec_3 * const saddw_fns[4] = { 6030 NULL, gen_helper_sve2_saddw_h, 6031 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d, 6032 }; 6033 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0) 6034 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1) 6035 6036 static gen_helper_gvec_3 * const ssubw_fns[4] = { 6037 NULL, gen_helper_sve2_ssubw_h, 6038 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d, 6039 }; 6040 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0) 6041 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1) 6042 6043 static gen_helper_gvec_3 * const uaddw_fns[4] = { 6044 NULL, gen_helper_sve2_uaddw_h, 6045 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d, 6046 }; 6047 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0) 6048 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1) 6049 6050 static gen_helper_gvec_3 * const usubw_fns[4] = { 6051 NULL, gen_helper_sve2_usubw_h, 6052 gen_helper_sve2_usubw_s, 
gen_helper_sve2_usubw_d, 6053 }; 6054 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0) 6055 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1) 6056 6057 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6058 { 6059 int top = imm & 1; 6060 int shl = imm >> 1; 6061 int halfbits = 4 << vece; 6062 6063 if (top) { 6064 if (shl == halfbits) { 6065 TCGv_vec t = tcg_temp_new_vec_matching(d); 6066 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6067 tcg_gen_and_vec(vece, d, n, t); 6068 } else { 6069 tcg_gen_sari_vec(vece, d, n, halfbits); 6070 tcg_gen_shli_vec(vece, d, d, shl); 6071 } 6072 } else { 6073 tcg_gen_shli_vec(vece, d, n, halfbits); 6074 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 6075 } 6076 } 6077 6078 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 6079 { 6080 int halfbits = 4 << vece; 6081 int top = imm & 1; 6082 int shl = (imm >> 1); 6083 int shift; 6084 uint64_t mask; 6085 6086 mask = MAKE_64BIT_MASK(0, halfbits); 6087 mask <<= shl; 6088 mask = dup_const(vece, mask); 6089 6090 shift = shl - top * halfbits; 6091 if (shift < 0) { 6092 tcg_gen_shri_i64(d, n, -shift); 6093 } else { 6094 tcg_gen_shli_i64(d, n, shift); 6095 } 6096 tcg_gen_andi_i64(d, d, mask); 6097 } 6098 6099 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6100 { 6101 gen_ushll_i64(MO_16, d, n, imm); 6102 } 6103 6104 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6105 { 6106 gen_ushll_i64(MO_32, d, n, imm); 6107 } 6108 6109 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6110 { 6111 gen_ushll_i64(MO_64, d, n, imm); 6112 } 6113 6114 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6115 { 6116 int halfbits = 4 << vece; 6117 int top = imm & 1; 6118 int shl = imm >> 1; 6119 6120 if (top) { 6121 if (shl == halfbits) { 6122 TCGv_vec t = tcg_temp_new_vec_matching(d); 6123 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6124 tcg_gen_and_vec(vece, d, n, t); 6125 } else { 6126 tcg_gen_shri_vec(vece, d, n, halfbits); 6127 tcg_gen_shli_vec(vece, d, d, shl); 6128 } 6129 } else { 6130 if (shl == 0) { 6131 TCGv_vec t = tcg_temp_new_vec_matching(d); 6132 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6133 tcg_gen_and_vec(vece, d, n, t); 6134 } else { 6135 tcg_gen_shli_vec(vece, d, n, halfbits); 6136 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 6137 } 6138 } 6139 } 6140 6141 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a, 6142 const GVecGen2i ops[3], bool sel) 6143 { 6144 6145 if (a->esz < 0 || a->esz > 2) { 6146 return false; 6147 } 6148 if (sve_access_check(s)) { 6149 unsigned vsz = vec_full_reg_size(s); 6150 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6151 vec_full_reg_offset(s, a->rn), 6152 vsz, vsz, (a->imm << 1) | sel, 6153 &ops[a->esz]); 6154 } 6155 return true; 6156 } 6157 6158 static const TCGOpcode sshll_list[] = { 6159 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 6160 }; 6161 static const GVecGen2i sshll_ops[3] = { 6162 { .fniv = gen_sshll_vec, 6163 .opt_opc = sshll_list, 6164 .fno = gen_helper_sve2_sshll_h, 6165 .vece = MO_16 }, 6166 { .fniv = gen_sshll_vec, 6167 .opt_opc = sshll_list, 6168 .fno = gen_helper_sve2_sshll_s, 6169 .vece = MO_32 }, 6170 { .fniv = gen_sshll_vec, 6171 .opt_opc = sshll_list, 6172 .fno = gen_helper_sve2_sshll_d, 6173 .vece = MO_64 } 6174 }; 6175 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false) 6176 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, 
sshll_ops, true) 6177 6178 static const TCGOpcode ushll_list[] = { 6179 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 6180 }; 6181 static const GVecGen2i ushll_ops[3] = { 6182 { .fni8 = gen_ushll16_i64, 6183 .fniv = gen_ushll_vec, 6184 .opt_opc = ushll_list, 6185 .fno = gen_helper_sve2_ushll_h, 6186 .vece = MO_16 }, 6187 { .fni8 = gen_ushll32_i64, 6188 .fniv = gen_ushll_vec, 6189 .opt_opc = ushll_list, 6190 .fno = gen_helper_sve2_ushll_s, 6191 .vece = MO_32 }, 6192 { .fni8 = gen_ushll64_i64, 6193 .fniv = gen_ushll_vec, 6194 .opt_opc = ushll_list, 6195 .fno = gen_helper_sve2_ushll_d, 6196 .vece = MO_64 }, 6197 }; 6198 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false) 6199 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true) 6200 6201 static gen_helper_gvec_3 * const bext_fns[4] = { 6202 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 6203 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 6204 }; 6205 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6206 bext_fns[a->esz], a, 0) 6207 6208 static gen_helper_gvec_3 * const bdep_fns[4] = { 6209 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 6210 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 6211 }; 6212 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6213 bdep_fns[a->esz], a, 0) 6214 6215 static gen_helper_gvec_3 * const bgrp_fns[4] = { 6216 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 6217 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 6218 }; 6219 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6220 bgrp_fns[a->esz], a, 0) 6221 6222 static gen_helper_gvec_3 * const cadd_fns[4] = { 6223 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 6224 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d, 6225 }; 6226 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6227 cadd_fns[a->esz], a, 0) 6228 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6229 cadd_fns[a->esz], a, 1) 6230 6231 static gen_helper_gvec_3 * const sqcadd_fns[4] = { 6232 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 6233 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d, 6234 }; 6235 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6236 sqcadd_fns[a->esz], a, 0) 6237 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6238 sqcadd_fns[a->esz], a, 1) 6239 6240 static gen_helper_gvec_4 * const sabal_fns[4] = { 6241 NULL, gen_helper_sve2_sabal_h, 6242 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d, 6243 }; 6244 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0) 6245 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1) 6246 6247 static gen_helper_gvec_4 * const uabal_fns[4] = { 6248 NULL, gen_helper_sve2_uabal_h, 6249 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d, 6250 }; 6251 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0) 6252 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1) 6253 6254 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 6255 { 6256 static gen_helper_gvec_4 * const fns[2] = { 6257 gen_helper_sve2_adcl_s, 6258 gen_helper_sve2_adcl_d, 6259 }; 6260 /* 6261 * Note that in this case the ESZ field encodes both size and sign. 6262 * Split out 'subtract' into bit 1 of the data field for the helper. 
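 * Concretely: esz & 1 selects the 32-bit vs 64-bit helper, esz & 2 is the
 * subtract flag distinguishing ADCLB/T from SBCLB/T, and 'sel' supplies
 * the usual bottom/top element selector in bit 0 of the data field.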
6263 */ 6264 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 6265 } 6266 6267 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 6268 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 6269 6270 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 6271 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 6272 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 6273 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 6274 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 6275 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 6276 6277 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 6278 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 6279 6280 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 6281 const GVecGen2 ops[3]) 6282 { 6283 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 6284 return false; 6285 } 6286 if (sve_access_check(s)) { 6287 unsigned vsz = vec_full_reg_size(s); 6288 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 6289 vec_full_reg_offset(s, a->rn), 6290 vsz, vsz, &ops[a->esz]); 6291 } 6292 return true; 6293 } 6294 6295 static const TCGOpcode sqxtn_list[] = { 6296 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 6297 }; 6298 6299 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6300 { 6301 TCGv_vec t = tcg_temp_new_vec_matching(d); 6302 int halfbits = 4 << vece; 6303 int64_t mask = (1ull << halfbits) - 1; 6304 int64_t min = -1ull << (halfbits - 1); 6305 int64_t max = -min - 1; 6306 6307 tcg_gen_dupi_vec(vece, t, min); 6308 tcg_gen_smax_vec(vece, d, n, t); 6309 tcg_gen_dupi_vec(vece, t, max); 6310 tcg_gen_smin_vec(vece, d, d, t); 6311 tcg_gen_dupi_vec(vece, t, mask); 6312 tcg_gen_and_vec(vece, d, d, t); 6313 } 6314 6315 static const GVecGen2 sqxtnb_ops[3] = { 6316 { .fniv = gen_sqxtnb_vec, 6317 .opt_opc = sqxtn_list, 6318 .fno = gen_helper_sve2_sqxtnb_h, 6319 .vece = MO_16 }, 6320 { .fniv = gen_sqxtnb_vec, 6321 .opt_opc = sqxtn_list, 6322 .fno = gen_helper_sve2_sqxtnb_s, 6323 .vece = MO_32 }, 6324 { .fniv = gen_sqxtnb_vec, 6325 .opt_opc = sqxtn_list, 6326 .fno = gen_helper_sve2_sqxtnb_d, 6327 .vece = MO_64 }, 6328 }; 6329 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 6330 6331 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6332 { 6333 TCGv_vec t = tcg_temp_new_vec_matching(d); 6334 int halfbits = 4 << vece; 6335 int64_t mask = (1ull << halfbits) - 1; 6336 int64_t min = -1ull << (halfbits - 1); 6337 int64_t max = -min - 1; 6338 6339 tcg_gen_dupi_vec(vece, t, min); 6340 tcg_gen_smax_vec(vece, n, n, t); 6341 tcg_gen_dupi_vec(vece, t, max); 6342 tcg_gen_smin_vec(vece, n, n, t); 6343 tcg_gen_shli_vec(vece, n, n, halfbits); 6344 tcg_gen_dupi_vec(vece, t, mask); 6345 tcg_gen_bitsel_vec(vece, d, t, d, n); 6346 } 6347 6348 static const GVecGen2 sqxtnt_ops[3] = { 6349 { .fniv = gen_sqxtnt_vec, 6350 .opt_opc = sqxtn_list, 6351 .load_dest = true, 6352 .fno = gen_helper_sve2_sqxtnt_h, 6353 .vece = MO_16 }, 6354 { .fniv = gen_sqxtnt_vec, 6355 .opt_opc = sqxtn_list, 6356 .load_dest = true, 6357 .fno = gen_helper_sve2_sqxtnt_s, 6358 .vece = MO_32 }, 6359 { .fniv = gen_sqxtnt_vec, 6360 .opt_opc = sqxtn_list, 6361 .load_dest = true, 6362 .fno = gen_helper_sve2_sqxtnt_d, 6363 .vece = MO_64 }, 6364 }; 6365 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 6366 6367 static const TCGOpcode uqxtn_list[] = { 6368 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 6369 }; 6370 
6371 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6372 { 6373 TCGv_vec t = tcg_temp_new_vec_matching(d); 6374 int halfbits = 4 << vece; 6375 int64_t max = (1ull << halfbits) - 1; 6376 6377 tcg_gen_dupi_vec(vece, t, max); 6378 tcg_gen_umin_vec(vece, d, n, t); 6379 } 6380 6381 static const GVecGen2 uqxtnb_ops[3] = { 6382 { .fniv = gen_uqxtnb_vec, 6383 .opt_opc = uqxtn_list, 6384 .fno = gen_helper_sve2_uqxtnb_h, 6385 .vece = MO_16 }, 6386 { .fniv = gen_uqxtnb_vec, 6387 .opt_opc = uqxtn_list, 6388 .fno = gen_helper_sve2_uqxtnb_s, 6389 .vece = MO_32 }, 6390 { .fniv = gen_uqxtnb_vec, 6391 .opt_opc = uqxtn_list, 6392 .fno = gen_helper_sve2_uqxtnb_d, 6393 .vece = MO_64 }, 6394 }; 6395 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) 6396 6397 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6398 { 6399 TCGv_vec t = tcg_temp_new_vec_matching(d); 6400 int halfbits = 4 << vece; 6401 int64_t max = (1ull << halfbits) - 1; 6402 6403 tcg_gen_dupi_vec(vece, t, max); 6404 tcg_gen_umin_vec(vece, n, n, t); 6405 tcg_gen_shli_vec(vece, n, n, halfbits); 6406 tcg_gen_bitsel_vec(vece, d, t, d, n); 6407 } 6408 6409 static const GVecGen2 uqxtnt_ops[3] = { 6410 { .fniv = gen_uqxtnt_vec, 6411 .opt_opc = uqxtn_list, 6412 .load_dest = true, 6413 .fno = gen_helper_sve2_uqxtnt_h, 6414 .vece = MO_16 }, 6415 { .fniv = gen_uqxtnt_vec, 6416 .opt_opc = uqxtn_list, 6417 .load_dest = true, 6418 .fno = gen_helper_sve2_uqxtnt_s, 6419 .vece = MO_32 }, 6420 { .fniv = gen_uqxtnt_vec, 6421 .opt_opc = uqxtn_list, 6422 .load_dest = true, 6423 .fno = gen_helper_sve2_uqxtnt_d, 6424 .vece = MO_64 }, 6425 }; 6426 TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops) 6427 6428 static const TCGOpcode sqxtun_list[] = { 6429 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 6430 }; 6431 6432 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6433 { 6434 TCGv_vec t = tcg_temp_new_vec_matching(d); 6435 int halfbits = 4 << vece; 6436 int64_t max = (1ull << halfbits) - 1; 6437 6438 tcg_gen_dupi_vec(vece, t, 0); 6439 tcg_gen_smax_vec(vece, d, n, t); 6440 tcg_gen_dupi_vec(vece, t, max); 6441 tcg_gen_umin_vec(vece, d, d, t); 6442 } 6443 6444 static const GVecGen2 sqxtunb_ops[3] = { 6445 { .fniv = gen_sqxtunb_vec, 6446 .opt_opc = sqxtun_list, 6447 .fno = gen_helper_sve2_sqxtunb_h, 6448 .vece = MO_16 }, 6449 { .fniv = gen_sqxtunb_vec, 6450 .opt_opc = sqxtun_list, 6451 .fno = gen_helper_sve2_sqxtunb_s, 6452 .vece = MO_32 }, 6453 { .fniv = gen_sqxtunb_vec, 6454 .opt_opc = sqxtun_list, 6455 .fno = gen_helper_sve2_sqxtunb_d, 6456 .vece = MO_64 }, 6457 }; 6458 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) 6459 6460 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6461 { 6462 TCGv_vec t = tcg_temp_new_vec_matching(d); 6463 int halfbits = 4 << vece; 6464 int64_t max = (1ull << halfbits) - 1; 6465 6466 tcg_gen_dupi_vec(vece, t, 0); 6467 tcg_gen_smax_vec(vece, n, n, t); 6468 tcg_gen_dupi_vec(vece, t, max); 6469 tcg_gen_umin_vec(vece, n, n, t); 6470 tcg_gen_shli_vec(vece, n, n, halfbits); 6471 tcg_gen_bitsel_vec(vece, d, t, d, n); 6472 } 6473 6474 static const GVecGen2 sqxtunt_ops[3] = { 6475 { .fniv = gen_sqxtunt_vec, 6476 .opt_opc = sqxtun_list, 6477 .load_dest = true, 6478 .fno = gen_helper_sve2_sqxtunt_h, 6479 .vece = MO_16 }, 6480 { .fniv = gen_sqxtunt_vec, 6481 .opt_opc = sqxtun_list, 6482 .load_dest = true, 6483 .fno = gen_helper_sve2_sqxtunt_s, 6484 .vece = MO_32 }, 6485 { .fniv = gen_sqxtunt_vec, 6486 .opt_opc = sqxtun_list, 6487 
.load_dest = true, 6488 .fno = gen_helper_sve2_sqxtunt_d, 6489 .vece = MO_64 }, 6490 }; 6491 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops) 6492 6493 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a, 6494 const GVecGen2i ops[3]) 6495 { 6496 if (a->esz < 0 || a->esz > MO_32) { 6497 return false; 6498 } 6499 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 6500 if (sve_access_check(s)) { 6501 unsigned vsz = vec_full_reg_size(s); 6502 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6503 vec_full_reg_offset(s, a->rn), 6504 vsz, vsz, a->imm, &ops[a->esz]); 6505 } 6506 return true; 6507 } 6508 6509 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6510 { 6511 int halfbits = 4 << vece; 6512 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6513 6514 tcg_gen_shri_i64(d, n, shr); 6515 tcg_gen_andi_i64(d, d, mask); 6516 } 6517 6518 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6519 { 6520 gen_shrnb_i64(MO_16, d, n, shr); 6521 } 6522 6523 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6524 { 6525 gen_shrnb_i64(MO_32, d, n, shr); 6526 } 6527 6528 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6529 { 6530 gen_shrnb_i64(MO_64, d, n, shr); 6531 } 6532 6533 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6534 { 6535 TCGv_vec t = tcg_temp_new_vec_matching(d); 6536 int halfbits = 4 << vece; 6537 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6538 6539 tcg_gen_shri_vec(vece, n, n, shr); 6540 tcg_gen_dupi_vec(vece, t, mask); 6541 tcg_gen_and_vec(vece, d, n, t); 6542 } 6543 6544 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; 6545 static const GVecGen2i shrnb_ops[3] = { 6546 { .fni8 = gen_shrnb16_i64, 6547 .fniv = gen_shrnb_vec, 6548 .opt_opc = shrnb_vec_list, 6549 .fno = gen_helper_sve2_shrnb_h, 6550 .vece = MO_16 }, 6551 { .fni8 = gen_shrnb32_i64, 6552 .fniv = gen_shrnb_vec, 6553 .opt_opc = shrnb_vec_list, 6554 .fno = gen_helper_sve2_shrnb_s, 6555 .vece = MO_32 }, 6556 { .fni8 = gen_shrnb64_i64, 6557 .fniv = gen_shrnb_vec, 6558 .opt_opc = shrnb_vec_list, 6559 .fno = gen_helper_sve2_shrnb_d, 6560 .vece = MO_64 }, 6561 }; 6562 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops) 6563 6564 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6565 { 6566 int halfbits = 4 << vece; 6567 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6568 6569 tcg_gen_shli_i64(n, n, halfbits - shr); 6570 tcg_gen_andi_i64(n, n, ~mask); 6571 tcg_gen_andi_i64(d, d, mask); 6572 tcg_gen_or_i64(d, d, n); 6573 } 6574 6575 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6576 { 6577 gen_shrnt_i64(MO_16, d, n, shr); 6578 } 6579 6580 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6581 { 6582 gen_shrnt_i64(MO_32, d, n, shr); 6583 } 6584 6585 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6586 { 6587 tcg_gen_shri_i64(n, n, shr); 6588 tcg_gen_deposit_i64(d, d, n, 32, 32); 6589 } 6590 6591 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6592 { 6593 TCGv_vec t = tcg_temp_new_vec_matching(d); 6594 int halfbits = 4 << vece; 6595 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6596 6597 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 6598 tcg_gen_dupi_vec(vece, t, mask); 6599 tcg_gen_bitsel_vec(vece, d, t, d, n); 6600 } 6601 6602 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; 6603 static const GVecGen2i shrnt_ops[3] = { 6604 { .fni8 = 
gen_shrnt16_i64, 6605 .fniv = gen_shrnt_vec, 6606 .opt_opc = shrnt_vec_list, 6607 .load_dest = true, 6608 .fno = gen_helper_sve2_shrnt_h, 6609 .vece = MO_16 }, 6610 { .fni8 = gen_shrnt32_i64, 6611 .fniv = gen_shrnt_vec, 6612 .opt_opc = shrnt_vec_list, 6613 .load_dest = true, 6614 .fno = gen_helper_sve2_shrnt_s, 6615 .vece = MO_32 }, 6616 { .fni8 = gen_shrnt64_i64, 6617 .fniv = gen_shrnt_vec, 6618 .opt_opc = shrnt_vec_list, 6619 .load_dest = true, 6620 .fno = gen_helper_sve2_shrnt_d, 6621 .vece = MO_64 }, 6622 }; 6623 TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops) 6624 6625 static const GVecGen2i rshrnb_ops[3] = { 6626 { .fno = gen_helper_sve2_rshrnb_h }, 6627 { .fno = gen_helper_sve2_rshrnb_s }, 6628 { .fno = gen_helper_sve2_rshrnb_d }, 6629 }; 6630 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops) 6631 6632 static const GVecGen2i rshrnt_ops[3] = { 6633 { .fno = gen_helper_sve2_rshrnt_h }, 6634 { .fno = gen_helper_sve2_rshrnt_s }, 6635 { .fno = gen_helper_sve2_rshrnt_d }, 6636 }; 6637 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) 6638 6639 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 6640 TCGv_vec n, int64_t shr) 6641 { 6642 TCGv_vec t = tcg_temp_new_vec_matching(d); 6643 int halfbits = 4 << vece; 6644 6645 tcg_gen_sari_vec(vece, n, n, shr); 6646 tcg_gen_dupi_vec(vece, t, 0); 6647 tcg_gen_smax_vec(vece, n, n, t); 6648 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6649 tcg_gen_umin_vec(vece, d, n, t); 6650 } 6651 6652 static const TCGOpcode sqshrunb_vec_list[] = { 6653 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6654 }; 6655 static const GVecGen2i sqshrunb_ops[3] = { 6656 { .fniv = gen_sqshrunb_vec, 6657 .opt_opc = sqshrunb_vec_list, 6658 .fno = gen_helper_sve2_sqshrunb_h, 6659 .vece = MO_16 }, 6660 { .fniv = gen_sqshrunb_vec, 6661 .opt_opc = sqshrunb_vec_list, 6662 .fno = gen_helper_sve2_sqshrunb_s, 6663 .vece = MO_32 }, 6664 { .fniv = gen_sqshrunb_vec, 6665 .opt_opc = sqshrunb_vec_list, 6666 .fno = gen_helper_sve2_sqshrunb_d, 6667 .vece = MO_64 }, 6668 }; 6669 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) 6670 6671 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 6672 TCGv_vec n, int64_t shr) 6673 { 6674 TCGv_vec t = tcg_temp_new_vec_matching(d); 6675 int halfbits = 4 << vece; 6676 6677 tcg_gen_sari_vec(vece, n, n, shr); 6678 tcg_gen_dupi_vec(vece, t, 0); 6679 tcg_gen_smax_vec(vece, n, n, t); 6680 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6681 tcg_gen_umin_vec(vece, n, n, t); 6682 tcg_gen_shli_vec(vece, n, n, halfbits); 6683 tcg_gen_bitsel_vec(vece, d, t, d, n); 6684 } 6685 6686 static const TCGOpcode sqshrunt_vec_list[] = { 6687 INDEX_op_shli_vec, INDEX_op_sari_vec, 6688 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6689 }; 6690 static const GVecGen2i sqshrunt_ops[3] = { 6691 { .fniv = gen_sqshrunt_vec, 6692 .opt_opc = sqshrunt_vec_list, 6693 .load_dest = true, 6694 .fno = gen_helper_sve2_sqshrunt_h, 6695 .vece = MO_16 }, 6696 { .fniv = gen_sqshrunt_vec, 6697 .opt_opc = sqshrunt_vec_list, 6698 .load_dest = true, 6699 .fno = gen_helper_sve2_sqshrunt_s, 6700 .vece = MO_32 }, 6701 { .fniv = gen_sqshrunt_vec, 6702 .opt_opc = sqshrunt_vec_list, 6703 .load_dest = true, 6704 .fno = gen_helper_sve2_sqshrunt_d, 6705 .vece = MO_64 }, 6706 }; 6707 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops) 6708 6709 static const GVecGen2i sqrshrunb_ops[3] = { 6710 { .fno = gen_helper_sve2_sqrshrunb_h }, 6711 { .fno = gen_helper_sve2_sqrshrunb_s }, 6712 { .fno = gen_helper_sve2_sqrshrunb_d 
}, 6713 }; 6714 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops) 6715 6716 static const GVecGen2i sqrshrunt_ops[3] = { 6717 { .fno = gen_helper_sve2_sqrshrunt_h }, 6718 { .fno = gen_helper_sve2_sqrshrunt_s }, 6719 { .fno = gen_helper_sve2_sqrshrunt_d }, 6720 }; 6721 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops) 6722 6723 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 6724 TCGv_vec n, int64_t shr) 6725 { 6726 TCGv_vec t = tcg_temp_new_vec_matching(d); 6727 int halfbits = 4 << vece; 6728 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6729 int64_t min = -max - 1; 6730 6731 tcg_gen_sari_vec(vece, n, n, shr); 6732 tcg_gen_dupi_vec(vece, t, min); 6733 tcg_gen_smax_vec(vece, n, n, t); 6734 tcg_gen_dupi_vec(vece, t, max); 6735 tcg_gen_smin_vec(vece, n, n, t); 6736 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6737 tcg_gen_and_vec(vece, d, n, t); 6738 } 6739 6740 static const TCGOpcode sqshrnb_vec_list[] = { 6741 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6742 }; 6743 static const GVecGen2i sqshrnb_ops[3] = { 6744 { .fniv = gen_sqshrnb_vec, 6745 .opt_opc = sqshrnb_vec_list, 6746 .fno = gen_helper_sve2_sqshrnb_h, 6747 .vece = MO_16 }, 6748 { .fniv = gen_sqshrnb_vec, 6749 .opt_opc = sqshrnb_vec_list, 6750 .fno = gen_helper_sve2_sqshrnb_s, 6751 .vece = MO_32 }, 6752 { .fniv = gen_sqshrnb_vec, 6753 .opt_opc = sqshrnb_vec_list, 6754 .fno = gen_helper_sve2_sqshrnb_d, 6755 .vece = MO_64 }, 6756 }; 6757 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) 6758 6759 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 6760 TCGv_vec n, int64_t shr) 6761 { 6762 TCGv_vec t = tcg_temp_new_vec_matching(d); 6763 int halfbits = 4 << vece; 6764 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6765 int64_t min = -max - 1; 6766 6767 tcg_gen_sari_vec(vece, n, n, shr); 6768 tcg_gen_dupi_vec(vece, t, min); 6769 tcg_gen_smax_vec(vece, n, n, t); 6770 tcg_gen_dupi_vec(vece, t, max); 6771 tcg_gen_smin_vec(vece, n, n, t); 6772 tcg_gen_shli_vec(vece, n, n, halfbits); 6773 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6774 tcg_gen_bitsel_vec(vece, d, t, d, n); 6775 } 6776 6777 static const TCGOpcode sqshrnt_vec_list[] = { 6778 INDEX_op_shli_vec, INDEX_op_sari_vec, 6779 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6780 }; 6781 static const GVecGen2i sqshrnt_ops[3] = { 6782 { .fniv = gen_sqshrnt_vec, 6783 .opt_opc = sqshrnt_vec_list, 6784 .load_dest = true, 6785 .fno = gen_helper_sve2_sqshrnt_h, 6786 .vece = MO_16 }, 6787 { .fniv = gen_sqshrnt_vec, 6788 .opt_opc = sqshrnt_vec_list, 6789 .load_dest = true, 6790 .fno = gen_helper_sve2_sqshrnt_s, 6791 .vece = MO_32 }, 6792 { .fniv = gen_sqshrnt_vec, 6793 .opt_opc = sqshrnt_vec_list, 6794 .load_dest = true, 6795 .fno = gen_helper_sve2_sqshrnt_d, 6796 .vece = MO_64 }, 6797 }; 6798 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops) 6799 6800 static const GVecGen2i sqrshrnb_ops[3] = { 6801 { .fno = gen_helper_sve2_sqrshrnb_h }, 6802 { .fno = gen_helper_sve2_sqrshrnb_s }, 6803 { .fno = gen_helper_sve2_sqrshrnb_d }, 6804 }; 6805 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops) 6806 6807 static const GVecGen2i sqrshrnt_ops[3] = { 6808 { .fno = gen_helper_sve2_sqrshrnt_h }, 6809 { .fno = gen_helper_sve2_sqrshrnt_s }, 6810 { .fno = gen_helper_sve2_sqrshrnt_d }, 6811 }; 6812 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) 6813 6814 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 6815 TCGv_vec n, int64_t shr) 6816 { 6817 TCGv_vec t = 
tcg_temp_new_vec_matching(d); 6818 int halfbits = 4 << vece; 6819 6820 tcg_gen_shri_vec(vece, n, n, shr); 6821 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6822 tcg_gen_umin_vec(vece, d, n, t); 6823 } 6824 6825 static const TCGOpcode uqshrnb_vec_list[] = { 6826 INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6827 }; 6828 static const GVecGen2i uqshrnb_ops[3] = { 6829 { .fniv = gen_uqshrnb_vec, 6830 .opt_opc = uqshrnb_vec_list, 6831 .fno = gen_helper_sve2_uqshrnb_h, 6832 .vece = MO_16 }, 6833 { .fniv = gen_uqshrnb_vec, 6834 .opt_opc = uqshrnb_vec_list, 6835 .fno = gen_helper_sve2_uqshrnb_s, 6836 .vece = MO_32 }, 6837 { .fniv = gen_uqshrnb_vec, 6838 .opt_opc = uqshrnb_vec_list, 6839 .fno = gen_helper_sve2_uqshrnb_d, 6840 .vece = MO_64 }, 6841 }; 6842 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) 6843 6844 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, 6845 TCGv_vec n, int64_t shr) 6846 { 6847 TCGv_vec t = tcg_temp_new_vec_matching(d); 6848 int halfbits = 4 << vece; 6849 6850 tcg_gen_shri_vec(vece, n, n, shr); 6851 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6852 tcg_gen_umin_vec(vece, n, n, t); 6853 tcg_gen_shli_vec(vece, n, n, halfbits); 6854 tcg_gen_bitsel_vec(vece, d, t, d, n); 6855 } 6856 6857 static const TCGOpcode uqshrnt_vec_list[] = { 6858 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6859 }; 6860 static const GVecGen2i uqshrnt_ops[3] = { 6861 { .fniv = gen_uqshrnt_vec, 6862 .opt_opc = uqshrnt_vec_list, 6863 .load_dest = true, 6864 .fno = gen_helper_sve2_uqshrnt_h, 6865 .vece = MO_16 }, 6866 { .fniv = gen_uqshrnt_vec, 6867 .opt_opc = uqshrnt_vec_list, 6868 .load_dest = true, 6869 .fno = gen_helper_sve2_uqshrnt_s, 6870 .vece = MO_32 }, 6871 { .fniv = gen_uqshrnt_vec, 6872 .opt_opc = uqshrnt_vec_list, 6873 .load_dest = true, 6874 .fno = gen_helper_sve2_uqshrnt_d, 6875 .vece = MO_64 }, 6876 }; 6877 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops) 6878 6879 static const GVecGen2i uqrshrnb_ops[3] = { 6880 { .fno = gen_helper_sve2_uqrshrnb_h }, 6881 { .fno = gen_helper_sve2_uqrshrnb_s }, 6882 { .fno = gen_helper_sve2_uqrshrnb_d }, 6883 }; 6884 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops) 6885 6886 static const GVecGen2i uqrshrnt_ops[3] = { 6887 { .fno = gen_helper_sve2_uqrshrnt_h }, 6888 { .fno = gen_helper_sve2_uqrshrnt_s }, 6889 { .fno = gen_helper_sve2_uqrshrnt_d }, 6890 }; 6891 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops) 6892 6893 #define DO_SVE2_ZZZ_NARROW(NAME, name) \ 6894 static gen_helper_gvec_3 * const name##_fns[4] = { \ 6895 NULL, gen_helper_sve2_##name##_h, \ 6896 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 6897 }; \ 6898 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \ 6899 name##_fns[a->esz], a, 0) 6900 6901 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) 6902 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt) 6903 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb) 6904 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt) 6905 6906 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb) 6907 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt) 6908 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb) 6909 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) 6910 6911 static gen_helper_gvec_flags_4 * const match_fns[4] = { 6912 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL 6913 }; 6914 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) 6915 6916 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = { 6917 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL 6918 }; 6919 TRANS_FEAT_NONSTREAMING(NMATCH, 
aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) 6920 6921 static gen_helper_gvec_4 * const histcnt_fns[4] = { 6922 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d 6923 }; 6924 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, 6925 histcnt_fns[a->esz], a, 0) 6926 6927 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, 6928 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) 6929 6930 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) 6931 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) 6932 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) 6933 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) 6934 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) 6935 6936 /* 6937 * SVE Integer Multiply-Add (unpredicated) 6938 */ 6939 6940 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, 6941 gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, 6942 0, FPST_FPCR) 6943 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, 6944 gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, 6945 0, FPST_FPCR) 6946 6947 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { 6948 NULL, gen_helper_sve2_sqdmlal_zzzw_h, 6949 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, 6950 }; 6951 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6952 sqdmlal_zzzw_fns[a->esz], a, 0) 6953 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6954 sqdmlal_zzzw_fns[a->esz], a, 3) 6955 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 6956 sqdmlal_zzzw_fns[a->esz], a, 2) 6957 6958 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = { 6959 NULL, gen_helper_sve2_sqdmlsl_zzzw_h, 6960 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d, 6961 }; 6962 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6963 sqdmlsl_zzzw_fns[a->esz], a, 0) 6964 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6965 sqdmlsl_zzzw_fns[a->esz], a, 3) 6966 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 6967 sqdmlsl_zzzw_fns[a->esz], a, 2) 6968 6969 static gen_helper_gvec_4 * const sqrdmlah_fns[] = { 6970 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h, 6971 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d, 6972 }; 6973 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 6974 sqrdmlah_fns[a->esz], a, 0) 6975 6976 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = { 6977 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h, 6978 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d, 6979 }; 6980 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 6981 sqrdmlsh_fns[a->esz], a, 0) 6982 6983 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = { 6984 NULL, gen_helper_sve2_smlal_zzzw_h, 6985 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d, 6986 }; 6987 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6988 smlal_zzzw_fns[a->esz], a, 0) 6989 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6990 smlal_zzzw_fns[a->esz], a, 1) 6991 6992 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = { 6993 NULL, gen_helper_sve2_umlal_zzzw_h, 6994 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d, 6995 }; 6996 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6997 umlal_zzzw_fns[a->esz], a, 0) 6998 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6999 umlal_zzzw_fns[a->esz], a, 1) 7000 7001 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = { 7002 NULL, gen_helper_sve2_smlsl_zzzw_h, 7003 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d, 7004 }; 7005 
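/*
 * For these widening multiply-add groups the final data argument is the
 * bottom/top selector passed through to the helper: 0 reads the even
 * (bottom) source elements, 1 the odd (top) elements.
 */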
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7006 smlsl_zzzw_fns[a->esz], a, 0) 7007 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7008 smlsl_zzzw_fns[a->esz], a, 1) 7009 7010 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = { 7011 NULL, gen_helper_sve2_umlsl_zzzw_h, 7012 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, 7013 }; 7014 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7015 umlsl_zzzw_fns[a->esz], a, 0) 7016 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7017 umlsl_zzzw_fns[a->esz], a, 1) 7018 7019 static gen_helper_gvec_4 * const cmla_fns[] = { 7020 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, 7021 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, 7022 }; 7023 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7024 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7025 7026 static gen_helper_gvec_4 * const cdot_fns[] = { 7027 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d 7028 }; 7029 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7030 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7031 7032 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { 7033 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, 7034 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, 7035 }; 7036 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7037 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7038 7039 TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7040 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0) 7041 7042 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, 7043 gen_helper_crypto_aesmc, a->rd, a->rd, 0) 7044 TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz, 7045 gen_helper_crypto_aesimc, a->rd, a->rd, 0) 7046 7047 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7048 gen_helper_crypto_aese, a, 0) 7049 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7050 gen_helper_crypto_aesd, a, 0) 7051 7052 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7053 gen_helper_crypto_sm4e, a, 0) 7054 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7055 gen_helper_crypto_sm4ekey, a, 0) 7056 7057 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, 7058 gen_gvec_rax1, a) 7059 7060 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, 7061 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR) 7062 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, 7063 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR) 7064 7065 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 7066 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR) 7067 7068 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, 7069 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR) 7070 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, 7071 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR) 7072 7073 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, 7074 FPROUNDING_ODD, gen_helper_sve_fcvt_ds) 7075 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a, 7076 FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds) 7077 7078 static gen_helper_gvec_3_ptr * const flogb_fns[] = { 7079 NULL, gen_helper_flogb_h, 7080 gen_helper_flogb_s, gen_helper_flogb_d 7081 }; 7082 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], 7083 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 7084 7085 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) 7086 { 7087 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s, 7088 a->rd, a->rn, a->rm, a->ra, 7089 (sel << 1) | sub, tcg_env); 7090 } 7091 7092 TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false) 7093 TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true) 7094 TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false) 7095 TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true) 7096 7097 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) 7098 { 7099 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s, 7100 a->rd, a->rn, a->rm, a->ra, 7101 (a->index << 2) | (sel << 1) | sub, tcg_env); 7102 } 7103 7104 TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false) 7105 TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true) 7106 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false) 7107 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true) 7108 7109 TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7110 gen_helper_gvec_smmla_b, a, 0) 7111 TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7112 gen_helper_gvec_usmmla_b, a, 0) 7113 TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7114 gen_helper_gvec_ummla_b, a, 0) 7115 7116 TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, 7117 gen_helper_gvec_bfdot, a, 0) 7118 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz, 7119 gen_helper_gvec_bfdot_idx, a) 7120 7121 TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, 7122 gen_helper_gvec_bfmmla, a, 0) 7123 7124 static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) 7125 { 7126 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, 7127 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR); 7128 } 7129 7130 TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) 7131 TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true) 7132 7133 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) 7134 { 7135 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, 7136 a->rd, a->rn, a->rm, a->ra, 7137 (a->index << 1) | sel, FPST_FPCR); 7138 } 7139 7140 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) 7141 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) 7142 7143 static bool trans_PSEL(DisasContext *s, arg_psel *a) 7144 { 7145 int vl = vec_full_reg_size(s); 7146 int pl = pred_gvec_reg_size(s); 7147 int elements = vl >> a->esz; 7148 TCGv_i64 tmp, didx, dbit; 7149 TCGv_ptr ptr; 7150 7151 if (!dc_isar_feature(aa64_sme, s)) { 7152 return false; 7153 } 7154 if (!sve_access_check(s)) { 7155 return true; 7156 } 7157 7158 tmp = tcg_temp_new_i64(); 7159 dbit = tcg_temp_new_i64(); 7160 didx = tcg_temp_new_i64(); 7161 ptr = tcg_temp_new_ptr(); 7162 7163 /* Compute the predicate element. */ 7164 tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm); 7165 if (is_power_of_2(elements)) { 7166 tcg_gen_andi_i64(tmp, tmp, elements - 1); 7167 } else { 7168 tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements)); 7169 } 7170 7171 /* Extract the predicate byte and bit indices. */ 7172 tcg_gen_shli_i64(tmp, tmp, a->esz); 7173 tcg_gen_andi_i64(dbit, tmp, 7); 7174 tcg_gen_shri_i64(didx, tmp, 3); 7175 if (HOST_BIG_ENDIAN) { 7176 tcg_gen_xori_i64(didx, didx, 7); 7177 } 7178 7179 /* Load the predicate word. 
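 * Only one byte is needed: dbit already holds the bit position within
 * that byte, and didx was flipped above on big-endian hosts so that the
 * ld8u reaches the same logical byte.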
*/ 7180 tcg_gen_trunc_i64_ptr(ptr, didx); 7181 tcg_gen_add_ptr(ptr, ptr, tcg_env); 7182 tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm)); 7183 7184 /* Extract the predicate bit and replicate to MO_64. */ 7185 tcg_gen_shr_i64(tmp, tmp, dbit); 7186 tcg_gen_andi_i64(tmp, tmp, 1); 7187 tcg_gen_neg_i64(tmp, tmp); 7188 7189 /* Apply to either copy the source, or write zeros. */ 7190 tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), 7191 pred_full_reg_offset(s, a->pn), tmp, pl, pl); 7192 return true; 7193 } 7194 7195 static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7196 { 7197 tcg_gen_smax_i32(d, a, n); 7198 tcg_gen_smin_i32(d, d, m); 7199 } 7200 7201 static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7202 { 7203 tcg_gen_smax_i64(d, a, n); 7204 tcg_gen_smin_i64(d, d, m); 7205 } 7206 7207 static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7208 TCGv_vec m, TCGv_vec a) 7209 { 7210 tcg_gen_smax_vec(vece, d, a, n); 7211 tcg_gen_smin_vec(vece, d, d, m); 7212 } 7213 7214 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7215 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7216 { 7217 static const TCGOpcode vecop[] = { 7218 INDEX_op_smin_vec, INDEX_op_smax_vec, 0 7219 }; 7220 static const GVecGen4 ops[4] = { 7221 { .fniv = gen_sclamp_vec, 7222 .fno = gen_helper_gvec_sclamp_b, 7223 .opt_opc = vecop, 7224 .vece = MO_8 }, 7225 { .fniv = gen_sclamp_vec, 7226 .fno = gen_helper_gvec_sclamp_h, 7227 .opt_opc = vecop, 7228 .vece = MO_16 }, 7229 { .fni4 = gen_sclamp_i32, 7230 .fniv = gen_sclamp_vec, 7231 .fno = gen_helper_gvec_sclamp_s, 7232 .opt_opc = vecop, 7233 .vece = MO_32 }, 7234 { .fni8 = gen_sclamp_i64, 7235 .fniv = gen_sclamp_vec, 7236 .fno = gen_helper_gvec_sclamp_d, 7237 .opt_opc = vecop, 7238 .vece = MO_64, 7239 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7240 }; 7241 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7242 } 7243 7244 TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) 7245 7246 static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7247 { 7248 tcg_gen_umax_i32(d, a, n); 7249 tcg_gen_umin_i32(d, d, m); 7250 } 7251 7252 static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7253 { 7254 tcg_gen_umax_i64(d, a, n); 7255 tcg_gen_umin_i64(d, d, m); 7256 } 7257 7258 static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7259 TCGv_vec m, TCGv_vec a) 7260 { 7261 tcg_gen_umax_vec(vece, d, a, n); 7262 tcg_gen_umin_vec(vece, d, d, m); 7263 } 7264 7265 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7266 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7267 { 7268 static const TCGOpcode vecop[] = { 7269 INDEX_op_umin_vec, INDEX_op_umax_vec, 0 7270 }; 7271 static const GVecGen4 ops[4] = { 7272 { .fniv = gen_uclamp_vec, 7273 .fno = gen_helper_gvec_uclamp_b, 7274 .opt_opc = vecop, 7275 .vece = MO_8 }, 7276 { .fniv = gen_uclamp_vec, 7277 .fno = gen_helper_gvec_uclamp_h, 7278 .opt_opc = vecop, 7279 .vece = MO_16 }, 7280 { .fni4 = gen_uclamp_i32, 7281 .fniv = gen_uclamp_vec, 7282 .fno = gen_helper_gvec_uclamp_s, 7283 .opt_opc = vecop, 7284 .vece = MO_32 }, 7285 { .fni8 = gen_uclamp_i64, 7286 .fniv = gen_uclamp_vec, 7287 .fno = gen_helper_gvec_uclamp_d, 7288 .opt_opc = vecop, 7289 .vece = MO_64, 7290 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7291 }; 7292 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7293 } 7294 7295 TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) 7296
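/*
 * For reference, the clamp expansions above compute min(max(a, n), m) per
 * element, with the signedness of the operation.  A scalar sketch of the
 * unsigned case (illustrative only, not used by the translator):
 *
 *     uint64_t uclamp_elt(uint64_t n, uint64_t m, uint64_t a)
 *     {
 *         uint64_t lo = a > n ? a : n;   // umax(a, n)
 *         return lo < m ? lo : m;        // umin(lo, m)
 *     }
 */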