1 /* 2 * AArch64 SVE translation 3 * 4 * Copyright (c) 2018 Linaro, Ltd 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "fpu/softfloat.h" 24 25 26 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, 27 TCGv_i64, uint32_t, uint32_t); 28 29 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, 30 TCGv_ptr, TCGv_i32); 31 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr, 32 TCGv_ptr, TCGv_ptr, TCGv_i32); 33 34 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32); 35 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, 36 TCGv_ptr, TCGv_i64, TCGv_i32); 37 38 /* 39 * Helpers for extracting complex instruction fields. 40 */ 41 42 /* See e.g. ASR (immediate, predicated). 43 * Returns -1 for unallocated encoding; diagnose later. 44 */ 45 static int tszimm_esz(DisasContext *s, int x) 46 { 47 x >>= 3; /* discard imm3 */ 48 return 31 - clz32(x); 49 } 50 51 static int tszimm_shr(DisasContext *s, int x) 52 { 53 /* 54 * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the 55 * trans function will check for esz < 0), so we can return any 56 * value we like from here in that case as long as we avoid UB. 57 */ 58 int esz = tszimm_esz(s, x); 59 if (esz < 0) { 60 return esz; 61 } 62 return (16 << esz) - x; 63 } 64 65 /* See e.g. LSL (immediate, predicated). */ 66 static int tszimm_shl(DisasContext *s, int x) 67 { 68 /* As with tszimm_shr(), value will be unused if esz < 0 */ 69 int esz = tszimm_esz(s, x); 70 if (esz < 0) { 71 return esz; 72 } 73 return x - (8 << esz); 74 } 75 76 /* The SH bit is in bit 8. Extract the low 8 and shift. */ 77 static inline int expand_imm_sh8s(DisasContext *s, int x) 78 { 79 return (int8_t)x << (x & 0x100 ? 8 : 0); 80 } 81 82 static inline int expand_imm_sh8u(DisasContext *s, int x) 83 { 84 return (uint8_t)x << (x & 0x100 ? 8 : 0); 85 } 86 87 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype) 88 * with unsigned data. C.f. SVE Memory Contiguous Load Group. 89 */ 90 static inline int msz_dtype(DisasContext *s, int msz) 91 { 92 static const uint8_t dtype[4] = { 0, 5, 10, 15 }; 93 return dtype[msz]; 94 } 95 96 /* 97 * Include the generated decoder. 98 */ 99 100 #include "decode-sve.c.inc" 101 102 /* 103 * Implement all of the translator functions referenced by the decoder. 104 */ 105 106 /* Invoke an out-of-line helper on 2 Zregs. */ 107 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, 108 int rd, int rn, int data) 109 { 110 if (fn == NULL) { 111 return false; 112 } 113 if (sve_access_check(s)) { 114 unsigned vsz = vec_full_reg_size(s); 115 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 116 vec_full_reg_offset(s, rn), 117 vsz, vsz, data, fn); 118 } 119 return true; 120 } 121 122 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 123 int rd, int rn, int data, 124 ARMFPStatusFlavour flavour) 125 { 126 if (fn == NULL) { 127 return false; 128 } 129 if (sve_access_check(s)) { 130 unsigned vsz = vec_full_reg_size(s); 131 TCGv_ptr status = fpstatus_ptr(flavour); 132 133 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 134 vec_full_reg_offset(s, rn), 135 status, vsz, vsz, data, fn); 136 } 137 return true; 138 } 139 140 static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 141 arg_rr_esz *a, int data) 142 { 143 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, 144 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 145 } 146 147 /* Invoke an out-of-line helper on 3 Zregs. */ 148 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 149 int rd, int rn, int rm, int data) 150 { 151 if (fn == NULL) { 152 return false; 153 } 154 if (sve_access_check(s)) { 155 unsigned vsz = vec_full_reg_size(s); 156 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 157 vec_full_reg_offset(s, rn), 158 vec_full_reg_offset(s, rm), 159 vsz, vsz, data, fn); 160 } 161 return true; 162 } 163 164 static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 165 arg_rrr_esz *a, int data) 166 { 167 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); 168 } 169 170 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */ 171 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 172 int rd, int rn, int rm, 173 int data, ARMFPStatusFlavour flavour) 174 { 175 if (fn == NULL) { 176 return false; 177 } 178 if (sve_access_check(s)) { 179 unsigned vsz = vec_full_reg_size(s); 180 TCGv_ptr status = fpstatus_ptr(flavour); 181 182 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 183 vec_full_reg_offset(s, rn), 184 vec_full_reg_offset(s, rm), 185 status, vsz, vsz, data, fn); 186 } 187 return true; 188 } 189 190 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 191 arg_rrr_esz *a, int data) 192 { 193 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 194 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 195 } 196 197 /* Invoke an out-of-line helper on 4 Zregs. */ 198 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 199 int rd, int rn, int rm, int ra, int data) 200 { 201 if (fn == NULL) { 202 return false; 203 } 204 if (sve_access_check(s)) { 205 unsigned vsz = vec_full_reg_size(s); 206 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 207 vec_full_reg_offset(s, rn), 208 vec_full_reg_offset(s, rm), 209 vec_full_reg_offset(s, ra), 210 vsz, vsz, data, fn); 211 } 212 return true; 213 } 214 215 static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 216 arg_rrrr_esz *a, int data) 217 { 218 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 219 } 220 221 static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn, 222 arg_rrxr_esz *a) 223 { 224 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 225 } 226 227 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */ 228 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 229 int rd, int rn, int rm, int ra, 230 int data, TCGv_ptr ptr) 231 { 232 if (fn == NULL) { 233 return false; 234 } 235 if (sve_access_check(s)) { 236 unsigned vsz = vec_full_reg_size(s); 237 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 238 vec_full_reg_offset(s, rn), 239 vec_full_reg_offset(s, rm), 240 vec_full_reg_offset(s, ra), 241 ptr, vsz, vsz, data, fn); 242 } 243 return true; 244 } 245 246 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 247 int rd, int rn, int rm, int ra, 248 int data, ARMFPStatusFlavour flavour) 249 { 250 TCGv_ptr status = fpstatus_ptr(flavour); 251 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status); 252 return ret; 253 } 254 255 static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 256 int rd, int rn, int rm, int ra, 257 int data) 258 { 259 return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env); 260 } 261 262 static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 263 arg_rrrr_esz *a, int data) 264 { 265 return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 266 } 267 268 static bool gen_gvec_env_arg_zzxz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 269 arg_rrxr_esz *a) 270 { 271 return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 272 } 273 274 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */ 275 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn, 276 int rd, int rn, int rm, int ra, int pg, 277 int data, ARMFPStatusFlavour flavour) 278 { 279 if (fn == NULL) { 280 return false; 281 } 282 if (sve_access_check(s)) { 283 unsigned vsz = vec_full_reg_size(s); 284 TCGv_ptr status = fpstatus_ptr(flavour); 285 286 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd), 287 vec_full_reg_offset(s, rn), 288 vec_full_reg_offset(s, rm), 289 vec_full_reg_offset(s, ra), 290 pred_full_reg_offset(s, pg), 291 status, vsz, vsz, data, fn); 292 } 293 return true; 294 } 295 296 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */ 297 static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, 298 int rd, int rn, int pg, int data) 299 { 300 if (fn == NULL) { 301 return false; 302 } 303 if (sve_access_check(s)) { 304 unsigned vsz = vec_full_reg_size(s); 305 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 306 vec_full_reg_offset(s, rn), 307 pred_full_reg_offset(s, pg), 308 vsz, vsz, data, fn); 309 } 310 return true; 311 } 312 313 static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn, 314 arg_rpr_esz *a, int data) 315 { 316 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data); 317 } 318 319 static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn, 320 arg_rpri_esz *a) 321 { 322 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); 323 } 324 325 static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn, 326 int rd, int rn, int pg, int data, 327 ARMFPStatusFlavour flavour) 328 { 329 if (fn == NULL) { 330 return false; 331 } 332 if (sve_access_check(s)) { 333 unsigned vsz = vec_full_reg_size(s); 334 TCGv_ptr status = fpstatus_ptr(flavour); 335 336 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 337 vec_full_reg_offset(s, rn), 338 pred_full_reg_offset(s, pg), 339 status, vsz, vsz, data, fn); 340 } 341 return true; 342 } 343 344 static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 345 arg_rpr_esz *a, int data, 346 ARMFPStatusFlavour flavour) 347 { 348 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour); 349 } 350 351 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 352 static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, 353 int rd, int rn, int rm, int pg, int data) 354 { 355 if (fn == NULL) { 356 return false; 357 } 358 if (sve_access_check(s)) { 359 unsigned vsz = vec_full_reg_size(s); 360 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 361 vec_full_reg_offset(s, rn), 362 vec_full_reg_offset(s, rm), 363 pred_full_reg_offset(s, pg), 364 vsz, vsz, data, fn); 365 } 366 return true; 367 } 368 369 static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn, 370 arg_rprr_esz *a, int data) 371 { 372 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data); 373 } 374 375 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 376 static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, 377 int rd, int rn, int rm, int pg, int data, 378 ARMFPStatusFlavour flavour) 379 { 380 if (fn == NULL) { 381 return false; 382 } 383 if (sve_access_check(s)) { 384 unsigned vsz = vec_full_reg_size(s); 385 TCGv_ptr status = fpstatus_ptr(flavour); 386 387 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 388 vec_full_reg_offset(s, rn), 389 vec_full_reg_offset(s, rm), 390 pred_full_reg_offset(s, pg), 391 status, vsz, vsz, data, fn); 392 } 393 return true; 394 } 395 396 static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 397 arg_rprr_esz *a) 398 { 399 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, 400 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 401 } 402 403 /* Invoke a vector expander on two Zregs and an immediate. */ 404 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 405 int esz, int rd, int rn, uint64_t imm) 406 { 407 if (gvec_fn == NULL) { 408 return false; 409 } 410 if (sve_access_check(s)) { 411 unsigned vsz = vec_full_reg_size(s); 412 gvec_fn(esz, vec_full_reg_offset(s, rd), 413 vec_full_reg_offset(s, rn), imm, vsz, vsz); 414 } 415 return true; 416 } 417 418 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 419 arg_rri_esz *a) 420 { 421 if (a->esz < 0) { 422 /* Invalid tsz encoding -- see tszimm_esz. */ 423 return false; 424 } 425 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm); 426 } 427 428 /* Invoke a vector expander on three Zregs. */ 429 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, 430 int esz, int rd, int rn, int rm) 431 { 432 if (gvec_fn == NULL) { 433 return false; 434 } 435 if (sve_access_check(s)) { 436 unsigned vsz = vec_full_reg_size(s); 437 gvec_fn(esz, vec_full_reg_offset(s, rd), 438 vec_full_reg_offset(s, rn), 439 vec_full_reg_offset(s, rm), vsz, vsz); 440 } 441 return true; 442 } 443 444 static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn, 445 arg_rrr_esz *a) 446 { 447 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); 448 } 449 450 /* Invoke a vector expander on four Zregs. */ 451 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, 452 arg_rrrr_esz *a) 453 { 454 if (gvec_fn == NULL) { 455 return false; 456 } 457 if (sve_access_check(s)) { 458 unsigned vsz = vec_full_reg_size(s); 459 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 460 vec_full_reg_offset(s, a->rn), 461 vec_full_reg_offset(s, a->rm), 462 vec_full_reg_offset(s, a->ra), vsz, vsz); 463 } 464 return true; 465 } 466 467 /* Invoke a vector move on two Zregs. */ 468 static bool do_mov_z(DisasContext *s, int rd, int rn) 469 { 470 if (sve_access_check(s)) { 471 unsigned vsz = vec_full_reg_size(s); 472 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd), 473 vec_full_reg_offset(s, rn), vsz, vsz); 474 } 475 return true; 476 } 477 478 /* Initialize a Zreg with replications of a 64-bit immediate. */ 479 static void do_dupi_z(DisasContext *s, int rd, uint64_t word) 480 { 481 unsigned vsz = vec_full_reg_size(s); 482 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); 483 } 484 485 /* Invoke a vector expander on three Pregs. */ 486 static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, 487 int rd, int rn, int rm) 488 { 489 if (sve_access_check(s)) { 490 unsigned psz = pred_gvec_reg_size(s); 491 gvec_fn(MO_64, pred_full_reg_offset(s, rd), 492 pred_full_reg_offset(s, rn), 493 pred_full_reg_offset(s, rm), psz, psz); 494 } 495 return true; 496 } 497 498 /* Invoke a vector move on two Pregs. */ 499 static bool do_mov_p(DisasContext *s, int rd, int rn) 500 { 501 if (sve_access_check(s)) { 502 unsigned psz = pred_gvec_reg_size(s); 503 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), 504 pred_full_reg_offset(s, rn), psz, psz); 505 } 506 return true; 507 } 508 509 /* Set the cpu flags as per a return from an SVE helper. */ 510 static void do_pred_flags(TCGv_i32 t) 511 { 512 tcg_gen_mov_i32(cpu_NF, t); 513 tcg_gen_andi_i32(cpu_ZF, t, 2); 514 tcg_gen_andi_i32(cpu_CF, t, 1); 515 tcg_gen_movi_i32(cpu_VF, 0); 516 } 517 518 /* Subroutines computing the ARM PredTest psuedofunction. */ 519 static void do_predtest1(TCGv_i64 d, TCGv_i64 g) 520 { 521 TCGv_i32 t = tcg_temp_new_i32(); 522 523 gen_helper_sve_predtest1(t, d, g); 524 do_pred_flags(t); 525 } 526 527 static void do_predtest(DisasContext *s, int dofs, int gofs, int words) 528 { 529 TCGv_ptr dptr = tcg_temp_new_ptr(); 530 TCGv_ptr gptr = tcg_temp_new_ptr(); 531 TCGv_i32 t = tcg_temp_new_i32(); 532 533 tcg_gen_addi_ptr(dptr, tcg_env, dofs); 534 tcg_gen_addi_ptr(gptr, tcg_env, gofs); 535 536 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words)); 537 538 do_pred_flags(t); 539 } 540 541 /* For each element size, the bits within a predicate word that are active. */ 542 const uint64_t pred_esz_masks[5] = { 543 0xffffffffffffffffull, 0x5555555555555555ull, 544 0x1111111111111111ull, 0x0101010101010101ull, 545 0x0001000100010001ull, 546 }; 547 548 static bool trans_INVALID(DisasContext *s, arg_INVALID *a) 549 { 550 unallocated_encoding(s); 551 return true; 552 } 553 554 /* 555 *** SVE Logical - Unpredicated Group 556 */ 557 558 TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a) 559 TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) 560 TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) 561 TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) 562 563 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) 564 { 565 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { 566 return false; 567 } 568 if (sve_access_check(s)) { 569 unsigned vsz = vec_full_reg_size(s); 570 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd), 571 vec_full_reg_offset(s, a->rn), 572 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz); 573 } 574 return true; 575 } 576 577 TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a) 578 TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a) 579 580 static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 581 uint32_t a, uint32_t oprsz, uint32_t maxsz) 582 { 583 /* BSL differs from the generic bitsel in argument ordering. */ 584 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz); 585 } 586 587 TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a) 588 589 static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 590 { 591 tcg_gen_andc_i64(n, k, n); 592 tcg_gen_andc_i64(m, m, k); 593 tcg_gen_or_i64(d, n, m); 594 } 595 596 static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 597 TCGv_vec m, TCGv_vec k) 598 { 599 if (TCG_TARGET_HAS_bitsel_vec) { 600 tcg_gen_not_vec(vece, n, n); 601 tcg_gen_bitsel_vec(vece, d, k, n, m); 602 } else { 603 tcg_gen_andc_vec(vece, n, k, n); 604 tcg_gen_andc_vec(vece, m, m, k); 605 tcg_gen_or_vec(vece, d, n, m); 606 } 607 } 608 609 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 610 uint32_t a, uint32_t oprsz, uint32_t maxsz) 611 { 612 static const GVecGen4 op = { 613 .fni8 = gen_bsl1n_i64, 614 .fniv = gen_bsl1n_vec, 615 .fno = gen_helper_sve2_bsl1n, 616 .vece = MO_64, 617 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 618 }; 619 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 620 } 621 622 TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a) 623 624 static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 625 { 626 /* 627 * Z[dn] = (n & k) | (~m & ~k) 628 * = | ~(m | k) 629 */ 630 tcg_gen_and_i64(n, n, k); 631 if (TCG_TARGET_HAS_orc_i64) { 632 tcg_gen_or_i64(m, m, k); 633 tcg_gen_orc_i64(d, n, m); 634 } else { 635 tcg_gen_nor_i64(m, m, k); 636 tcg_gen_or_i64(d, n, m); 637 } 638 } 639 640 static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 641 TCGv_vec m, TCGv_vec k) 642 { 643 if (TCG_TARGET_HAS_bitsel_vec) { 644 tcg_gen_not_vec(vece, m, m); 645 tcg_gen_bitsel_vec(vece, d, k, n, m); 646 } else { 647 tcg_gen_and_vec(vece, n, n, k); 648 tcg_gen_or_vec(vece, m, m, k); 649 tcg_gen_orc_vec(vece, d, n, m); 650 } 651 } 652 653 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 654 uint32_t a, uint32_t oprsz, uint32_t maxsz) 655 { 656 static const GVecGen4 op = { 657 .fni8 = gen_bsl2n_i64, 658 .fniv = gen_bsl2n_vec, 659 .fno = gen_helper_sve2_bsl2n, 660 .vece = MO_64, 661 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 662 }; 663 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 664 } 665 666 TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a) 667 668 static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 669 { 670 tcg_gen_and_i64(n, n, k); 671 tcg_gen_andc_i64(m, m, k); 672 tcg_gen_nor_i64(d, n, m); 673 } 674 675 static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 676 TCGv_vec m, TCGv_vec k) 677 { 678 tcg_gen_bitsel_vec(vece, d, k, n, m); 679 tcg_gen_not_vec(vece, d, d); 680 } 681 682 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 683 uint32_t a, uint32_t oprsz, uint32_t maxsz) 684 { 685 static const GVecGen4 op = { 686 .fni8 = gen_nbsl_i64, 687 .fniv = gen_nbsl_vec, 688 .fno = gen_helper_sve2_nbsl, 689 .vece = MO_64, 690 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 691 }; 692 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 693 } 694 695 TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a) 696 697 /* 698 *** SVE Integer Arithmetic - Unpredicated Group 699 */ 700 701 TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a) 702 TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a) 703 TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a) 704 TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a) 705 TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a) 706 TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a) 707 708 /* 709 *** SVE Integer Arithmetic - Binary Predicated Group 710 */ 711 712 /* Select active elememnts from Zn and inactive elements from Zm, 713 * storing the result in Zd. 714 */ 715 static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) 716 { 717 static gen_helper_gvec_4 * const fns[4] = { 718 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, 719 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d 720 }; 721 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); 722 } 723 724 #define DO_ZPZZ(NAME, FEAT, name) \ 725 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \ 726 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \ 727 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \ 728 }; \ 729 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \ 730 name##_zpzz_fns[a->esz], a, 0) 731 732 DO_ZPZZ(AND_zpzz, aa64_sve, sve_and) 733 DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor) 734 DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr) 735 DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic) 736 737 DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add) 738 DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub) 739 740 DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax) 741 DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax) 742 DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin) 743 DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin) 744 DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd) 745 DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd) 746 747 DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul) 748 DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh) 749 DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh) 750 751 DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr) 752 DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr) 753 DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl) 754 755 static gen_helper_gvec_4 * const sdiv_fns[4] = { 756 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d 757 }; 758 TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0) 759 760 static gen_helper_gvec_4 * const udiv_fns[4] = { 761 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d 762 }; 763 TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0) 764 765 TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz) 766 767 /* 768 *** SVE Integer Arithmetic - Unary Predicated Group 769 */ 770 771 #define DO_ZPZ(NAME, FEAT, name) \ 772 static gen_helper_gvec_3 * const name##_fns[4] = { \ 773 gen_helper_##name##_b, gen_helper_##name##_h, \ 774 gen_helper_##name##_s, gen_helper_##name##_d, \ 775 }; \ 776 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0) 777 778 DO_ZPZ(CLS, aa64_sve, sve_cls) 779 DO_ZPZ(CLZ, aa64_sve, sve_clz) 780 DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz) 781 DO_ZPZ(CNOT, aa64_sve, sve_cnot) 782 DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz) 783 DO_ZPZ(ABS, aa64_sve, sve_abs) 784 DO_ZPZ(NEG, aa64_sve, sve_neg) 785 DO_ZPZ(RBIT, aa64_sve, sve_rbit) 786 787 static gen_helper_gvec_3 * const fabs_fns[4] = { 788 NULL, gen_helper_sve_fabs_h, 789 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, 790 }; 791 TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) 792 793 static gen_helper_gvec_3 * const fneg_fns[4] = { 794 NULL, gen_helper_sve_fneg_h, 795 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, 796 }; 797 TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) 798 799 static gen_helper_gvec_3 * const sxtb_fns[4] = { 800 NULL, gen_helper_sve_sxtb_h, 801 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d, 802 }; 803 TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0) 804 805 static gen_helper_gvec_3 * const uxtb_fns[4] = { 806 NULL, gen_helper_sve_uxtb_h, 807 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d, 808 }; 809 TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0) 810 811 static gen_helper_gvec_3 * const sxth_fns[4] = { 812 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d 813 }; 814 TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0) 815 816 static gen_helper_gvec_3 * const uxth_fns[4] = { 817 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d 818 }; 819 TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0) 820 821 TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, 822 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0) 823 TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, 824 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0) 825 826 /* 827 *** SVE Integer Reduction Group 828 */ 829 830 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32); 831 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a, 832 gen_helper_gvec_reduc *fn) 833 { 834 unsigned vsz = vec_full_reg_size(s); 835 TCGv_ptr t_zn, t_pg; 836 TCGv_i32 desc; 837 TCGv_i64 temp; 838 839 if (fn == NULL) { 840 return false; 841 } 842 if (!sve_access_check(s)) { 843 return true; 844 } 845 846 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 847 temp = tcg_temp_new_i64(); 848 t_zn = tcg_temp_new_ptr(); 849 t_pg = tcg_temp_new_ptr(); 850 851 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 852 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 853 fn(temp, t_zn, t_pg, desc); 854 855 write_fp_dreg(s, a->rd, temp); 856 return true; 857 } 858 859 #define DO_VPZ(NAME, name) \ 860 static gen_helper_gvec_reduc * const name##_fns[4] = { \ 861 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ 862 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 863 }; \ 864 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz]) 865 866 DO_VPZ(ORV, orv) 867 DO_VPZ(ANDV, andv) 868 DO_VPZ(EORV, eorv) 869 870 DO_VPZ(UADDV, uaddv) 871 DO_VPZ(SMAXV, smaxv) 872 DO_VPZ(UMAXV, umaxv) 873 DO_VPZ(SMINV, sminv) 874 DO_VPZ(UMINV, uminv) 875 876 static gen_helper_gvec_reduc * const saddv_fns[4] = { 877 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h, 878 gen_helper_sve_saddv_s, NULL 879 }; 880 TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz]) 881 882 #undef DO_VPZ 883 884 /* 885 *** SVE Shift by Immediate - Predicated Group 886 */ 887 888 /* 889 * Copy Zn into Zd, storing zeros into inactive elements. 890 * If invert, store zeros into the active elements. 891 */ 892 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, 893 int esz, bool invert) 894 { 895 static gen_helper_gvec_3 * const fns[4] = { 896 gen_helper_sve_movz_b, gen_helper_sve_movz_h, 897 gen_helper_sve_movz_s, gen_helper_sve_movz_d, 898 }; 899 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); 900 } 901 902 static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr, 903 gen_helper_gvec_3 * const fns[4]) 904 { 905 int max; 906 907 if (a->esz < 0) { 908 /* Invalid tsz encoding -- see tszimm_esz. */ 909 return false; 910 } 911 912 /* 913 * Shift by element size is architecturally valid. 914 * For arithmetic right-shift, it's the same as by one less. 915 * For logical shifts and ASRD, it is a zeroing operation. 916 */ 917 max = 8 << a->esz; 918 if (a->imm >= max) { 919 if (asr) { 920 a->imm = max - 1; 921 } else { 922 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); 923 } 924 } 925 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a); 926 } 927 928 static gen_helper_gvec_3 * const asr_zpzi_fns[4] = { 929 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h, 930 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d, 931 }; 932 TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns) 933 934 static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = { 935 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h, 936 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d, 937 }; 938 TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns) 939 940 static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = { 941 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h, 942 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d, 943 }; 944 TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns) 945 946 static gen_helper_gvec_3 * const asrd_fns[4] = { 947 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h, 948 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d, 949 }; 950 TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns) 951 952 static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = { 953 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h, 954 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d, 955 }; 956 TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 957 a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a) 958 959 static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = { 960 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h, 961 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d, 962 }; 963 TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 964 a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a) 965 966 static gen_helper_gvec_3 * const srshr_fns[4] = { 967 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h, 968 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d, 969 }; 970 TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 971 a->esz < 0 ? NULL : srshr_fns[a->esz], a) 972 973 static gen_helper_gvec_3 * const urshr_fns[4] = { 974 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h, 975 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d, 976 }; 977 TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 978 a->esz < 0 ? NULL : urshr_fns[a->esz], a) 979 980 static gen_helper_gvec_3 * const sqshlu_fns[4] = { 981 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h, 982 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d, 983 }; 984 TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi, 985 a->esz < 0 ? NULL : sqshlu_fns[a->esz], a) 986 987 /* 988 *** SVE Bitwise Shift - Predicated Group 989 */ 990 991 #define DO_ZPZW(NAME, name) \ 992 static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \ 993 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \ 994 gen_helper_sve_##name##_zpzw_s, NULL \ 995 }; \ 996 TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \ 997 a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0) 998 999 DO_ZPZW(ASR, asr) 1000 DO_ZPZW(LSR, lsr) 1001 DO_ZPZW(LSL, lsl) 1002 1003 #undef DO_ZPZW 1004 1005 /* 1006 *** SVE Bitwise Shift - Unpredicated Group 1007 */ 1008 1009 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr, 1010 void (*gvec_fn)(unsigned, uint32_t, uint32_t, 1011 int64_t, uint32_t, uint32_t)) 1012 { 1013 if (a->esz < 0) { 1014 /* Invalid tsz encoding -- see tszimm_esz. */ 1015 return false; 1016 } 1017 if (sve_access_check(s)) { 1018 unsigned vsz = vec_full_reg_size(s); 1019 /* Shift by element size is architecturally valid. For 1020 arithmetic right-shift, it's the same as by one less. 1021 Otherwise it is a zeroing operation. */ 1022 if (a->imm >= 8 << a->esz) { 1023 if (asr) { 1024 a->imm = (8 << a->esz) - 1; 1025 } else { 1026 do_dupi_z(s, a->rd, 0); 1027 return true; 1028 } 1029 } 1030 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 1031 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 1032 } 1033 return true; 1034 } 1035 1036 TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari) 1037 TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri) 1038 TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli) 1039 1040 #define DO_ZZW(NAME, name) \ 1041 static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \ 1042 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \ 1043 gen_helper_sve_##name##_zzw_s, NULL \ 1044 }; \ 1045 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \ 1046 name##_zzw_fns[a->esz], a, 0) 1047 1048 DO_ZZW(ASR_zzw, asr) 1049 DO_ZZW(LSR_zzw, lsr) 1050 DO_ZZW(LSL_zzw, lsl) 1051 1052 #undef DO_ZZW 1053 1054 /* 1055 *** SVE Integer Multiply-Add Group 1056 */ 1057 1058 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a, 1059 gen_helper_gvec_5 *fn) 1060 { 1061 if (sve_access_check(s)) { 1062 unsigned vsz = vec_full_reg_size(s); 1063 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd), 1064 vec_full_reg_offset(s, a->ra), 1065 vec_full_reg_offset(s, a->rn), 1066 vec_full_reg_offset(s, a->rm), 1067 pred_full_reg_offset(s, a->pg), 1068 vsz, vsz, 0, fn); 1069 } 1070 return true; 1071 } 1072 1073 static gen_helper_gvec_5 * const mla_fns[4] = { 1074 gen_helper_sve_mla_b, gen_helper_sve_mla_h, 1075 gen_helper_sve_mla_s, gen_helper_sve_mla_d, 1076 }; 1077 TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz]) 1078 1079 static gen_helper_gvec_5 * const mls_fns[4] = { 1080 gen_helper_sve_mls_b, gen_helper_sve_mls_h, 1081 gen_helper_sve_mls_s, gen_helper_sve_mls_d, 1082 }; 1083 TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz]) 1084 1085 /* 1086 *** SVE Index Generation Group 1087 */ 1088 1089 static bool do_index(DisasContext *s, int esz, int rd, 1090 TCGv_i64 start, TCGv_i64 incr) 1091 { 1092 unsigned vsz; 1093 TCGv_i32 desc; 1094 TCGv_ptr t_zd; 1095 1096 if (!sve_access_check(s)) { 1097 return true; 1098 } 1099 1100 vsz = vec_full_reg_size(s); 1101 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1102 t_zd = tcg_temp_new_ptr(); 1103 1104 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 1105 if (esz == 3) { 1106 gen_helper_sve_index_d(t_zd, start, incr, desc); 1107 } else { 1108 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 1109 static index_fn * const fns[3] = { 1110 gen_helper_sve_index_b, 1111 gen_helper_sve_index_h, 1112 gen_helper_sve_index_s, 1113 }; 1114 TCGv_i32 s32 = tcg_temp_new_i32(); 1115 TCGv_i32 i32 = tcg_temp_new_i32(); 1116 1117 tcg_gen_extrl_i64_i32(s32, start); 1118 tcg_gen_extrl_i64_i32(i32, incr); 1119 fns[esz](t_zd, s32, i32, desc); 1120 } 1121 return true; 1122 } 1123 1124 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, 1125 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) 1126 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, 1127 tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) 1128 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, 1129 cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) 1130 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, 1131 cpu_reg(s, a->rn), cpu_reg(s, a->rm)) 1132 1133 /* 1134 *** SVE Stack Allocation Group 1135 */ 1136 1137 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) 1138 { 1139 if (!dc_isar_feature(aa64_sve, s)) { 1140 return false; 1141 } 1142 if (sve_access_check(s)) { 1143 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1144 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1145 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); 1146 } 1147 return true; 1148 } 1149 1150 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) 1151 { 1152 if (!dc_isar_feature(aa64_sme, s)) { 1153 return false; 1154 } 1155 if (sme_enabled_check(s)) { 1156 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1157 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1158 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); 1159 } 1160 return true; 1161 } 1162 1163 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) 1164 { 1165 if (!dc_isar_feature(aa64_sve, s)) { 1166 return false; 1167 } 1168 if (sve_access_check(s)) { 1169 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1170 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1171 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); 1172 } 1173 return true; 1174 } 1175 1176 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) 1177 { 1178 if (!dc_isar_feature(aa64_sme, s)) { 1179 return false; 1180 } 1181 if (sme_enabled_check(s)) { 1182 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1183 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1184 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); 1185 } 1186 return true; 1187 } 1188 1189 static bool trans_RDVL(DisasContext *s, arg_RDVL *a) 1190 { 1191 if (!dc_isar_feature(aa64_sve, s)) { 1192 return false; 1193 } 1194 if (sve_access_check(s)) { 1195 TCGv_i64 reg = cpu_reg(s, a->rd); 1196 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); 1197 } 1198 return true; 1199 } 1200 1201 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) 1202 { 1203 if (!dc_isar_feature(aa64_sme, s)) { 1204 return false; 1205 } 1206 if (sme_enabled_check(s)) { 1207 TCGv_i64 reg = cpu_reg(s, a->rd); 1208 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); 1209 } 1210 return true; 1211 } 1212 1213 /* 1214 *** SVE Compute Vector Address Group 1215 */ 1216 1217 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) 1218 { 1219 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); 1220 } 1221 1222 TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) 1223 TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) 1224 TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) 1225 TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32) 1226 1227 /* 1228 *** SVE Integer Misc - Unpredicated Group 1229 */ 1230 1231 static gen_helper_gvec_2 * const fexpa_fns[4] = { 1232 NULL, gen_helper_sve_fexpa_h, 1233 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, 1234 }; 1235 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, 1236 fexpa_fns[a->esz], a->rd, a->rn, 0) 1237 1238 static gen_helper_gvec_3 * const ftssel_fns[4] = { 1239 NULL, gen_helper_sve_ftssel_h, 1240 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, 1241 }; 1242 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, 1243 ftssel_fns[a->esz], a, 0) 1244 1245 /* 1246 *** SVE Predicate Logical Operations Group 1247 */ 1248 1249 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, 1250 const GVecGen4 *gvec_op) 1251 { 1252 if (!sve_access_check(s)) { 1253 return true; 1254 } 1255 1256 unsigned psz = pred_gvec_reg_size(s); 1257 int dofs = pred_full_reg_offset(s, a->rd); 1258 int nofs = pred_full_reg_offset(s, a->rn); 1259 int mofs = pred_full_reg_offset(s, a->rm); 1260 int gofs = pred_full_reg_offset(s, a->pg); 1261 1262 if (!a->s) { 1263 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1264 return true; 1265 } 1266 1267 if (psz == 8) { 1268 /* Do the operation and the flags generation in temps. */ 1269 TCGv_i64 pd = tcg_temp_new_i64(); 1270 TCGv_i64 pn = tcg_temp_new_i64(); 1271 TCGv_i64 pm = tcg_temp_new_i64(); 1272 TCGv_i64 pg = tcg_temp_new_i64(); 1273 1274 tcg_gen_ld_i64(pn, tcg_env, nofs); 1275 tcg_gen_ld_i64(pm, tcg_env, mofs); 1276 tcg_gen_ld_i64(pg, tcg_env, gofs); 1277 1278 gvec_op->fni8(pd, pn, pm, pg); 1279 tcg_gen_st_i64(pd, tcg_env, dofs); 1280 1281 do_predtest1(pd, pg); 1282 } else { 1283 /* The operation and flags generation is large. The computation 1284 * of the flags depends on the original contents of the guarding 1285 * predicate. If the destination overwrites the guarding predicate, 1286 * then the easiest way to get this right is to save a copy. 1287 */ 1288 int tofs = gofs; 1289 if (a->rd == a->pg) { 1290 tofs = offsetof(CPUARMState, vfp.preg_tmp); 1291 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz); 1292 } 1293 1294 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1295 do_predtest(s, dofs, tofs, psz / 8); 1296 } 1297 return true; 1298 } 1299 1300 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1301 { 1302 tcg_gen_and_i64(pd, pn, pm); 1303 tcg_gen_and_i64(pd, pd, pg); 1304 } 1305 1306 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1307 TCGv_vec pm, TCGv_vec pg) 1308 { 1309 tcg_gen_and_vec(vece, pd, pn, pm); 1310 tcg_gen_and_vec(vece, pd, pd, pg); 1311 } 1312 1313 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) 1314 { 1315 static const GVecGen4 op = { 1316 .fni8 = gen_and_pg_i64, 1317 .fniv = gen_and_pg_vec, 1318 .fno = gen_helper_sve_and_pppp, 1319 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1320 }; 1321 1322 if (!dc_isar_feature(aa64_sve, s)) { 1323 return false; 1324 } 1325 if (!a->s) { 1326 if (a->rn == a->rm) { 1327 if (a->pg == a->rn) { 1328 return do_mov_p(s, a->rd, a->rn); 1329 } 1330 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); 1331 } else if (a->pg == a->rn || a->pg == a->rm) { 1332 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); 1333 } 1334 } 1335 return do_pppp_flags(s, a, &op); 1336 } 1337 1338 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1339 { 1340 tcg_gen_andc_i64(pd, pn, pm); 1341 tcg_gen_and_i64(pd, pd, pg); 1342 } 1343 1344 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1345 TCGv_vec pm, TCGv_vec pg) 1346 { 1347 tcg_gen_andc_vec(vece, pd, pn, pm); 1348 tcg_gen_and_vec(vece, pd, pd, pg); 1349 } 1350 1351 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) 1352 { 1353 static const GVecGen4 op = { 1354 .fni8 = gen_bic_pg_i64, 1355 .fniv = gen_bic_pg_vec, 1356 .fno = gen_helper_sve_bic_pppp, 1357 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1358 }; 1359 1360 if (!dc_isar_feature(aa64_sve, s)) { 1361 return false; 1362 } 1363 if (!a->s && a->pg == a->rn) { 1364 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); 1365 } 1366 return do_pppp_flags(s, a, &op); 1367 } 1368 1369 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1370 { 1371 tcg_gen_xor_i64(pd, pn, pm); 1372 tcg_gen_and_i64(pd, pd, pg); 1373 } 1374 1375 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1376 TCGv_vec pm, TCGv_vec pg) 1377 { 1378 tcg_gen_xor_vec(vece, pd, pn, pm); 1379 tcg_gen_and_vec(vece, pd, pd, pg); 1380 } 1381 1382 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) 1383 { 1384 static const GVecGen4 op = { 1385 .fni8 = gen_eor_pg_i64, 1386 .fniv = gen_eor_pg_vec, 1387 .fno = gen_helper_sve_eor_pppp, 1388 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1389 }; 1390 1391 if (!dc_isar_feature(aa64_sve, s)) { 1392 return false; 1393 } 1394 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ 1395 if (!a->s && a->pg == a->rm) { 1396 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); 1397 } 1398 return do_pppp_flags(s, a, &op); 1399 } 1400 1401 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) 1402 { 1403 if (a->s || !dc_isar_feature(aa64_sve, s)) { 1404 return false; 1405 } 1406 if (sve_access_check(s)) { 1407 unsigned psz = pred_gvec_reg_size(s); 1408 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), 1409 pred_full_reg_offset(s, a->pg), 1410 pred_full_reg_offset(s, a->rn), 1411 pred_full_reg_offset(s, a->rm), psz, psz); 1412 } 1413 return true; 1414 } 1415 1416 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1417 { 1418 tcg_gen_or_i64(pd, pn, pm); 1419 tcg_gen_and_i64(pd, pd, pg); 1420 } 1421 1422 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1423 TCGv_vec pm, TCGv_vec pg) 1424 { 1425 tcg_gen_or_vec(vece, pd, pn, pm); 1426 tcg_gen_and_vec(vece, pd, pd, pg); 1427 } 1428 1429 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) 1430 { 1431 static const GVecGen4 op = { 1432 .fni8 = gen_orr_pg_i64, 1433 .fniv = gen_orr_pg_vec, 1434 .fno = gen_helper_sve_orr_pppp, 1435 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1436 }; 1437 1438 if (!dc_isar_feature(aa64_sve, s)) { 1439 return false; 1440 } 1441 if (!a->s && a->pg == a->rn && a->rn == a->rm) { 1442 return do_mov_p(s, a->rd, a->rn); 1443 } 1444 return do_pppp_flags(s, a, &op); 1445 } 1446 1447 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1448 { 1449 tcg_gen_orc_i64(pd, pn, pm); 1450 tcg_gen_and_i64(pd, pd, pg); 1451 } 1452 1453 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1454 TCGv_vec pm, TCGv_vec pg) 1455 { 1456 tcg_gen_orc_vec(vece, pd, pn, pm); 1457 tcg_gen_and_vec(vece, pd, pd, pg); 1458 } 1459 1460 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) 1461 { 1462 static const GVecGen4 op = { 1463 .fni8 = gen_orn_pg_i64, 1464 .fniv = gen_orn_pg_vec, 1465 .fno = gen_helper_sve_orn_pppp, 1466 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1467 }; 1468 1469 if (!dc_isar_feature(aa64_sve, s)) { 1470 return false; 1471 } 1472 return do_pppp_flags(s, a, &op); 1473 } 1474 1475 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1476 { 1477 tcg_gen_or_i64(pd, pn, pm); 1478 tcg_gen_andc_i64(pd, pg, pd); 1479 } 1480 1481 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1482 TCGv_vec pm, TCGv_vec pg) 1483 { 1484 tcg_gen_or_vec(vece, pd, pn, pm); 1485 tcg_gen_andc_vec(vece, pd, pg, pd); 1486 } 1487 1488 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) 1489 { 1490 static const GVecGen4 op = { 1491 .fni8 = gen_nor_pg_i64, 1492 .fniv = gen_nor_pg_vec, 1493 .fno = gen_helper_sve_nor_pppp, 1494 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1495 }; 1496 1497 if (!dc_isar_feature(aa64_sve, s)) { 1498 return false; 1499 } 1500 return do_pppp_flags(s, a, &op); 1501 } 1502 1503 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1504 { 1505 tcg_gen_and_i64(pd, pn, pm); 1506 tcg_gen_andc_i64(pd, pg, pd); 1507 } 1508 1509 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1510 TCGv_vec pm, TCGv_vec pg) 1511 { 1512 tcg_gen_and_vec(vece, pd, pn, pm); 1513 tcg_gen_andc_vec(vece, pd, pg, pd); 1514 } 1515 1516 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) 1517 { 1518 static const GVecGen4 op = { 1519 .fni8 = gen_nand_pg_i64, 1520 .fniv = gen_nand_pg_vec, 1521 .fno = gen_helper_sve_nand_pppp, 1522 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1523 }; 1524 1525 if (!dc_isar_feature(aa64_sve, s)) { 1526 return false; 1527 } 1528 return do_pppp_flags(s, a, &op); 1529 } 1530 1531 /* 1532 *** SVE Predicate Misc Group 1533 */ 1534 1535 static bool trans_PTEST(DisasContext *s, arg_PTEST *a) 1536 { 1537 if (!dc_isar_feature(aa64_sve, s)) { 1538 return false; 1539 } 1540 if (sve_access_check(s)) { 1541 int nofs = pred_full_reg_offset(s, a->rn); 1542 int gofs = pred_full_reg_offset(s, a->pg); 1543 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8); 1544 1545 if (words == 1) { 1546 TCGv_i64 pn = tcg_temp_new_i64(); 1547 TCGv_i64 pg = tcg_temp_new_i64(); 1548 1549 tcg_gen_ld_i64(pn, tcg_env, nofs); 1550 tcg_gen_ld_i64(pg, tcg_env, gofs); 1551 do_predtest1(pn, pg); 1552 } else { 1553 do_predtest(s, nofs, gofs, words); 1554 } 1555 } 1556 return true; 1557 } 1558 1559 /* See the ARM pseudocode DecodePredCount. */ 1560 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz) 1561 { 1562 unsigned elements = fullsz >> esz; 1563 unsigned bound; 1564 1565 switch (pattern) { 1566 case 0x0: /* POW2 */ 1567 return pow2floor(elements); 1568 case 0x1: /* VL1 */ 1569 case 0x2: /* VL2 */ 1570 case 0x3: /* VL3 */ 1571 case 0x4: /* VL4 */ 1572 case 0x5: /* VL5 */ 1573 case 0x6: /* VL6 */ 1574 case 0x7: /* VL7 */ 1575 case 0x8: /* VL8 */ 1576 bound = pattern; 1577 break; 1578 case 0x9: /* VL16 */ 1579 case 0xa: /* VL32 */ 1580 case 0xb: /* VL64 */ 1581 case 0xc: /* VL128 */ 1582 case 0xd: /* VL256 */ 1583 bound = 16 << (pattern - 9); 1584 break; 1585 case 0x1d: /* MUL4 */ 1586 return elements - elements % 4; 1587 case 0x1e: /* MUL3 */ 1588 return elements - elements % 3; 1589 case 0x1f: /* ALL */ 1590 return elements; 1591 default: /* #uimm5 */ 1592 return 0; 1593 } 1594 return elements >= bound ? bound : 0; 1595 } 1596 1597 /* This handles all of the predicate initialization instructions, 1598 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32 1599 * so that decode_pred_count returns 0. For SETFFR, we will have 1600 * set RD == 16 == FFR. 1601 */ 1602 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) 1603 { 1604 if (!sve_access_check(s)) { 1605 return true; 1606 } 1607 1608 unsigned fullsz = vec_full_reg_size(s); 1609 unsigned ofs = pred_full_reg_offset(s, rd); 1610 unsigned numelem, setsz, i; 1611 uint64_t word, lastword; 1612 TCGv_i64 t; 1613 1614 numelem = decode_pred_count(fullsz, pat, esz); 1615 1616 /* Determine what we must store into each bit, and how many. */ 1617 if (numelem == 0) { 1618 lastword = word = 0; 1619 setsz = fullsz; 1620 } else { 1621 setsz = numelem << esz; 1622 lastword = word = pred_esz_masks[esz]; 1623 if (setsz % 64) { 1624 lastword &= MAKE_64BIT_MASK(0, setsz % 64); 1625 } 1626 } 1627 1628 t = tcg_temp_new_i64(); 1629 if (fullsz <= 64) { 1630 tcg_gen_movi_i64(t, lastword); 1631 tcg_gen_st_i64(t, tcg_env, ofs); 1632 goto done; 1633 } 1634 1635 if (word == lastword) { 1636 unsigned maxsz = size_for_gvec(fullsz / 8); 1637 unsigned oprsz = size_for_gvec(setsz / 8); 1638 1639 if (oprsz * 8 == setsz) { 1640 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word); 1641 goto done; 1642 } 1643 } 1644 1645 setsz /= 8; 1646 fullsz /= 8; 1647 1648 tcg_gen_movi_i64(t, word); 1649 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) { 1650 tcg_gen_st_i64(t, tcg_env, ofs + i); 1651 } 1652 if (lastword != word) { 1653 tcg_gen_movi_i64(t, lastword); 1654 tcg_gen_st_i64(t, tcg_env, ofs + i); 1655 i += 8; 1656 } 1657 if (i < fullsz) { 1658 tcg_gen_movi_i64(t, 0); 1659 for (; i < fullsz; i += 8) { 1660 tcg_gen_st_i64(t, tcg_env, ofs + i); 1661 } 1662 } 1663 1664 done: 1665 /* PTRUES */ 1666 if (setflag) { 1667 tcg_gen_movi_i32(cpu_NF, -(word != 0)); 1668 tcg_gen_movi_i32(cpu_CF, word == 0); 1669 tcg_gen_movi_i32(cpu_VF, 0); 1670 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1671 } 1672 return true; 1673 } 1674 1675 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) 1676 1677 /* Note pat == 31 is #all, to set all elements. */ 1678 TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, 1679 do_predset, 0, FFR_PRED_NUM, 31, false) 1680 1681 /* Note pat == 32 is #unimp, to set no elements. */ 1682 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) 1683 1684 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) 1685 { 1686 /* The path through do_pppp_flags is complicated enough to want to avoid 1687 * duplication. Frob the arguments into the form of a predicated AND. 1688 */ 1689 arg_rprr_s alt_a = { 1690 .rd = a->rd, .pg = a->pg, .s = a->s, 1691 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM, 1692 }; 1693 1694 s->is_nonstreaming = true; 1695 return trans_AND_pppp(s, &alt_a); 1696 } 1697 1698 TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) 1699 TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) 1700 1701 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, 1702 void (*gen_fn)(TCGv_i32, TCGv_ptr, 1703 TCGv_ptr, TCGv_i32)) 1704 { 1705 if (!sve_access_check(s)) { 1706 return true; 1707 } 1708 1709 TCGv_ptr t_pd = tcg_temp_new_ptr(); 1710 TCGv_ptr t_pg = tcg_temp_new_ptr(); 1711 TCGv_i32 t; 1712 unsigned desc = 0; 1713 1714 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 1715 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 1716 1717 tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd)); 1718 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn)); 1719 t = tcg_temp_new_i32(); 1720 1721 gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc)); 1722 1723 do_pred_flags(t); 1724 return true; 1725 } 1726 1727 TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst) 1728 TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext) 1729 1730 /* 1731 *** SVE Element Count Group 1732 */ 1733 1734 /* Perform an inline saturating addition of a 32-bit value within 1735 * a 64-bit register. The second operand is known to be positive, 1736 * which halves the comparisons we must perform to bound the result. 1737 */ 1738 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1739 { 1740 int64_t ibound; 1741 1742 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 1743 if (u) { 1744 tcg_gen_ext32u_i64(reg, reg); 1745 } else { 1746 tcg_gen_ext32s_i64(reg, reg); 1747 } 1748 if (d) { 1749 tcg_gen_sub_i64(reg, reg, val); 1750 ibound = (u ? 0 : INT32_MIN); 1751 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound)); 1752 } else { 1753 tcg_gen_add_i64(reg, reg, val); 1754 ibound = (u ? UINT32_MAX : INT32_MAX); 1755 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound)); 1756 } 1757 } 1758 1759 /* Similarly with 64-bit values. */ 1760 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1761 { 1762 TCGv_i64 t0 = tcg_temp_new_i64(); 1763 TCGv_i64 t2; 1764 1765 if (u) { 1766 if (d) { 1767 tcg_gen_sub_i64(t0, reg, val); 1768 t2 = tcg_constant_i64(0); 1769 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0); 1770 } else { 1771 tcg_gen_add_i64(t0, reg, val); 1772 t2 = tcg_constant_i64(-1); 1773 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0); 1774 } 1775 } else { 1776 TCGv_i64 t1 = tcg_temp_new_i64(); 1777 if (d) { 1778 /* Detect signed overflow for subtraction. */ 1779 tcg_gen_xor_i64(t0, reg, val); 1780 tcg_gen_sub_i64(t1, reg, val); 1781 tcg_gen_xor_i64(reg, reg, t1); 1782 tcg_gen_and_i64(t0, t0, reg); 1783 1784 /* Bound the result. */ 1785 tcg_gen_movi_i64(reg, INT64_MIN); 1786 t2 = tcg_constant_i64(0); 1787 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1); 1788 } else { 1789 /* Detect signed overflow for addition. */ 1790 tcg_gen_xor_i64(t0, reg, val); 1791 tcg_gen_add_i64(reg, reg, val); 1792 tcg_gen_xor_i64(t1, reg, val); 1793 tcg_gen_andc_i64(t0, t1, t0); 1794 1795 /* Bound the result. */ 1796 tcg_gen_movi_i64(t1, INT64_MAX); 1797 t2 = tcg_constant_i64(0); 1798 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); 1799 } 1800 } 1801 } 1802 1803 /* Similarly with a vector and a scalar operand. */ 1804 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1805 TCGv_i64 val, bool u, bool d) 1806 { 1807 unsigned vsz = vec_full_reg_size(s); 1808 TCGv_ptr dptr, nptr; 1809 TCGv_i32 t32, desc; 1810 TCGv_i64 t64; 1811 1812 dptr = tcg_temp_new_ptr(); 1813 nptr = tcg_temp_new_ptr(); 1814 tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd)); 1815 tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn)); 1816 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1817 1818 switch (esz) { 1819 case MO_8: 1820 t32 = tcg_temp_new_i32(); 1821 tcg_gen_extrl_i64_i32(t32, val); 1822 if (d) { 1823 tcg_gen_neg_i32(t32, t32); 1824 } 1825 if (u) { 1826 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1827 } else { 1828 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1829 } 1830 break; 1831 1832 case MO_16: 1833 t32 = tcg_temp_new_i32(); 1834 tcg_gen_extrl_i64_i32(t32, val); 1835 if (d) { 1836 tcg_gen_neg_i32(t32, t32); 1837 } 1838 if (u) { 1839 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1840 } else { 1841 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1842 } 1843 break; 1844 1845 case MO_32: 1846 t64 = tcg_temp_new_i64(); 1847 if (d) { 1848 tcg_gen_neg_i64(t64, val); 1849 } else { 1850 tcg_gen_mov_i64(t64, val); 1851 } 1852 if (u) { 1853 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1854 } else { 1855 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1856 } 1857 break; 1858 1859 case MO_64: 1860 if (u) { 1861 if (d) { 1862 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1863 } else { 1864 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1865 } 1866 } else if (d) { 1867 t64 = tcg_temp_new_i64(); 1868 tcg_gen_neg_i64(t64, val); 1869 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1870 } else { 1871 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1872 } 1873 break; 1874 1875 default: 1876 g_assert_not_reached(); 1877 } 1878 } 1879 1880 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1881 { 1882 if (!dc_isar_feature(aa64_sve, s)) { 1883 return false; 1884 } 1885 if (sve_access_check(s)) { 1886 unsigned fullsz = vec_full_reg_size(s); 1887 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1888 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 1889 } 1890 return true; 1891 } 1892 1893 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 1894 { 1895 if (!dc_isar_feature(aa64_sve, s)) { 1896 return false; 1897 } 1898 if (sve_access_check(s)) { 1899 unsigned fullsz = vec_full_reg_size(s); 1900 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1901 int inc = numelem * a->imm * (a->d ? -1 : 1); 1902 TCGv_i64 reg = cpu_reg(s, a->rd); 1903 1904 tcg_gen_addi_i64(reg, reg, inc); 1905 } 1906 return true; 1907 } 1908 1909 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 1910 { 1911 if (!dc_isar_feature(aa64_sve, s)) { 1912 return false; 1913 } 1914 if (!sve_access_check(s)) { 1915 return true; 1916 } 1917 1918 unsigned fullsz = vec_full_reg_size(s); 1919 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1920 int inc = numelem * a->imm; 1921 TCGv_i64 reg = cpu_reg(s, a->rd); 1922 1923 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 1924 if (inc == 0) { 1925 if (a->u) { 1926 tcg_gen_ext32u_i64(reg, reg); 1927 } else { 1928 tcg_gen_ext32s_i64(reg, reg); 1929 } 1930 } else { 1931 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 1932 } 1933 return true; 1934 } 1935 1936 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 1937 { 1938 if (!dc_isar_feature(aa64_sve, s)) { 1939 return false; 1940 } 1941 if (!sve_access_check(s)) { 1942 return true; 1943 } 1944 1945 unsigned fullsz = vec_full_reg_size(s); 1946 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1947 int inc = numelem * a->imm; 1948 TCGv_i64 reg = cpu_reg(s, a->rd); 1949 1950 if (inc != 0) { 1951 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 1952 } 1953 return true; 1954 } 1955 1956 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 1957 { 1958 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 1959 return false; 1960 } 1961 1962 unsigned fullsz = vec_full_reg_size(s); 1963 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1964 int inc = numelem * a->imm; 1965 1966 if (inc != 0) { 1967 if (sve_access_check(s)) { 1968 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 1969 vec_full_reg_offset(s, a->rn), 1970 tcg_constant_i64(a->d ? -inc : inc), 1971 fullsz, fullsz); 1972 } 1973 } else { 1974 do_mov_z(s, a->rd, a->rn); 1975 } 1976 return true; 1977 } 1978 1979 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 1980 { 1981 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 1982 return false; 1983 } 1984 1985 unsigned fullsz = vec_full_reg_size(s); 1986 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1987 int inc = numelem * a->imm; 1988 1989 if (inc != 0) { 1990 if (sve_access_check(s)) { 1991 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 1992 tcg_constant_i64(inc), a->u, a->d); 1993 } 1994 } else { 1995 do_mov_z(s, a->rd, a->rn); 1996 } 1997 return true; 1998 } 1999 2000 /* 2001 *** SVE Bitwise Immediate Group 2002 */ 2003 2004 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2005 { 2006 uint64_t imm; 2007 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2008 extract32(a->dbm, 0, 6), 2009 extract32(a->dbm, 6, 6))) { 2010 return false; 2011 } 2012 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2013 } 2014 2015 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2016 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2017 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2018 2019 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2020 { 2021 uint64_t imm; 2022 2023 if (!dc_isar_feature(aa64_sve, s)) { 2024 return false; 2025 } 2026 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2027 extract32(a->dbm, 0, 6), 2028 extract32(a->dbm, 6, 6))) { 2029 return false; 2030 } 2031 if (sve_access_check(s)) { 2032 do_dupi_z(s, a->rd, imm); 2033 } 2034 return true; 2035 } 2036 2037 /* 2038 *** SVE Integer Wide Immediate - Predicated Group 2039 */ 2040 2041 /* Implement all merging copies. This is used for CPY (immediate), 2042 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 2043 */ 2044 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2045 TCGv_i64 val) 2046 { 2047 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2048 static gen_cpy * const fns[4] = { 2049 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2050 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2051 }; 2052 unsigned vsz = vec_full_reg_size(s); 2053 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2054 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2055 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2056 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2057 2058 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 2059 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn)); 2060 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2061 2062 fns[esz](t_zd, t_zn, t_pg, val, desc); 2063 } 2064 2065 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2066 { 2067 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2068 return false; 2069 } 2070 if (sve_access_check(s)) { 2071 /* Decode the VFP immediate. */ 2072 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2073 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2074 } 2075 return true; 2076 } 2077 2078 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2079 { 2080 if (!dc_isar_feature(aa64_sve, s)) { 2081 return false; 2082 } 2083 if (sve_access_check(s)) { 2084 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2085 } 2086 return true; 2087 } 2088 2089 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2090 { 2091 static gen_helper_gvec_2i * const fns[4] = { 2092 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2093 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2094 }; 2095 2096 if (!dc_isar_feature(aa64_sve, s)) { 2097 return false; 2098 } 2099 if (sve_access_check(s)) { 2100 unsigned vsz = vec_full_reg_size(s); 2101 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2102 pred_full_reg_offset(s, a->pg), 2103 tcg_constant_i64(a->imm), 2104 vsz, vsz, 0, fns[a->esz]); 2105 } 2106 return true; 2107 } 2108 2109 /* 2110 *** SVE Permute Extract Group 2111 */ 2112 2113 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2114 { 2115 if (!sve_access_check(s)) { 2116 return true; 2117 } 2118 2119 unsigned vsz = vec_full_reg_size(s); 2120 unsigned n_ofs = imm >= vsz ? 0 : imm; 2121 unsigned n_siz = vsz - n_ofs; 2122 unsigned d = vec_full_reg_offset(s, rd); 2123 unsigned n = vec_full_reg_offset(s, rn); 2124 unsigned m = vec_full_reg_offset(s, rm); 2125 2126 /* Use host vector move insns if we have appropriate sizes 2127 * and no unfortunate overlap. 2128 */ 2129 if (m != d 2130 && n_ofs == size_for_gvec(n_ofs) 2131 && n_siz == size_for_gvec(n_siz) 2132 && (d != n || n_siz <= n_ofs)) { 2133 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2134 if (n_ofs != 0) { 2135 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2136 } 2137 } else { 2138 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2139 } 2140 return true; 2141 } 2142 2143 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2144 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2145 2146 /* 2147 *** SVE Permute - Unpredicated Group 2148 */ 2149 2150 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2151 { 2152 if (!dc_isar_feature(aa64_sve, s)) { 2153 return false; 2154 } 2155 if (sve_access_check(s)) { 2156 unsigned vsz = vec_full_reg_size(s); 2157 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2158 vsz, vsz, cpu_reg_sp(s, a->rn)); 2159 } 2160 return true; 2161 } 2162 2163 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2164 { 2165 if (!dc_isar_feature(aa64_sve, s)) { 2166 return false; 2167 } 2168 if ((a->imm & 0x1f) == 0) { 2169 return false; 2170 } 2171 if (sve_access_check(s)) { 2172 unsigned vsz = vec_full_reg_size(s); 2173 unsigned dofs = vec_full_reg_offset(s, a->rd); 2174 unsigned esz, index; 2175 2176 esz = ctz32(a->imm); 2177 index = a->imm >> (esz + 1); 2178 2179 if ((index << esz) < vsz) { 2180 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2181 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2182 } else { 2183 /* 2184 * While dup_mem handles 128-bit elements, dup_imm does not. 2185 * Thankfully element size doesn't matter for splatting zero. 2186 */ 2187 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2188 } 2189 } 2190 return true; 2191 } 2192 2193 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2194 { 2195 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2196 static gen_insr * const fns[4] = { 2197 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2198 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2199 }; 2200 unsigned vsz = vec_full_reg_size(s); 2201 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2202 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2203 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2204 2205 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd)); 2206 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2207 2208 fns[a->esz](t_zd, t_zn, val, desc); 2209 } 2210 2211 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2212 { 2213 if (!dc_isar_feature(aa64_sve, s)) { 2214 return false; 2215 } 2216 if (sve_access_check(s)) { 2217 TCGv_i64 t = tcg_temp_new_i64(); 2218 tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2219 do_insr_i64(s, a, t); 2220 } 2221 return true; 2222 } 2223 2224 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2225 { 2226 if (!dc_isar_feature(aa64_sve, s)) { 2227 return false; 2228 } 2229 if (sve_access_check(s)) { 2230 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2231 } 2232 return true; 2233 } 2234 2235 static gen_helper_gvec_2 * const rev_fns[4] = { 2236 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2237 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2238 }; 2239 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2240 2241 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2242 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2243 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2244 }; 2245 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0) 2246 2247 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2248 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2249 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2250 }; 2251 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2252 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2253 2254 static gen_helper_gvec_3 * const tbx_fns[4] = { 2255 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2256 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2257 }; 2258 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2259 2260 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2261 { 2262 static gen_helper_gvec_2 * const fns[4][2] = { 2263 { NULL, NULL }, 2264 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2265 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2266 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2267 }; 2268 2269 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2270 return false; 2271 } 2272 if (sve_access_check(s)) { 2273 unsigned vsz = vec_full_reg_size(s); 2274 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2275 vec_full_reg_offset(s, a->rn) 2276 + (a->h ? vsz / 2 : 0), 2277 vsz, vsz, 0, fns[a->esz][a->u]); 2278 } 2279 return true; 2280 } 2281 2282 /* 2283 *** SVE Permute - Predicates Group 2284 */ 2285 2286 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2287 gen_helper_gvec_3 *fn) 2288 { 2289 if (!sve_access_check(s)) { 2290 return true; 2291 } 2292 2293 unsigned vsz = pred_full_reg_size(s); 2294 2295 TCGv_ptr t_d = tcg_temp_new_ptr(); 2296 TCGv_ptr t_n = tcg_temp_new_ptr(); 2297 TCGv_ptr t_m = tcg_temp_new_ptr(); 2298 uint32_t desc = 0; 2299 2300 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2301 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2302 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2303 2304 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2305 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2306 tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm)); 2307 2308 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2309 return true; 2310 } 2311 2312 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2313 gen_helper_gvec_2 *fn) 2314 { 2315 if (!sve_access_check(s)) { 2316 return true; 2317 } 2318 2319 unsigned vsz = pred_full_reg_size(s); 2320 TCGv_ptr t_d = tcg_temp_new_ptr(); 2321 TCGv_ptr t_n = tcg_temp_new_ptr(); 2322 uint32_t desc = 0; 2323 2324 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2325 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2326 2327 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2328 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2329 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2330 2331 fn(t_d, t_n, tcg_constant_i32(desc)); 2332 return true; 2333 } 2334 2335 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2336 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2337 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2338 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2339 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2340 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2341 2342 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2343 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2344 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2345 2346 /* 2347 *** SVE Permute - Interleaving Group 2348 */ 2349 2350 static gen_helper_gvec_3 * const zip_fns[4] = { 2351 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2352 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2353 }; 2354 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2355 zip_fns[a->esz], a, 0) 2356 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2357 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2358 2359 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2360 gen_helper_sve2_zip_q, a, 0) 2361 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2362 gen_helper_sve2_zip_q, a, 2363 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2364 2365 static gen_helper_gvec_3 * const uzp_fns[4] = { 2366 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2367 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2368 }; 2369 2370 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2371 uzp_fns[a->esz], a, 0) 2372 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2373 uzp_fns[a->esz], a, 1 << a->esz) 2374 2375 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2376 gen_helper_sve2_uzp_q, a, 0) 2377 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2378 gen_helper_sve2_uzp_q, a, 16) 2379 2380 static gen_helper_gvec_3 * const trn_fns[4] = { 2381 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2382 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2383 }; 2384 2385 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2386 trn_fns[a->esz], a, 0) 2387 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2388 trn_fns[a->esz], a, 1 << a->esz) 2389 2390 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2391 gen_helper_sve2_trn_q, a, 0) 2392 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2393 gen_helper_sve2_trn_q, a, 16) 2394 2395 /* 2396 *** SVE Permute Vector - Predicated Group 2397 */ 2398 2399 static gen_helper_gvec_3 * const compact_fns[4] = { 2400 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2401 }; 2402 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2403 compact_fns[a->esz], a, 0) 2404 2405 /* Call the helper that computes the ARM LastActiveElement pseudocode 2406 * function, scaled by the element size. This includes the not found 2407 * indication; e.g. not found for esz=3 is -8. 2408 */ 2409 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2410 { 2411 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2412 * round up, as we do elsewhere, because we need the exact size. 2413 */ 2414 TCGv_ptr t_p = tcg_temp_new_ptr(); 2415 unsigned desc = 0; 2416 2417 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2418 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2419 2420 tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg)); 2421 2422 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2423 } 2424 2425 /* Increment LAST to the offset of the next element in the vector, 2426 * wrapping around to 0. 2427 */ 2428 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2429 { 2430 unsigned vsz = vec_full_reg_size(s); 2431 2432 tcg_gen_addi_i32(last, last, 1 << esz); 2433 if (is_power_of_2(vsz)) { 2434 tcg_gen_andi_i32(last, last, vsz - 1); 2435 } else { 2436 TCGv_i32 max = tcg_constant_i32(vsz); 2437 TCGv_i32 zero = tcg_constant_i32(0); 2438 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2439 } 2440 } 2441 2442 /* If LAST < 0, set LAST to the offset of the last element in the vector. */ 2443 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2444 { 2445 unsigned vsz = vec_full_reg_size(s); 2446 2447 if (is_power_of_2(vsz)) { 2448 tcg_gen_andi_i32(last, last, vsz - 1); 2449 } else { 2450 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2451 TCGv_i32 zero = tcg_constant_i32(0); 2452 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2453 } 2454 } 2455 2456 /* Load an unsigned element of ESZ from BASE+OFS. */ 2457 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2458 { 2459 TCGv_i64 r = tcg_temp_new_i64(); 2460 2461 switch (esz) { 2462 case 0: 2463 tcg_gen_ld8u_i64(r, base, ofs); 2464 break; 2465 case 1: 2466 tcg_gen_ld16u_i64(r, base, ofs); 2467 break; 2468 case 2: 2469 tcg_gen_ld32u_i64(r, base, ofs); 2470 break; 2471 case 3: 2472 tcg_gen_ld_i64(r, base, ofs); 2473 break; 2474 default: 2475 g_assert_not_reached(); 2476 } 2477 return r; 2478 } 2479 2480 /* Load an unsigned element of ESZ from RM[LAST]. */ 2481 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2482 int rm, int esz) 2483 { 2484 TCGv_ptr p = tcg_temp_new_ptr(); 2485 2486 /* Convert offset into vector into offset into ENV. 2487 * The final adjustment for the vector register base 2488 * is added via constant offset to the load. 2489 */ 2490 #if HOST_BIG_ENDIAN 2491 /* Adjust for element ordering. See vec_reg_offset. */ 2492 if (esz < 3) { 2493 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2494 } 2495 #endif 2496 tcg_gen_ext_i32_ptr(p, last); 2497 tcg_gen_add_ptr(p, p, tcg_env); 2498 2499 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2500 } 2501 2502 /* Compute CLAST for a Zreg. */ 2503 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2504 { 2505 TCGv_i32 last; 2506 TCGLabel *over; 2507 TCGv_i64 ele; 2508 unsigned vsz, esz = a->esz; 2509 2510 if (!sve_access_check(s)) { 2511 return true; 2512 } 2513 2514 last = tcg_temp_new_i32(); 2515 over = gen_new_label(); 2516 2517 find_last_active(s, last, esz, a->pg); 2518 2519 /* There is of course no movcond for a 2048-bit vector, 2520 * so we must branch over the actual store. 2521 */ 2522 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2523 2524 if (!before) { 2525 incr_last_active(s, last, esz); 2526 } 2527 2528 ele = load_last_active(s, last, a->rm, esz); 2529 2530 vsz = vec_full_reg_size(s); 2531 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2532 2533 /* If this insn used MOVPRFX, we may need a second move. */ 2534 if (a->rd != a->rn) { 2535 TCGLabel *done = gen_new_label(); 2536 tcg_gen_br(done); 2537 2538 gen_set_label(over); 2539 do_mov_z(s, a->rd, a->rn); 2540 2541 gen_set_label(done); 2542 } else { 2543 gen_set_label(over); 2544 } 2545 return true; 2546 } 2547 2548 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2549 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2550 2551 /* Compute CLAST for a scalar. */ 2552 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2553 bool before, TCGv_i64 reg_val) 2554 { 2555 TCGv_i32 last = tcg_temp_new_i32(); 2556 TCGv_i64 ele, cmp; 2557 2558 find_last_active(s, last, esz, pg); 2559 2560 /* Extend the original value of last prior to incrementing. */ 2561 cmp = tcg_temp_new_i64(); 2562 tcg_gen_ext_i32_i64(cmp, last); 2563 2564 if (!before) { 2565 incr_last_active(s, last, esz); 2566 } 2567 2568 /* The conceit here is that while last < 0 indicates not found, after 2569 * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address 2570 * from which we can load garbage. We then discard the garbage with 2571 * a conditional move. 2572 */ 2573 ele = load_last_active(s, last, rm, esz); 2574 2575 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2576 ele, reg_val); 2577 } 2578 2579 /* Compute CLAST for a Vreg. */ 2580 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2581 { 2582 if (sve_access_check(s)) { 2583 int esz = a->esz; 2584 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2585 TCGv_i64 reg = load_esz(tcg_env, ofs, esz); 2586 2587 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2588 write_fp_dreg(s, a->rd, reg); 2589 } 2590 return true; 2591 } 2592 2593 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2594 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2595 2596 /* Compute CLAST for a Xreg. */ 2597 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2598 { 2599 TCGv_i64 reg; 2600 2601 if (!sve_access_check(s)) { 2602 return true; 2603 } 2604 2605 reg = cpu_reg(s, a->rd); 2606 switch (a->esz) { 2607 case 0: 2608 tcg_gen_ext8u_i64(reg, reg); 2609 break; 2610 case 1: 2611 tcg_gen_ext16u_i64(reg, reg); 2612 break; 2613 case 2: 2614 tcg_gen_ext32u_i64(reg, reg); 2615 break; 2616 case 3: 2617 break; 2618 default: 2619 g_assert_not_reached(); 2620 } 2621 2622 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2623 return true; 2624 } 2625 2626 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2627 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2628 2629 /* Compute LAST for a scalar. */ 2630 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2631 int pg, int rm, bool before) 2632 { 2633 TCGv_i32 last = tcg_temp_new_i32(); 2634 2635 find_last_active(s, last, esz, pg); 2636 if (before) { 2637 wrap_last_active(s, last, esz); 2638 } else { 2639 incr_last_active(s, last, esz); 2640 } 2641 2642 return load_last_active(s, last, rm, esz); 2643 } 2644 2645 /* Compute LAST for a Vreg. */ 2646 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2647 { 2648 if (sve_access_check(s)) { 2649 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2650 write_fp_dreg(s, a->rd, val); 2651 } 2652 return true; 2653 } 2654 2655 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2656 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2657 2658 /* Compute LAST for a Xreg. */ 2659 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2660 { 2661 if (sve_access_check(s)) { 2662 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2663 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2664 } 2665 return true; 2666 } 2667 2668 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2669 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2670 2671 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2672 { 2673 if (!dc_isar_feature(aa64_sve, s)) { 2674 return false; 2675 } 2676 if (sve_access_check(s)) { 2677 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2678 } 2679 return true; 2680 } 2681 2682 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2683 { 2684 if (!dc_isar_feature(aa64_sve, s)) { 2685 return false; 2686 } 2687 if (sve_access_check(s)) { 2688 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2689 TCGv_i64 t = load_esz(tcg_env, ofs, a->esz); 2690 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2691 } 2692 return true; 2693 } 2694 2695 static gen_helper_gvec_3 * const revb_fns[4] = { 2696 NULL, gen_helper_sve_revb_h, 2697 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2698 }; 2699 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2700 2701 static gen_helper_gvec_3 * const revh_fns[4] = { 2702 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2703 }; 2704 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2705 2706 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2707 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2708 2709 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2710 2711 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2712 gen_helper_sve_splice, a, a->esz) 2713 2714 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2715 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2716 2717 /* 2718 *** SVE Integer Compare - Vectors Group 2719 */ 2720 2721 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2722 gen_helper_gvec_flags_4 *gen_fn) 2723 { 2724 TCGv_ptr pd, zn, zm, pg; 2725 unsigned vsz; 2726 TCGv_i32 t; 2727 2728 if (gen_fn == NULL) { 2729 return false; 2730 } 2731 if (!sve_access_check(s)) { 2732 return true; 2733 } 2734 2735 vsz = vec_full_reg_size(s); 2736 t = tcg_temp_new_i32(); 2737 pd = tcg_temp_new_ptr(); 2738 zn = tcg_temp_new_ptr(); 2739 zm = tcg_temp_new_ptr(); 2740 pg = tcg_temp_new_ptr(); 2741 2742 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2743 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2744 tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm)); 2745 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2746 2747 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2748 2749 do_pred_flags(t); 2750 return true; 2751 } 2752 2753 #define DO_PPZZ(NAME, name) \ 2754 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2755 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2756 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2757 }; \ 2758 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2759 a, name##_ppzz_fns[a->esz]) 2760 2761 DO_PPZZ(CMPEQ, cmpeq) 2762 DO_PPZZ(CMPNE, cmpne) 2763 DO_PPZZ(CMPGT, cmpgt) 2764 DO_PPZZ(CMPGE, cmpge) 2765 DO_PPZZ(CMPHI, cmphi) 2766 DO_PPZZ(CMPHS, cmphs) 2767 2768 #undef DO_PPZZ 2769 2770 #define DO_PPZW(NAME, name) \ 2771 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2772 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2773 gen_helper_sve_##name##_ppzw_s, NULL \ 2774 }; \ 2775 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2776 a, name##_ppzw_fns[a->esz]) 2777 2778 DO_PPZW(CMPEQ, cmpeq) 2779 DO_PPZW(CMPNE, cmpne) 2780 DO_PPZW(CMPGT, cmpgt) 2781 DO_PPZW(CMPGE, cmpge) 2782 DO_PPZW(CMPHI, cmphi) 2783 DO_PPZW(CMPHS, cmphs) 2784 DO_PPZW(CMPLT, cmplt) 2785 DO_PPZW(CMPLE, cmple) 2786 DO_PPZW(CMPLO, cmplo) 2787 DO_PPZW(CMPLS, cmpls) 2788 2789 #undef DO_PPZW 2790 2791 /* 2792 *** SVE Integer Compare - Immediate Groups 2793 */ 2794 2795 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2796 gen_helper_gvec_flags_3 *gen_fn) 2797 { 2798 TCGv_ptr pd, zn, pg; 2799 unsigned vsz; 2800 TCGv_i32 t; 2801 2802 if (gen_fn == NULL) { 2803 return false; 2804 } 2805 if (!sve_access_check(s)) { 2806 return true; 2807 } 2808 2809 vsz = vec_full_reg_size(s); 2810 t = tcg_temp_new_i32(); 2811 pd = tcg_temp_new_ptr(); 2812 zn = tcg_temp_new_ptr(); 2813 pg = tcg_temp_new_ptr(); 2814 2815 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2816 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2817 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2818 2819 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 2820 2821 do_pred_flags(t); 2822 return true; 2823 } 2824 2825 #define DO_PPZI(NAME, name) \ 2826 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 2827 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 2828 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 2829 }; \ 2830 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 2831 name##_ppzi_fns[a->esz]) 2832 2833 DO_PPZI(CMPEQ, cmpeq) 2834 DO_PPZI(CMPNE, cmpne) 2835 DO_PPZI(CMPGT, cmpgt) 2836 DO_PPZI(CMPGE, cmpge) 2837 DO_PPZI(CMPHI, cmphi) 2838 DO_PPZI(CMPHS, cmphs) 2839 DO_PPZI(CMPLT, cmplt) 2840 DO_PPZI(CMPLE, cmple) 2841 DO_PPZI(CMPLO, cmplo) 2842 DO_PPZI(CMPLS, cmpls) 2843 2844 #undef DO_PPZI 2845 2846 /* 2847 *** SVE Partition Break Group 2848 */ 2849 2850 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2851 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2852 { 2853 if (!sve_access_check(s)) { 2854 return true; 2855 } 2856 2857 unsigned vsz = pred_full_reg_size(s); 2858 2859 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2860 TCGv_ptr d = tcg_temp_new_ptr(); 2861 TCGv_ptr n = tcg_temp_new_ptr(); 2862 TCGv_ptr m = tcg_temp_new_ptr(); 2863 TCGv_ptr g = tcg_temp_new_ptr(); 2864 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2865 2866 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 2867 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 2868 tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm)); 2869 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 2870 2871 if (a->s) { 2872 TCGv_i32 t = tcg_temp_new_i32(); 2873 fn_s(t, d, n, m, g, desc); 2874 do_pred_flags(t); 2875 } else { 2876 fn(d, n, m, g, desc); 2877 } 2878 return true; 2879 } 2880 2881 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2882 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 2883 { 2884 if (!sve_access_check(s)) { 2885 return true; 2886 } 2887 2888 unsigned vsz = pred_full_reg_size(s); 2889 2890 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2891 TCGv_ptr d = tcg_temp_new_ptr(); 2892 TCGv_ptr n = tcg_temp_new_ptr(); 2893 TCGv_ptr g = tcg_temp_new_ptr(); 2894 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2895 2896 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 2897 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 2898 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 2899 2900 if (a->s) { 2901 TCGv_i32 t = tcg_temp_new_i32(); 2902 fn_s(t, d, n, g, desc); 2903 do_pred_flags(t); 2904 } else { 2905 fn(d, n, g, desc); 2906 } 2907 return true; 2908 } 2909 2910 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 2911 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 2912 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 2913 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 2914 2915 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 2916 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 2917 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 2918 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 2919 2920 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 2921 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 2922 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 2923 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 2924 2925 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 2926 gen_helper_sve_brkn, gen_helper_sve_brkns) 2927 2928 /* 2929 *** SVE Predicate Count Group 2930 */ 2931 2932 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 2933 { 2934 unsigned psz = pred_full_reg_size(s); 2935 2936 if (psz <= 8) { 2937 uint64_t psz_mask; 2938 2939 tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn)); 2940 if (pn != pg) { 2941 TCGv_i64 g = tcg_temp_new_i64(); 2942 tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg)); 2943 tcg_gen_and_i64(val, val, g); 2944 } 2945 2946 /* Reduce the pred_esz_masks value simply to reduce the 2947 * size of the code generated here. 2948 */ 2949 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 2950 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 2951 2952 tcg_gen_ctpop_i64(val, val); 2953 } else { 2954 TCGv_ptr t_pn = tcg_temp_new_ptr(); 2955 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2956 unsigned desc = 0; 2957 2958 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 2959 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2960 2961 tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn)); 2962 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2963 2964 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 2965 } 2966 } 2967 2968 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 2969 { 2970 if (!dc_isar_feature(aa64_sve, s)) { 2971 return false; 2972 } 2973 if (sve_access_check(s)) { 2974 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 2975 } 2976 return true; 2977 } 2978 2979 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 2980 { 2981 if (!dc_isar_feature(aa64_sve, s)) { 2982 return false; 2983 } 2984 if (sve_access_check(s)) { 2985 TCGv_i64 reg = cpu_reg(s, a->rd); 2986 TCGv_i64 val = tcg_temp_new_i64(); 2987 2988 do_cntp(s, val, a->esz, a->pg, a->pg); 2989 if (a->d) { 2990 tcg_gen_sub_i64(reg, reg, val); 2991 } else { 2992 tcg_gen_add_i64(reg, reg, val); 2993 } 2994 } 2995 return true; 2996 } 2997 2998 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 2999 { 3000 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3001 return false; 3002 } 3003 if (sve_access_check(s)) { 3004 unsigned vsz = vec_full_reg_size(s); 3005 TCGv_i64 val = tcg_temp_new_i64(); 3006 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3007 3008 do_cntp(s, val, a->esz, a->pg, a->pg); 3009 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3010 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3011 } 3012 return true; 3013 } 3014 3015 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3016 { 3017 if (!dc_isar_feature(aa64_sve, s)) { 3018 return false; 3019 } 3020 if (sve_access_check(s)) { 3021 TCGv_i64 reg = cpu_reg(s, a->rd); 3022 TCGv_i64 val = tcg_temp_new_i64(); 3023 3024 do_cntp(s, val, a->esz, a->pg, a->pg); 3025 do_sat_addsub_32(reg, val, a->u, a->d); 3026 } 3027 return true; 3028 } 3029 3030 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3031 { 3032 if (!dc_isar_feature(aa64_sve, s)) { 3033 return false; 3034 } 3035 if (sve_access_check(s)) { 3036 TCGv_i64 reg = cpu_reg(s, a->rd); 3037 TCGv_i64 val = tcg_temp_new_i64(); 3038 3039 do_cntp(s, val, a->esz, a->pg, a->pg); 3040 do_sat_addsub_64(reg, val, a->u, a->d); 3041 } 3042 return true; 3043 } 3044 3045 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3046 { 3047 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3048 return false; 3049 } 3050 if (sve_access_check(s)) { 3051 TCGv_i64 val = tcg_temp_new_i64(); 3052 do_cntp(s, val, a->esz, a->pg, a->pg); 3053 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3054 } 3055 return true; 3056 } 3057 3058 /* 3059 *** SVE Integer Compare Scalars Group 3060 */ 3061 3062 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3063 { 3064 if (!dc_isar_feature(aa64_sve, s)) { 3065 return false; 3066 } 3067 if (!sve_access_check(s)) { 3068 return true; 3069 } 3070 3071 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3072 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3073 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3074 TCGv_i64 cmp = tcg_temp_new_i64(); 3075 3076 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3077 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3078 3079 /* VF = !NF & !CF. */ 3080 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3081 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3082 3083 /* Both NF and VF actually look at bit 31. */ 3084 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3085 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3086 return true; 3087 } 3088 3089 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3090 { 3091 TCGv_i64 op0, op1, t0, t1, tmax; 3092 TCGv_i32 t2; 3093 TCGv_ptr ptr; 3094 unsigned vsz = vec_full_reg_size(s); 3095 unsigned desc = 0; 3096 TCGCond cond; 3097 uint64_t maxval; 3098 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3099 bool eq = a->eq == a->lt; 3100 3101 /* The greater-than conditions are all SVE2. */ 3102 if (a->lt 3103 ? !dc_isar_feature(aa64_sve, s) 3104 : !dc_isar_feature(aa64_sve2, s)) { 3105 return false; 3106 } 3107 if (!sve_access_check(s)) { 3108 return true; 3109 } 3110 3111 op0 = read_cpu_reg(s, a->rn, 1); 3112 op1 = read_cpu_reg(s, a->rm, 1); 3113 3114 if (!a->sf) { 3115 if (a->u) { 3116 tcg_gen_ext32u_i64(op0, op0); 3117 tcg_gen_ext32u_i64(op1, op1); 3118 } else { 3119 tcg_gen_ext32s_i64(op0, op0); 3120 tcg_gen_ext32s_i64(op1, op1); 3121 } 3122 } 3123 3124 /* For the helper, compress the different conditions into a computation 3125 * of how many iterations for which the condition is true. 3126 */ 3127 t0 = tcg_temp_new_i64(); 3128 t1 = tcg_temp_new_i64(); 3129 3130 if (a->lt) { 3131 tcg_gen_sub_i64(t0, op1, op0); 3132 if (a->u) { 3133 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3134 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3135 } else { 3136 maxval = a->sf ? INT64_MAX : INT32_MAX; 3137 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3138 } 3139 } else { 3140 tcg_gen_sub_i64(t0, op0, op1); 3141 if (a->u) { 3142 maxval = 0; 3143 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3144 } else { 3145 maxval = a->sf ? INT64_MIN : INT32_MIN; 3146 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3147 } 3148 } 3149 3150 tmax = tcg_constant_i64(vsz >> a->esz); 3151 if (eq) { 3152 /* Equality means one more iteration. */ 3153 tcg_gen_addi_i64(t0, t0, 1); 3154 3155 /* 3156 * For the less-than while, if op1 is maxval (and the only time 3157 * the addition above could overflow), then we produce an all-true 3158 * predicate by setting the count to the vector length. This is 3159 * because the pseudocode is described as an increment + compare 3160 * loop, and the maximum integer would always compare true. 3161 * Similarly, the greater-than while has the same issue with the 3162 * minimum integer due to the decrement + compare loop. 3163 */ 3164 tcg_gen_movi_i64(t1, maxval); 3165 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3166 } 3167 3168 /* Bound to the maximum. */ 3169 tcg_gen_umin_i64(t0, t0, tmax); 3170 3171 /* Set the count to zero if the condition is false. */ 3172 tcg_gen_movi_i64(t1, 0); 3173 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3174 3175 /* Since we're bounded, pass as a 32-bit type. */ 3176 t2 = tcg_temp_new_i32(); 3177 tcg_gen_extrl_i64_i32(t2, t0); 3178 3179 /* Scale elements to bits. */ 3180 tcg_gen_shli_i32(t2, t2, a->esz); 3181 3182 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3183 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3184 3185 ptr = tcg_temp_new_ptr(); 3186 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3187 3188 if (a->lt) { 3189 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3190 } else { 3191 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3192 } 3193 do_pred_flags(t2); 3194 return true; 3195 } 3196 3197 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3198 { 3199 TCGv_i64 op0, op1, diff, t1, tmax; 3200 TCGv_i32 t2; 3201 TCGv_ptr ptr; 3202 unsigned vsz = vec_full_reg_size(s); 3203 unsigned desc = 0; 3204 3205 if (!dc_isar_feature(aa64_sve2, s)) { 3206 return false; 3207 } 3208 if (!sve_access_check(s)) { 3209 return true; 3210 } 3211 3212 op0 = read_cpu_reg(s, a->rn, 1); 3213 op1 = read_cpu_reg(s, a->rm, 1); 3214 3215 tmax = tcg_constant_i64(vsz); 3216 diff = tcg_temp_new_i64(); 3217 3218 if (a->rw) { 3219 /* WHILERW */ 3220 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3221 t1 = tcg_temp_new_i64(); 3222 tcg_gen_sub_i64(diff, op0, op1); 3223 tcg_gen_sub_i64(t1, op1, op0); 3224 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3225 /* Round down to a multiple of ESIZE. */ 3226 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3227 /* If op1 == op0, diff == 0, and the condition is always true. */ 3228 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3229 } else { 3230 /* WHILEWR */ 3231 tcg_gen_sub_i64(diff, op1, op0); 3232 /* Round down to a multiple of ESIZE. */ 3233 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3234 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3235 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3236 } 3237 3238 /* Bound to the maximum. */ 3239 tcg_gen_umin_i64(diff, diff, tmax); 3240 3241 /* Since we're bounded, pass as a 32-bit type. */ 3242 t2 = tcg_temp_new_i32(); 3243 tcg_gen_extrl_i64_i32(t2, diff); 3244 3245 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3246 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3247 3248 ptr = tcg_temp_new_ptr(); 3249 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3250 3251 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3252 do_pred_flags(t2); 3253 return true; 3254 } 3255 3256 /* 3257 *** SVE Integer Wide Immediate - Unpredicated Group 3258 */ 3259 3260 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3261 { 3262 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3263 return false; 3264 } 3265 if (sve_access_check(s)) { 3266 unsigned vsz = vec_full_reg_size(s); 3267 int dofs = vec_full_reg_offset(s, a->rd); 3268 uint64_t imm; 3269 3270 /* Decode the VFP immediate. */ 3271 imm = vfp_expand_imm(a->esz, a->imm); 3272 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3273 } 3274 return true; 3275 } 3276 3277 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3278 { 3279 if (!dc_isar_feature(aa64_sve, s)) { 3280 return false; 3281 } 3282 if (sve_access_check(s)) { 3283 unsigned vsz = vec_full_reg_size(s); 3284 int dofs = vec_full_reg_offset(s, a->rd); 3285 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3286 } 3287 return true; 3288 } 3289 3290 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3291 3292 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3293 { 3294 a->imm = -a->imm; 3295 return trans_ADD_zzi(s, a); 3296 } 3297 3298 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3299 { 3300 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3301 static const GVecGen2s op[4] = { 3302 { .fni8 = tcg_gen_vec_sub8_i64, 3303 .fniv = tcg_gen_sub_vec, 3304 .fno = gen_helper_sve_subri_b, 3305 .opt_opc = vecop_list, 3306 .vece = MO_8, 3307 .scalar_first = true }, 3308 { .fni8 = tcg_gen_vec_sub16_i64, 3309 .fniv = tcg_gen_sub_vec, 3310 .fno = gen_helper_sve_subri_h, 3311 .opt_opc = vecop_list, 3312 .vece = MO_16, 3313 .scalar_first = true }, 3314 { .fni4 = tcg_gen_sub_i32, 3315 .fniv = tcg_gen_sub_vec, 3316 .fno = gen_helper_sve_subri_s, 3317 .opt_opc = vecop_list, 3318 .vece = MO_32, 3319 .scalar_first = true }, 3320 { .fni8 = tcg_gen_sub_i64, 3321 .fniv = tcg_gen_sub_vec, 3322 .fno = gen_helper_sve_subri_d, 3323 .opt_opc = vecop_list, 3324 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3325 .vece = MO_64, 3326 .scalar_first = true } 3327 }; 3328 3329 if (!dc_isar_feature(aa64_sve, s)) { 3330 return false; 3331 } 3332 if (sve_access_check(s)) { 3333 unsigned vsz = vec_full_reg_size(s); 3334 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3335 vec_full_reg_offset(s, a->rn), 3336 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3337 } 3338 return true; 3339 } 3340 3341 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3342 3343 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3344 { 3345 if (sve_access_check(s)) { 3346 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3347 tcg_constant_i64(a->imm), u, d); 3348 } 3349 return true; 3350 } 3351 3352 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3353 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3354 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3355 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3356 3357 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3358 { 3359 if (sve_access_check(s)) { 3360 unsigned vsz = vec_full_reg_size(s); 3361 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3362 vec_full_reg_offset(s, a->rn), 3363 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3364 } 3365 return true; 3366 } 3367 3368 #define DO_ZZI(NAME, name) \ 3369 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3370 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3371 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3372 }; \ 3373 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3374 3375 DO_ZZI(SMAX, smax) 3376 DO_ZZI(UMAX, umax) 3377 DO_ZZI(SMIN, smin) 3378 DO_ZZI(UMIN, umin) 3379 3380 #undef DO_ZZI 3381 3382 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3383 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3384 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3385 }; 3386 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3387 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3388 3389 /* 3390 * SVE Multiply - Indexed 3391 */ 3392 3393 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3394 gen_helper_gvec_sdot_idx_b, a) 3395 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3396 gen_helper_gvec_sdot_idx_h, a) 3397 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3398 gen_helper_gvec_udot_idx_b, a) 3399 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3400 gen_helper_gvec_udot_idx_h, a) 3401 3402 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3403 gen_helper_gvec_sudot_idx_b, a) 3404 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3405 gen_helper_gvec_usdot_idx_b, a) 3406 3407 #define DO_SVE2_RRX(NAME, FUNC) \ 3408 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3409 a->rd, a->rn, a->rm, a->index) 3410 3411 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3412 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3413 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3414 3415 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3416 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3417 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3418 3419 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3420 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3421 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3422 3423 #undef DO_SVE2_RRX 3424 3425 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3426 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3427 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3428 3429 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3430 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3431 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3432 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3433 3434 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3435 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3436 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3437 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3438 3439 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3440 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3441 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3442 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3443 3444 #undef DO_SVE2_RRX_TB 3445 3446 #define DO_SVE2_RRXR(NAME, FUNC) \ 3447 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3448 3449 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3450 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s) 3451 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3452 3453 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3454 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3455 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3456 3457 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3458 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3459 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3460 3461 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3462 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3463 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3464 3465 #undef DO_SVE2_RRXR 3466 3467 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3468 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3469 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3470 3471 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3472 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3473 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3474 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3475 3476 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3477 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3478 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3479 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3480 3481 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3482 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3483 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3484 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3485 3486 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3487 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3488 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3489 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3490 3491 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3492 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3493 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3494 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3495 3496 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3497 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3498 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3499 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3500 3501 #undef DO_SVE2_RRXR_TB 3502 3503 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3504 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3505 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3506 3507 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3508 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3509 3510 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3511 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3512 3513 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3514 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3515 3516 #undef DO_SVE2_RRXR_ROT 3517 3518 /* 3519 *** SVE Floating Point Multiply-Add Indexed Group 3520 */ 3521 3522 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3523 { 3524 static gen_helper_gvec_4_ptr * const fns[4] = { 3525 NULL, 3526 gen_helper_gvec_fmla_idx_h, 3527 gen_helper_gvec_fmla_idx_s, 3528 gen_helper_gvec_fmla_idx_d, 3529 }; 3530 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3531 (a->index << 1) | sub, 3532 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3533 } 3534 3535 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3536 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3537 3538 /* 3539 *** SVE Floating Point Multiply Indexed Group 3540 */ 3541 3542 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3543 NULL, gen_helper_gvec_fmul_idx_h, 3544 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3545 }; 3546 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3547 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3548 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3549 3550 /* 3551 *** SVE Floating Point Fast Reduction Group 3552 */ 3553 3554 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3555 TCGv_ptr, TCGv_i32); 3556 3557 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3558 gen_helper_fp_reduce *fn) 3559 { 3560 unsigned vsz, p2vsz; 3561 TCGv_i32 t_desc; 3562 TCGv_ptr t_zn, t_pg, status; 3563 TCGv_i64 temp; 3564 3565 if (fn == NULL) { 3566 return false; 3567 } 3568 if (!sve_access_check(s)) { 3569 return true; 3570 } 3571 3572 vsz = vec_full_reg_size(s); 3573 p2vsz = pow2ceil(vsz); 3574 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3575 temp = tcg_temp_new_i64(); 3576 t_zn = tcg_temp_new_ptr(); 3577 t_pg = tcg_temp_new_ptr(); 3578 3579 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3580 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3581 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3582 3583 fn(temp, t_zn, t_pg, status, t_desc); 3584 3585 write_fp_dreg(s, a->rd, temp); 3586 return true; 3587 } 3588 3589 #define DO_VPZ(NAME, name) \ 3590 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3591 NULL, gen_helper_sve_##name##_h, \ 3592 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3593 }; \ 3594 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3595 3596 DO_VPZ(FADDV, faddv) 3597 DO_VPZ(FMINNMV, fminnmv) 3598 DO_VPZ(FMAXNMV, fmaxnmv) 3599 DO_VPZ(FMINV, fminv) 3600 DO_VPZ(FMAXV, fmaxv) 3601 3602 #undef DO_VPZ 3603 3604 /* 3605 *** SVE Floating Point Unary Operations - Unpredicated Group 3606 */ 3607 3608 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3609 NULL, gen_helper_gvec_frecpe_h, 3610 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3611 }; 3612 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3613 3614 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3615 NULL, gen_helper_gvec_frsqrte_h, 3616 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 3617 }; 3618 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3619 3620 /* 3621 *** SVE Floating Point Compare with Zero Group 3622 */ 3623 3624 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3625 gen_helper_gvec_3_ptr *fn) 3626 { 3627 if (fn == NULL) { 3628 return false; 3629 } 3630 if (sve_access_check(s)) { 3631 unsigned vsz = vec_full_reg_size(s); 3632 TCGv_ptr status = 3633 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3634 3635 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 3636 vec_full_reg_offset(s, a->rn), 3637 pred_full_reg_offset(s, a->pg), 3638 status, vsz, vsz, 0, fn); 3639 } 3640 return true; 3641 } 3642 3643 #define DO_PPZ(NAME, name) \ 3644 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 3645 NULL, gen_helper_sve_##name##_h, \ 3646 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3647 }; \ 3648 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 3649 3650 DO_PPZ(FCMGE_ppz0, fcmge0) 3651 DO_PPZ(FCMGT_ppz0, fcmgt0) 3652 DO_PPZ(FCMLE_ppz0, fcmle0) 3653 DO_PPZ(FCMLT_ppz0, fcmlt0) 3654 DO_PPZ(FCMEQ_ppz0, fcmeq0) 3655 DO_PPZ(FCMNE_ppz0, fcmne0) 3656 3657 #undef DO_PPZ 3658 3659 /* 3660 *** SVE floating-point trig multiply-add coefficient 3661 */ 3662 3663 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 3664 NULL, gen_helper_sve_ftmad_h, 3665 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 3666 }; 3667 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 3668 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, 3669 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3670 3671 /* 3672 *** SVE Floating Point Accumulating Reduction Group 3673 */ 3674 3675 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 3676 { 3677 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 3678 TCGv_ptr, TCGv_ptr, TCGv_i32); 3679 static fadda_fn * const fns[3] = { 3680 gen_helper_sve_fadda_h, 3681 gen_helper_sve_fadda_s, 3682 gen_helper_sve_fadda_d, 3683 }; 3684 unsigned vsz = vec_full_reg_size(s); 3685 TCGv_ptr t_rm, t_pg, t_fpst; 3686 TCGv_i64 t_val; 3687 TCGv_i32 t_desc; 3688 3689 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3690 return false; 3691 } 3692 s->is_nonstreaming = true; 3693 if (!sve_access_check(s)) { 3694 return true; 3695 } 3696 3697 t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 3698 t_rm = tcg_temp_new_ptr(); 3699 t_pg = tcg_temp_new_ptr(); 3700 tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); 3701 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3702 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3703 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3704 3705 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 3706 3707 write_fp_dreg(s, a->rd, t_val); 3708 return true; 3709 } 3710 3711 /* 3712 *** SVE Floating Point Arithmetic - Unpredicated Group 3713 */ 3714 3715 #define DO_FP3(NAME, name) \ 3716 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 3717 NULL, gen_helper_gvec_##name##_h, \ 3718 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 3719 }; \ 3720 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 3721 3722 DO_FP3(FADD_zzz, fadd) 3723 DO_FP3(FSUB_zzz, fsub) 3724 DO_FP3(FMUL_zzz, fmul) 3725 DO_FP3(FRECPS, recps) 3726 DO_FP3(FRSQRTS, rsqrts) 3727 3728 #undef DO_FP3 3729 3730 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 3731 NULL, gen_helper_gvec_ftsmul_h, 3732 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 3733 }; 3734 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 3735 ftsmul_fns[a->esz], a, 0) 3736 3737 /* 3738 *** SVE Floating Point Arithmetic - Predicated Group 3739 */ 3740 3741 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 3742 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 3743 NULL, gen_helper_##name##_h, \ 3744 gen_helper_##name##_s, gen_helper_##name##_d \ 3745 }; \ 3746 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 3747 3748 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 3749 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 3750 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 3751 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) 3752 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) 3753 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 3754 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 3755 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) 3756 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 3757 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 3758 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 3759 3760 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 3761 TCGv_i64, TCGv_ptr, TCGv_i32); 3762 3763 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 3764 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 3765 { 3766 unsigned vsz = vec_full_reg_size(s); 3767 TCGv_ptr t_zd, t_zn, t_pg, status; 3768 TCGv_i32 desc; 3769 3770 t_zd = tcg_temp_new_ptr(); 3771 t_zn = tcg_temp_new_ptr(); 3772 t_pg = tcg_temp_new_ptr(); 3773 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); 3774 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); 3775 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3776 3777 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 3778 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3779 fn(t_zd, t_zn, t_pg, scalar, status, desc); 3780 } 3781 3782 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 3783 gen_helper_sve_fp2scalar *fn) 3784 { 3785 if (fn == NULL) { 3786 return false; 3787 } 3788 if (sve_access_check(s)) { 3789 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 3790 tcg_constant_i64(imm), fn); 3791 } 3792 return true; 3793 } 3794 3795 #define DO_FP_IMM(NAME, name, const0, const1) \ 3796 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 3797 NULL, gen_helper_sve_##name##_h, \ 3798 gen_helper_sve_##name##_s, \ 3799 gen_helper_sve_##name##_d \ 3800 }; \ 3801 static uint64_t const name##_const[4][2] = { \ 3802 { -1, -1 }, \ 3803 { float16_##const0, float16_##const1 }, \ 3804 { float32_##const0, float32_##const1 }, \ 3805 { float64_##const0, float64_##const1 }, \ 3806 }; \ 3807 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 3808 name##_const[a->esz][a->imm], name##_fns[a->esz]) 3809 3810 DO_FP_IMM(FADD, fadds, half, one) 3811 DO_FP_IMM(FSUB, fsubs, half, one) 3812 DO_FP_IMM(FMUL, fmuls, half, two) 3813 DO_FP_IMM(FSUBR, fsubrs, half, one) 3814 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 3815 DO_FP_IMM(FMINNM, fminnms, zero, one) 3816 DO_FP_IMM(FMAX, fmaxs, zero, one) 3817 DO_FP_IMM(FMIN, fmins, zero, one) 3818 3819 #undef DO_FP_IMM 3820 3821 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 3822 gen_helper_gvec_4_ptr *fn) 3823 { 3824 if (fn == NULL) { 3825 return false; 3826 } 3827 if (sve_access_check(s)) { 3828 unsigned vsz = vec_full_reg_size(s); 3829 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3830 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 3831 vec_full_reg_offset(s, a->rn), 3832 vec_full_reg_offset(s, a->rm), 3833 pred_full_reg_offset(s, a->pg), 3834 status, vsz, vsz, 0, fn); 3835 } 3836 return true; 3837 } 3838 3839 #define DO_FPCMP(NAME, name) \ 3840 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 3841 NULL, gen_helper_sve_##name##_h, \ 3842 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3843 }; \ 3844 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 3845 3846 DO_FPCMP(FCMGE, fcmge) 3847 DO_FPCMP(FCMGT, fcmgt) 3848 DO_FPCMP(FCMEQ, fcmeq) 3849 DO_FPCMP(FCMNE, fcmne) 3850 DO_FPCMP(FCMUO, fcmuo) 3851 DO_FPCMP(FACGE, facge) 3852 DO_FPCMP(FACGT, facgt) 3853 3854 #undef DO_FPCMP 3855 3856 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 3857 NULL, gen_helper_sve_fcadd_h, 3858 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 3859 }; 3860 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 3861 a->rd, a->rn, a->rm, a->pg, a->rot, 3862 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3863 3864 #define DO_FMLA(NAME, name) \ 3865 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 3866 NULL, gen_helper_sve_##name##_h, \ 3867 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3868 }; \ 3869 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 3870 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 3871 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3872 3873 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 3874 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 3875 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 3876 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 3877 3878 #undef DO_FMLA 3879 3880 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 3881 NULL, gen_helper_sve_fcmla_zpzzz_h, 3882 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 3883 }; 3884 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 3885 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 3886 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3887 3888 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 3889 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 3890 }; 3891 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 3892 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 3893 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3894 3895 /* 3896 *** SVE Floating Point Unary Operations Predicated Group 3897 */ 3898 3899 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 3900 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 3901 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3902 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 3903 3904 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 3905 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 3906 3907 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 3908 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 3909 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3910 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 3911 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3912 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 3913 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3914 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 3915 3916 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 3917 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 3918 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 3919 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 3920 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3921 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 3922 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 3923 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 3924 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3925 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 3926 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 3927 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 3928 3929 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 3930 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 3931 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 3932 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 3933 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3934 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 3935 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 3936 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 3937 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3938 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 3939 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 3940 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 3941 3942 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 3943 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 3944 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 3945 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 3946 3947 static gen_helper_gvec_3_ptr * const frint_fns[] = { 3948 NULL, 3949 gen_helper_sve_frint_h, 3950 gen_helper_sve_frint_s, 3951 gen_helper_sve_frint_d 3952 }; 3953 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 3954 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3955 3956 static gen_helper_gvec_3_ptr * const frintx_fns[] = { 3957 NULL, 3958 gen_helper_sve_frintx_h, 3959 gen_helper_sve_frintx_s, 3960 gen_helper_sve_frintx_d 3961 }; 3962 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], 3963 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3964 3965 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 3966 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) 3967 { 3968 unsigned vsz; 3969 TCGv_i32 tmode; 3970 TCGv_ptr status; 3971 3972 if (fn == NULL) { 3973 return false; 3974 } 3975 if (!sve_access_check(s)) { 3976 return true; 3977 } 3978 3979 vsz = vec_full_reg_size(s); 3980 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3981 tmode = gen_set_rmode(mode, status); 3982 3983 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 3984 vec_full_reg_offset(s, a->rn), 3985 pred_full_reg_offset(s, a->pg), 3986 status, vsz, vsz, 0, fn); 3987 3988 gen_restore_rmode(tmode, status); 3989 return true; 3990 } 3991 3992 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, 3993 FPROUNDING_TIEEVEN, frint_fns[a->esz]) 3994 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, 3995 FPROUNDING_POSINF, frint_fns[a->esz]) 3996 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, 3997 FPROUNDING_NEGINF, frint_fns[a->esz]) 3998 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, 3999 FPROUNDING_ZERO, frint_fns[a->esz]) 4000 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, 4001 FPROUNDING_TIEAWAY, frint_fns[a->esz]) 4002 4003 static gen_helper_gvec_3_ptr * const frecpx_fns[] = { 4004 NULL, gen_helper_sve_frecpx_h, 4005 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, 4006 }; 4007 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], 4008 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4009 4010 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { 4011 NULL, gen_helper_sve_fsqrt_h, 4012 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, 4013 }; 4014 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], 4015 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4016 4017 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4018 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 4019 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4020 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 4021 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4022 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 4023 4024 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4025 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 4026 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4027 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 4028 4029 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4030 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 4031 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4032 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4033 4034 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4035 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4036 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4037 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4038 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4039 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4040 4041 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4042 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4043 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4044 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4045 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4046 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4047 4048 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4049 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4050 4051 /* 4052 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4053 */ 4054 4055 /* Subroutine loading a vector register at VOFS of LEN bytes. 4056 * The load should begin at the address Rn + IMM. 4057 */ 4058 4059 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4060 int len, int rn, int imm) 4061 { 4062 int len_align = QEMU_ALIGN_DOWN(len, 16); 4063 int len_remain = len % 16; 4064 int nparts = len / 16 + ctpop8(len_remain); 4065 int midx = get_mem_index(s); 4066 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4067 TCGv_i128 t16; 4068 4069 dirty_addr = tcg_temp_new_i64(); 4070 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4071 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4072 4073 /* 4074 * Note that unpredicated load/store of vector/predicate registers 4075 * are defined as a stream of bytes, which equates to little-endian 4076 * operations on larger quantities. 4077 * Attempt to keep code expansion to a minimum by limiting the 4078 * amount of unrolling done. 4079 */ 4080 if (nparts <= 4) { 4081 int i; 4082 4083 t0 = tcg_temp_new_i64(); 4084 t1 = tcg_temp_new_i64(); 4085 t16 = tcg_temp_new_i128(); 4086 4087 for (i = 0; i < len_align; i += 16) { 4088 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4089 MO_LE | MO_128 | MO_ATOM_NONE); 4090 tcg_gen_extr_i128_i64(t0, t1, t16); 4091 tcg_gen_st_i64(t0, base, vofs + i); 4092 tcg_gen_st_i64(t1, base, vofs + i + 8); 4093 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4094 } 4095 } else { 4096 TCGLabel *loop = gen_new_label(); 4097 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4098 4099 tcg_gen_movi_ptr(i, 0); 4100 gen_set_label(loop); 4101 4102 t16 = tcg_temp_new_i128(); 4103 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4104 MO_LE | MO_128 | MO_ATOM_NONE); 4105 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4106 4107 tp = tcg_temp_new_ptr(); 4108 tcg_gen_add_ptr(tp, base, i); 4109 tcg_gen_addi_ptr(i, i, 16); 4110 4111 t0 = tcg_temp_new_i64(); 4112 t1 = tcg_temp_new_i64(); 4113 tcg_gen_extr_i128_i64(t0, t1, t16); 4114 4115 tcg_gen_st_i64(t0, tp, vofs); 4116 tcg_gen_st_i64(t1, tp, vofs + 8); 4117 4118 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4119 } 4120 4121 /* 4122 * Predicate register loads can be any multiple of 2. 4123 * Note that we still store the entire 64-bit unit into tcg_env. 4124 */ 4125 if (len_remain >= 8) { 4126 t0 = tcg_temp_new_i64(); 4127 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4128 tcg_gen_st_i64(t0, base, vofs + len_align); 4129 len_remain -= 8; 4130 len_align += 8; 4131 if (len_remain) { 4132 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4133 } 4134 } 4135 if (len_remain) { 4136 t0 = tcg_temp_new_i64(); 4137 switch (len_remain) { 4138 case 2: 4139 case 4: 4140 case 8: 4141 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4142 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4143 break; 4144 4145 case 6: 4146 t1 = tcg_temp_new_i64(); 4147 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4148 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4149 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4150 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4151 break; 4152 4153 default: 4154 g_assert_not_reached(); 4155 } 4156 tcg_gen_st_i64(t0, base, vofs + len_align); 4157 } 4158 } 4159 4160 /* Similarly for stores. */ 4161 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4162 int len, int rn, int imm) 4163 { 4164 int len_align = QEMU_ALIGN_DOWN(len, 16); 4165 int len_remain = len % 16; 4166 int nparts = len / 16 + ctpop8(len_remain); 4167 int midx = get_mem_index(s); 4168 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4169 TCGv_i128 t16; 4170 4171 dirty_addr = tcg_temp_new_i64(); 4172 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4173 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4174 4175 /* Note that unpredicated load/store of vector/predicate registers 4176 * are defined as a stream of bytes, which equates to little-endian 4177 * operations on larger quantities. There is no nice way to force 4178 * a little-endian store for aarch64_be-linux-user out of line. 4179 * 4180 * Attempt to keep code expansion to a minimum by limiting the 4181 * amount of unrolling done. 4182 */ 4183 if (nparts <= 4) { 4184 int i; 4185 4186 t0 = tcg_temp_new_i64(); 4187 t1 = tcg_temp_new_i64(); 4188 t16 = tcg_temp_new_i128(); 4189 for (i = 0; i < len_align; i += 16) { 4190 tcg_gen_ld_i64(t0, base, vofs + i); 4191 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4192 tcg_gen_concat_i64_i128(t16, t0, t1); 4193 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4194 MO_LE | MO_128 | MO_ATOM_NONE); 4195 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4196 } 4197 } else { 4198 TCGLabel *loop = gen_new_label(); 4199 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4200 4201 tcg_gen_movi_ptr(i, 0); 4202 gen_set_label(loop); 4203 4204 t0 = tcg_temp_new_i64(); 4205 t1 = tcg_temp_new_i64(); 4206 tp = tcg_temp_new_ptr(); 4207 tcg_gen_add_ptr(tp, base, i); 4208 tcg_gen_ld_i64(t0, tp, vofs); 4209 tcg_gen_ld_i64(t1, tp, vofs + 8); 4210 tcg_gen_addi_ptr(i, i, 16); 4211 4212 t16 = tcg_temp_new_i128(); 4213 tcg_gen_concat_i64_i128(t16, t0, t1); 4214 4215 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4216 MO_LE | MO_128 | MO_ATOM_NONE); 4217 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4218 4219 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4220 } 4221 4222 /* Predicate register stores can be any multiple of 2. */ 4223 if (len_remain >= 8) { 4224 t0 = tcg_temp_new_i64(); 4225 tcg_gen_ld_i64(t0, base, vofs + len_align); 4226 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4227 len_remain -= 8; 4228 len_align += 8; 4229 if (len_remain) { 4230 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4231 } 4232 } 4233 if (len_remain) { 4234 t0 = tcg_temp_new_i64(); 4235 tcg_gen_ld_i64(t0, base, vofs + len_align); 4236 4237 switch (len_remain) { 4238 case 2: 4239 case 4: 4240 case 8: 4241 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4242 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4243 break; 4244 4245 case 6: 4246 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4247 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4248 tcg_gen_shri_i64(t0, t0, 32); 4249 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4250 break; 4251 4252 default: 4253 g_assert_not_reached(); 4254 } 4255 } 4256 } 4257 4258 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4259 { 4260 if (!dc_isar_feature(aa64_sve, s)) { 4261 return false; 4262 } 4263 if (sve_access_check(s)) { 4264 int size = vec_full_reg_size(s); 4265 int off = vec_full_reg_offset(s, a->rd); 4266 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4267 } 4268 return true; 4269 } 4270 4271 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4272 { 4273 if (!dc_isar_feature(aa64_sve, s)) { 4274 return false; 4275 } 4276 if (sve_access_check(s)) { 4277 int size = pred_full_reg_size(s); 4278 int off = pred_full_reg_offset(s, a->rd); 4279 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4280 } 4281 return true; 4282 } 4283 4284 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4285 { 4286 if (!dc_isar_feature(aa64_sve, s)) { 4287 return false; 4288 } 4289 if (sve_access_check(s)) { 4290 int size = vec_full_reg_size(s); 4291 int off = vec_full_reg_offset(s, a->rd); 4292 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4293 } 4294 return true; 4295 } 4296 4297 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4298 { 4299 if (!dc_isar_feature(aa64_sve, s)) { 4300 return false; 4301 } 4302 if (sve_access_check(s)) { 4303 int size = pred_full_reg_size(s); 4304 int off = pred_full_reg_offset(s, a->rd); 4305 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4306 } 4307 return true; 4308 } 4309 4310 /* 4311 *** SVE Memory - Contiguous Load Group 4312 */ 4313 4314 /* The memory mode of the dtype. */ 4315 static const MemOp dtype_mop[16] = { 4316 MO_UB, MO_UB, MO_UB, MO_UB, 4317 MO_SL, MO_UW, MO_UW, MO_UW, 4318 MO_SW, MO_SW, MO_UL, MO_UL, 4319 MO_SB, MO_SB, MO_SB, MO_UQ 4320 }; 4321 4322 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4323 4324 /* The vector element size of dtype. */ 4325 static const uint8_t dtype_esz[16] = { 4326 0, 1, 2, 3, 4327 3, 1, 2, 3, 4328 3, 2, 2, 3, 4329 3, 2, 1, 3 4330 }; 4331 4332 uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, 4333 uint32_t msz, bool is_write, uint32_t data) 4334 { 4335 uint32_t sizem1; 4336 uint32_t desc = 0; 4337 4338 /* Assert all of the data fits, with or without MTE enabled. */ 4339 assert(nregs >= 1 && nregs <= 4); 4340 sizem1 = (nregs << msz) - 1; 4341 assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); 4342 assert(data < 1u << SVE_MTEDESC_SHIFT); 4343 4344 if (s->mte_active[0]) { 4345 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4346 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4347 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4348 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4349 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); 4350 desc <<= SVE_MTEDESC_SHIFT; 4351 } 4352 return simd_desc(vsz, vsz, desc | data); 4353 } 4354 4355 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4356 int dtype, uint32_t nregs, bool is_write, 4357 gen_helper_gvec_mem *fn) 4358 { 4359 TCGv_ptr t_pg; 4360 uint32_t desc; 4361 4362 if (!s->mte_active[0]) { 4363 addr = clean_data_tbi(s, addr); 4364 } 4365 4366 /* 4367 * For e.g. LD4, there are not enough arguments to pass all 4 4368 * registers as pointers, so encode the regno into the data field. 4369 * For consistency, do this even for LD1. 4370 */ 4371 desc = make_svemte_desc(s, vec_full_reg_size(s), nregs, 4372 dtype_msz(dtype), is_write, zt); 4373 t_pg = tcg_temp_new_ptr(); 4374 4375 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4376 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4377 } 4378 4379 /* Indexed by [mte][be][dtype][nreg] */ 4380 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4381 { /* mte inactive, little-endian */ 4382 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4383 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4384 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4385 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4386 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4387 4388 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4389 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4390 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4391 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4392 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4393 4394 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4395 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4396 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4397 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4398 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4399 4400 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4401 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4402 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4403 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4404 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4405 4406 /* mte inactive, big-endian */ 4407 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4408 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4409 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4410 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4411 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4412 4413 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4414 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4415 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4416 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4417 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4418 4419 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4420 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4421 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4422 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4423 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4424 4425 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4426 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4427 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4428 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4429 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4430 4431 { /* mte active, little-endian */ 4432 { { gen_helper_sve_ld1bb_r_mte, 4433 gen_helper_sve_ld2bb_r_mte, 4434 gen_helper_sve_ld3bb_r_mte, 4435 gen_helper_sve_ld4bb_r_mte }, 4436 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4437 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4438 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4439 4440 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4441 { gen_helper_sve_ld1hh_le_r_mte, 4442 gen_helper_sve_ld2hh_le_r_mte, 4443 gen_helper_sve_ld3hh_le_r_mte, 4444 gen_helper_sve_ld4hh_le_r_mte }, 4445 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4446 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4447 4448 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4449 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4450 { gen_helper_sve_ld1ss_le_r_mte, 4451 gen_helper_sve_ld2ss_le_r_mte, 4452 gen_helper_sve_ld3ss_le_r_mte, 4453 gen_helper_sve_ld4ss_le_r_mte }, 4454 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4455 4456 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4457 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4458 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4459 { gen_helper_sve_ld1dd_le_r_mte, 4460 gen_helper_sve_ld2dd_le_r_mte, 4461 gen_helper_sve_ld3dd_le_r_mte, 4462 gen_helper_sve_ld4dd_le_r_mte } }, 4463 4464 /* mte active, big-endian */ 4465 { { gen_helper_sve_ld1bb_r_mte, 4466 gen_helper_sve_ld2bb_r_mte, 4467 gen_helper_sve_ld3bb_r_mte, 4468 gen_helper_sve_ld4bb_r_mte }, 4469 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4470 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4471 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4472 4473 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4474 { gen_helper_sve_ld1hh_be_r_mte, 4475 gen_helper_sve_ld2hh_be_r_mte, 4476 gen_helper_sve_ld3hh_be_r_mte, 4477 gen_helper_sve_ld4hh_be_r_mte }, 4478 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4479 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4480 4481 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4482 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4483 { gen_helper_sve_ld1ss_be_r_mte, 4484 gen_helper_sve_ld2ss_be_r_mte, 4485 gen_helper_sve_ld3ss_be_r_mte, 4486 gen_helper_sve_ld4ss_be_r_mte }, 4487 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4488 4489 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4490 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4491 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4492 { gen_helper_sve_ld1dd_be_r_mte, 4493 gen_helper_sve_ld2dd_be_r_mte, 4494 gen_helper_sve_ld3dd_be_r_mte, 4495 gen_helper_sve_ld4dd_be_r_mte } } }, 4496 }; 4497 4498 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4499 TCGv_i64 addr, int dtype, int nreg) 4500 { 4501 gen_helper_gvec_mem *fn 4502 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4503 4504 /* 4505 * While there are holes in the table, they are not 4506 * accessible via the instruction encoding. 4507 */ 4508 assert(fn != NULL); 4509 do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn); 4510 } 4511 4512 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4513 { 4514 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4515 return false; 4516 } 4517 if (sve_access_check(s)) { 4518 TCGv_i64 addr = tcg_temp_new_i64(); 4519 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4520 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4521 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4522 } 4523 return true; 4524 } 4525 4526 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4527 { 4528 if (!dc_isar_feature(aa64_sve, s)) { 4529 return false; 4530 } 4531 if (sve_access_check(s)) { 4532 int vsz = vec_full_reg_size(s); 4533 int elements = vsz >> dtype_esz[a->dtype]; 4534 TCGv_i64 addr = tcg_temp_new_i64(); 4535 4536 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4537 (a->imm * elements * (a->nreg + 1)) 4538 << dtype_msz(a->dtype)); 4539 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4540 } 4541 return true; 4542 } 4543 4544 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4545 { 4546 static gen_helper_gvec_mem * const fns[2][2][16] = { 4547 { /* mte inactive, little-endian */ 4548 { gen_helper_sve_ldff1bb_r, 4549 gen_helper_sve_ldff1bhu_r, 4550 gen_helper_sve_ldff1bsu_r, 4551 gen_helper_sve_ldff1bdu_r, 4552 4553 gen_helper_sve_ldff1sds_le_r, 4554 gen_helper_sve_ldff1hh_le_r, 4555 gen_helper_sve_ldff1hsu_le_r, 4556 gen_helper_sve_ldff1hdu_le_r, 4557 4558 gen_helper_sve_ldff1hds_le_r, 4559 gen_helper_sve_ldff1hss_le_r, 4560 gen_helper_sve_ldff1ss_le_r, 4561 gen_helper_sve_ldff1sdu_le_r, 4562 4563 gen_helper_sve_ldff1bds_r, 4564 gen_helper_sve_ldff1bss_r, 4565 gen_helper_sve_ldff1bhs_r, 4566 gen_helper_sve_ldff1dd_le_r }, 4567 4568 /* mte inactive, big-endian */ 4569 { gen_helper_sve_ldff1bb_r, 4570 gen_helper_sve_ldff1bhu_r, 4571 gen_helper_sve_ldff1bsu_r, 4572 gen_helper_sve_ldff1bdu_r, 4573 4574 gen_helper_sve_ldff1sds_be_r, 4575 gen_helper_sve_ldff1hh_be_r, 4576 gen_helper_sve_ldff1hsu_be_r, 4577 gen_helper_sve_ldff1hdu_be_r, 4578 4579 gen_helper_sve_ldff1hds_be_r, 4580 gen_helper_sve_ldff1hss_be_r, 4581 gen_helper_sve_ldff1ss_be_r, 4582 gen_helper_sve_ldff1sdu_be_r, 4583 4584 gen_helper_sve_ldff1bds_r, 4585 gen_helper_sve_ldff1bss_r, 4586 gen_helper_sve_ldff1bhs_r, 4587 gen_helper_sve_ldff1dd_be_r } }, 4588 4589 { /* mte active, little-endian */ 4590 { gen_helper_sve_ldff1bb_r_mte, 4591 gen_helper_sve_ldff1bhu_r_mte, 4592 gen_helper_sve_ldff1bsu_r_mte, 4593 gen_helper_sve_ldff1bdu_r_mte, 4594 4595 gen_helper_sve_ldff1sds_le_r_mte, 4596 gen_helper_sve_ldff1hh_le_r_mte, 4597 gen_helper_sve_ldff1hsu_le_r_mte, 4598 gen_helper_sve_ldff1hdu_le_r_mte, 4599 4600 gen_helper_sve_ldff1hds_le_r_mte, 4601 gen_helper_sve_ldff1hss_le_r_mte, 4602 gen_helper_sve_ldff1ss_le_r_mte, 4603 gen_helper_sve_ldff1sdu_le_r_mte, 4604 4605 gen_helper_sve_ldff1bds_r_mte, 4606 gen_helper_sve_ldff1bss_r_mte, 4607 gen_helper_sve_ldff1bhs_r_mte, 4608 gen_helper_sve_ldff1dd_le_r_mte }, 4609 4610 /* mte active, big-endian */ 4611 { gen_helper_sve_ldff1bb_r_mte, 4612 gen_helper_sve_ldff1bhu_r_mte, 4613 gen_helper_sve_ldff1bsu_r_mte, 4614 gen_helper_sve_ldff1bdu_r_mte, 4615 4616 gen_helper_sve_ldff1sds_be_r_mte, 4617 gen_helper_sve_ldff1hh_be_r_mte, 4618 gen_helper_sve_ldff1hsu_be_r_mte, 4619 gen_helper_sve_ldff1hdu_be_r_mte, 4620 4621 gen_helper_sve_ldff1hds_be_r_mte, 4622 gen_helper_sve_ldff1hss_be_r_mte, 4623 gen_helper_sve_ldff1ss_be_r_mte, 4624 gen_helper_sve_ldff1sdu_be_r_mte, 4625 4626 gen_helper_sve_ldff1bds_r_mte, 4627 gen_helper_sve_ldff1bss_r_mte, 4628 gen_helper_sve_ldff1bhs_r_mte, 4629 gen_helper_sve_ldff1dd_be_r_mte } }, 4630 }; 4631 4632 if (!dc_isar_feature(aa64_sve, s)) { 4633 return false; 4634 } 4635 s->is_nonstreaming = true; 4636 if (sve_access_check(s)) { 4637 TCGv_i64 addr = tcg_temp_new_i64(); 4638 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4639 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4640 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4641 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4642 } 4643 return true; 4644 } 4645 4646 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 4647 { 4648 static gen_helper_gvec_mem * const fns[2][2][16] = { 4649 { /* mte inactive, little-endian */ 4650 { gen_helper_sve_ldnf1bb_r, 4651 gen_helper_sve_ldnf1bhu_r, 4652 gen_helper_sve_ldnf1bsu_r, 4653 gen_helper_sve_ldnf1bdu_r, 4654 4655 gen_helper_sve_ldnf1sds_le_r, 4656 gen_helper_sve_ldnf1hh_le_r, 4657 gen_helper_sve_ldnf1hsu_le_r, 4658 gen_helper_sve_ldnf1hdu_le_r, 4659 4660 gen_helper_sve_ldnf1hds_le_r, 4661 gen_helper_sve_ldnf1hss_le_r, 4662 gen_helper_sve_ldnf1ss_le_r, 4663 gen_helper_sve_ldnf1sdu_le_r, 4664 4665 gen_helper_sve_ldnf1bds_r, 4666 gen_helper_sve_ldnf1bss_r, 4667 gen_helper_sve_ldnf1bhs_r, 4668 gen_helper_sve_ldnf1dd_le_r }, 4669 4670 /* mte inactive, big-endian */ 4671 { gen_helper_sve_ldnf1bb_r, 4672 gen_helper_sve_ldnf1bhu_r, 4673 gen_helper_sve_ldnf1bsu_r, 4674 gen_helper_sve_ldnf1bdu_r, 4675 4676 gen_helper_sve_ldnf1sds_be_r, 4677 gen_helper_sve_ldnf1hh_be_r, 4678 gen_helper_sve_ldnf1hsu_be_r, 4679 gen_helper_sve_ldnf1hdu_be_r, 4680 4681 gen_helper_sve_ldnf1hds_be_r, 4682 gen_helper_sve_ldnf1hss_be_r, 4683 gen_helper_sve_ldnf1ss_be_r, 4684 gen_helper_sve_ldnf1sdu_be_r, 4685 4686 gen_helper_sve_ldnf1bds_r, 4687 gen_helper_sve_ldnf1bss_r, 4688 gen_helper_sve_ldnf1bhs_r, 4689 gen_helper_sve_ldnf1dd_be_r } }, 4690 4691 { /* mte inactive, little-endian */ 4692 { gen_helper_sve_ldnf1bb_r_mte, 4693 gen_helper_sve_ldnf1bhu_r_mte, 4694 gen_helper_sve_ldnf1bsu_r_mte, 4695 gen_helper_sve_ldnf1bdu_r_mte, 4696 4697 gen_helper_sve_ldnf1sds_le_r_mte, 4698 gen_helper_sve_ldnf1hh_le_r_mte, 4699 gen_helper_sve_ldnf1hsu_le_r_mte, 4700 gen_helper_sve_ldnf1hdu_le_r_mte, 4701 4702 gen_helper_sve_ldnf1hds_le_r_mte, 4703 gen_helper_sve_ldnf1hss_le_r_mte, 4704 gen_helper_sve_ldnf1ss_le_r_mte, 4705 gen_helper_sve_ldnf1sdu_le_r_mte, 4706 4707 gen_helper_sve_ldnf1bds_r_mte, 4708 gen_helper_sve_ldnf1bss_r_mte, 4709 gen_helper_sve_ldnf1bhs_r_mte, 4710 gen_helper_sve_ldnf1dd_le_r_mte }, 4711 4712 /* mte inactive, big-endian */ 4713 { gen_helper_sve_ldnf1bb_r_mte, 4714 gen_helper_sve_ldnf1bhu_r_mte, 4715 gen_helper_sve_ldnf1bsu_r_mte, 4716 gen_helper_sve_ldnf1bdu_r_mte, 4717 4718 gen_helper_sve_ldnf1sds_be_r_mte, 4719 gen_helper_sve_ldnf1hh_be_r_mte, 4720 gen_helper_sve_ldnf1hsu_be_r_mte, 4721 gen_helper_sve_ldnf1hdu_be_r_mte, 4722 4723 gen_helper_sve_ldnf1hds_be_r_mte, 4724 gen_helper_sve_ldnf1hss_be_r_mte, 4725 gen_helper_sve_ldnf1ss_be_r_mte, 4726 gen_helper_sve_ldnf1sdu_be_r_mte, 4727 4728 gen_helper_sve_ldnf1bds_r_mte, 4729 gen_helper_sve_ldnf1bss_r_mte, 4730 gen_helper_sve_ldnf1bhs_r_mte, 4731 gen_helper_sve_ldnf1dd_be_r_mte } }, 4732 }; 4733 4734 if (!dc_isar_feature(aa64_sve, s)) { 4735 return false; 4736 } 4737 s->is_nonstreaming = true; 4738 if (sve_access_check(s)) { 4739 int vsz = vec_full_reg_size(s); 4740 int elements = vsz >> dtype_esz[a->dtype]; 4741 int off = (a->imm * elements) << dtype_msz(a->dtype); 4742 TCGv_i64 addr = tcg_temp_new_i64(); 4743 4744 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4745 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4746 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4747 } 4748 return true; 4749 } 4750 4751 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4752 { 4753 unsigned vsz = vec_full_reg_size(s); 4754 TCGv_ptr t_pg; 4755 int poff; 4756 uint32_t desc; 4757 4758 /* Load the first quadword using the normal predicated load helpers. */ 4759 if (!s->mte_active[0]) { 4760 addr = clean_data_tbi(s, addr); 4761 } 4762 4763 poff = pred_full_reg_offset(s, pg); 4764 if (vsz > 16) { 4765 /* 4766 * Zero-extend the first 16 bits of the predicate into a temporary. 4767 * This avoids triggering an assert making sure we don't have bits 4768 * set within a predicate beyond VQ, but we have lowered VQ to 1 4769 * for this load operation. 4770 */ 4771 TCGv_i64 tmp = tcg_temp_new_i64(); 4772 #if HOST_BIG_ENDIAN 4773 poff += 6; 4774 #endif 4775 tcg_gen_ld16u_i64(tmp, tcg_env, poff); 4776 4777 poff = offsetof(CPUARMState, vfp.preg_tmp); 4778 tcg_gen_st_i64(tmp, tcg_env, poff); 4779 } 4780 4781 t_pg = tcg_temp_new_ptr(); 4782 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4783 4784 gen_helper_gvec_mem *fn 4785 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4786 desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); 4787 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4788 4789 /* Replicate that first quadword. */ 4790 if (vsz > 16) { 4791 int doff = vec_full_reg_offset(s, zt); 4792 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4793 } 4794 } 4795 4796 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 4797 { 4798 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4799 return false; 4800 } 4801 if (sve_access_check(s)) { 4802 int msz = dtype_msz(a->dtype); 4803 TCGv_i64 addr = tcg_temp_new_i64(); 4804 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 4805 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4806 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4807 } 4808 return true; 4809 } 4810 4811 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 4812 { 4813 if (!dc_isar_feature(aa64_sve, s)) { 4814 return false; 4815 } 4816 if (sve_access_check(s)) { 4817 TCGv_i64 addr = tcg_temp_new_i64(); 4818 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 4819 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4820 } 4821 return true; 4822 } 4823 4824 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4825 { 4826 unsigned vsz = vec_full_reg_size(s); 4827 unsigned vsz_r32; 4828 TCGv_ptr t_pg; 4829 int poff, doff; 4830 uint32_t desc; 4831 4832 if (vsz < 32) { 4833 /* 4834 * Note that this UNDEFINED check comes after CheckSVEEnabled() 4835 * in the ARM pseudocode, which is the sve_access_check() done 4836 * in our caller. We should not now return false from the caller. 4837 */ 4838 unallocated_encoding(s); 4839 return; 4840 } 4841 4842 /* Load the first octaword using the normal predicated load helpers. */ 4843 if (!s->mte_active[0]) { 4844 addr = clean_data_tbi(s, addr); 4845 } 4846 4847 poff = pred_full_reg_offset(s, pg); 4848 if (vsz > 32) { 4849 /* 4850 * Zero-extend the first 32 bits of the predicate into a temporary. 4851 * This avoids triggering an assert making sure we don't have bits 4852 * set within a predicate beyond VQ, but we have lowered VQ to 2 4853 * for this load operation. 4854 */ 4855 TCGv_i64 tmp = tcg_temp_new_i64(); 4856 #if HOST_BIG_ENDIAN 4857 poff += 4; 4858 #endif 4859 tcg_gen_ld32u_i64(tmp, tcg_env, poff); 4860 4861 poff = offsetof(CPUARMState, vfp.preg_tmp); 4862 tcg_gen_st_i64(tmp, tcg_env, poff); 4863 } 4864 4865 t_pg = tcg_temp_new_ptr(); 4866 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4867 4868 gen_helper_gvec_mem *fn 4869 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4870 desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); 4871 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4872 4873 /* 4874 * Replicate that first octaword. 4875 * The replication happens in units of 32; if the full vector size 4876 * is not a multiple of 32, the final bits are zeroed. 4877 */ 4878 doff = vec_full_reg_offset(s, zt); 4879 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 4880 if (vsz >= 64) { 4881 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 4882 } 4883 vsz -= vsz_r32; 4884 if (vsz) { 4885 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 4886 } 4887 } 4888 4889 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 4890 { 4891 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4892 return false; 4893 } 4894 if (a->rm == 31) { 4895 return false; 4896 } 4897 s->is_nonstreaming = true; 4898 if (sve_access_check(s)) { 4899 TCGv_i64 addr = tcg_temp_new_i64(); 4900 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4901 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4902 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4903 } 4904 return true; 4905 } 4906 4907 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 4908 { 4909 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4910 return false; 4911 } 4912 s->is_nonstreaming = true; 4913 if (sve_access_check(s)) { 4914 TCGv_i64 addr = tcg_temp_new_i64(); 4915 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 4916 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4917 } 4918 return true; 4919 } 4920 4921 /* Load and broadcast element. */ 4922 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 4923 { 4924 unsigned vsz = vec_full_reg_size(s); 4925 unsigned psz = pred_full_reg_size(s); 4926 unsigned esz = dtype_esz[a->dtype]; 4927 unsigned msz = dtype_msz(a->dtype); 4928 TCGLabel *over; 4929 TCGv_i64 temp, clean_addr; 4930 MemOp memop; 4931 4932 if (!dc_isar_feature(aa64_sve, s)) { 4933 return false; 4934 } 4935 if (!sve_access_check(s)) { 4936 return true; 4937 } 4938 4939 over = gen_new_label(); 4940 4941 /* If the guarding predicate has no bits set, no load occurs. */ 4942 if (psz <= 8) { 4943 /* Reduce the pred_esz_masks value simply to reduce the 4944 * size of the code generated here. 4945 */ 4946 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 4947 temp = tcg_temp_new_i64(); 4948 tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg)); 4949 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 4950 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 4951 } else { 4952 TCGv_i32 t32 = tcg_temp_new_i32(); 4953 find_last_active(s, t32, esz, a->pg); 4954 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 4955 } 4956 4957 /* Load the data. */ 4958 temp = tcg_temp_new_i64(); 4959 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 4960 4961 memop = finalize_memop(s, dtype_mop[a->dtype]); 4962 clean_addr = gen_mte_check1(s, temp, false, true, memop); 4963 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); 4964 4965 /* Broadcast to *all* elements. */ 4966 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4967 vsz, vsz, temp); 4968 4969 /* Zero the inactive elements. */ 4970 gen_set_label(over); 4971 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 4972 } 4973 4974 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4975 int msz, int esz, int nreg) 4976 { 4977 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 4978 { { { gen_helper_sve_st1bb_r, 4979 gen_helper_sve_st1bh_r, 4980 gen_helper_sve_st1bs_r, 4981 gen_helper_sve_st1bd_r }, 4982 { NULL, 4983 gen_helper_sve_st1hh_le_r, 4984 gen_helper_sve_st1hs_le_r, 4985 gen_helper_sve_st1hd_le_r }, 4986 { NULL, NULL, 4987 gen_helper_sve_st1ss_le_r, 4988 gen_helper_sve_st1sd_le_r }, 4989 { NULL, NULL, NULL, 4990 gen_helper_sve_st1dd_le_r } }, 4991 { { gen_helper_sve_st1bb_r, 4992 gen_helper_sve_st1bh_r, 4993 gen_helper_sve_st1bs_r, 4994 gen_helper_sve_st1bd_r }, 4995 { NULL, 4996 gen_helper_sve_st1hh_be_r, 4997 gen_helper_sve_st1hs_be_r, 4998 gen_helper_sve_st1hd_be_r }, 4999 { NULL, NULL, 5000 gen_helper_sve_st1ss_be_r, 5001 gen_helper_sve_st1sd_be_r }, 5002 { NULL, NULL, NULL, 5003 gen_helper_sve_st1dd_be_r } } }, 5004 5005 { { { gen_helper_sve_st1bb_r_mte, 5006 gen_helper_sve_st1bh_r_mte, 5007 gen_helper_sve_st1bs_r_mte, 5008 gen_helper_sve_st1bd_r_mte }, 5009 { NULL, 5010 gen_helper_sve_st1hh_le_r_mte, 5011 gen_helper_sve_st1hs_le_r_mte, 5012 gen_helper_sve_st1hd_le_r_mte }, 5013 { NULL, NULL, 5014 gen_helper_sve_st1ss_le_r_mte, 5015 gen_helper_sve_st1sd_le_r_mte }, 5016 { NULL, NULL, NULL, 5017 gen_helper_sve_st1dd_le_r_mte } }, 5018 { { gen_helper_sve_st1bb_r_mte, 5019 gen_helper_sve_st1bh_r_mte, 5020 gen_helper_sve_st1bs_r_mte, 5021 gen_helper_sve_st1bd_r_mte }, 5022 { NULL, 5023 gen_helper_sve_st1hh_be_r_mte, 5024 gen_helper_sve_st1hs_be_r_mte, 5025 gen_helper_sve_st1hd_be_r_mte }, 5026 { NULL, NULL, 5027 gen_helper_sve_st1ss_be_r_mte, 5028 gen_helper_sve_st1sd_be_r_mte }, 5029 { NULL, NULL, NULL, 5030 gen_helper_sve_st1dd_be_r_mte } } }, 5031 }; 5032 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5033 { { { gen_helper_sve_st2bb_r, 5034 gen_helper_sve_st2hh_le_r, 5035 gen_helper_sve_st2ss_le_r, 5036 gen_helper_sve_st2dd_le_r }, 5037 { gen_helper_sve_st3bb_r, 5038 gen_helper_sve_st3hh_le_r, 5039 gen_helper_sve_st3ss_le_r, 5040 gen_helper_sve_st3dd_le_r }, 5041 { gen_helper_sve_st4bb_r, 5042 gen_helper_sve_st4hh_le_r, 5043 gen_helper_sve_st4ss_le_r, 5044 gen_helper_sve_st4dd_le_r } }, 5045 { { gen_helper_sve_st2bb_r, 5046 gen_helper_sve_st2hh_be_r, 5047 gen_helper_sve_st2ss_be_r, 5048 gen_helper_sve_st2dd_be_r }, 5049 { gen_helper_sve_st3bb_r, 5050 gen_helper_sve_st3hh_be_r, 5051 gen_helper_sve_st3ss_be_r, 5052 gen_helper_sve_st3dd_be_r }, 5053 { gen_helper_sve_st4bb_r, 5054 gen_helper_sve_st4hh_be_r, 5055 gen_helper_sve_st4ss_be_r, 5056 gen_helper_sve_st4dd_be_r } } }, 5057 { { { gen_helper_sve_st2bb_r_mte, 5058 gen_helper_sve_st2hh_le_r_mte, 5059 gen_helper_sve_st2ss_le_r_mte, 5060 gen_helper_sve_st2dd_le_r_mte }, 5061 { gen_helper_sve_st3bb_r_mte, 5062 gen_helper_sve_st3hh_le_r_mte, 5063 gen_helper_sve_st3ss_le_r_mte, 5064 gen_helper_sve_st3dd_le_r_mte }, 5065 { gen_helper_sve_st4bb_r_mte, 5066 gen_helper_sve_st4hh_le_r_mte, 5067 gen_helper_sve_st4ss_le_r_mte, 5068 gen_helper_sve_st4dd_le_r_mte } }, 5069 { { gen_helper_sve_st2bb_r_mte, 5070 gen_helper_sve_st2hh_be_r_mte, 5071 gen_helper_sve_st2ss_be_r_mte, 5072 gen_helper_sve_st2dd_be_r_mte }, 5073 { gen_helper_sve_st3bb_r_mte, 5074 gen_helper_sve_st3hh_be_r_mte, 5075 gen_helper_sve_st3ss_be_r_mte, 5076 gen_helper_sve_st3dd_be_r_mte }, 5077 { gen_helper_sve_st4bb_r_mte, 5078 gen_helper_sve_st4hh_be_r_mte, 5079 gen_helper_sve_st4ss_be_r_mte, 5080 gen_helper_sve_st4dd_be_r_mte } } }, 5081 }; 5082 gen_helper_gvec_mem *fn; 5083 int be = s->be_data == MO_BE; 5084 5085 if (nreg == 0) { 5086 /* ST1 */ 5087 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5088 } else { 5089 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5090 assert(msz == esz); 5091 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5092 } 5093 assert(fn != NULL); 5094 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn); 5095 } 5096 5097 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5098 { 5099 if (!dc_isar_feature(aa64_sve, s)) { 5100 return false; 5101 } 5102 if (a->rm == 31 || a->msz > a->esz) { 5103 return false; 5104 } 5105 if (sve_access_check(s)) { 5106 TCGv_i64 addr = tcg_temp_new_i64(); 5107 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5108 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5109 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5110 } 5111 return true; 5112 } 5113 5114 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5115 { 5116 if (!dc_isar_feature(aa64_sve, s)) { 5117 return false; 5118 } 5119 if (a->msz > a->esz) { 5120 return false; 5121 } 5122 if (sve_access_check(s)) { 5123 int vsz = vec_full_reg_size(s); 5124 int elements = vsz >> a->esz; 5125 TCGv_i64 addr = tcg_temp_new_i64(); 5126 5127 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5128 (a->imm * elements * (a->nreg + 1)) << a->msz); 5129 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5130 } 5131 return true; 5132 } 5133 5134 /* 5135 *** SVE gather loads / scatter stores 5136 */ 5137 5138 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5139 int scale, TCGv_i64 scalar, int msz, bool is_write, 5140 gen_helper_gvec_mem_scatter *fn) 5141 { 5142 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5143 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5144 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5145 uint32_t desc; 5146 5147 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 5148 tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); 5149 tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); 5150 5151 desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); 5152 fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5153 } 5154 5155 /* Indexed by [mte][be][ff][xs][u][msz]. */ 5156 static gen_helper_gvec_mem_scatter * const 5157 gather_load_fn32[2][2][2][2][2][3] = { 5158 { /* MTE Inactive */ 5159 { /* Little-endian */ 5160 { { { gen_helper_sve_ldbss_zsu, 5161 gen_helper_sve_ldhss_le_zsu, 5162 NULL, }, 5163 { gen_helper_sve_ldbsu_zsu, 5164 gen_helper_sve_ldhsu_le_zsu, 5165 gen_helper_sve_ldss_le_zsu, } }, 5166 { { gen_helper_sve_ldbss_zss, 5167 gen_helper_sve_ldhss_le_zss, 5168 NULL, }, 5169 { gen_helper_sve_ldbsu_zss, 5170 gen_helper_sve_ldhsu_le_zss, 5171 gen_helper_sve_ldss_le_zss, } } }, 5172 5173 /* First-fault */ 5174 { { { gen_helper_sve_ldffbss_zsu, 5175 gen_helper_sve_ldffhss_le_zsu, 5176 NULL, }, 5177 { gen_helper_sve_ldffbsu_zsu, 5178 gen_helper_sve_ldffhsu_le_zsu, 5179 gen_helper_sve_ldffss_le_zsu, } }, 5180 { { gen_helper_sve_ldffbss_zss, 5181 gen_helper_sve_ldffhss_le_zss, 5182 NULL, }, 5183 { gen_helper_sve_ldffbsu_zss, 5184 gen_helper_sve_ldffhsu_le_zss, 5185 gen_helper_sve_ldffss_le_zss, } } } }, 5186 5187 { /* Big-endian */ 5188 { { { gen_helper_sve_ldbss_zsu, 5189 gen_helper_sve_ldhss_be_zsu, 5190 NULL, }, 5191 { gen_helper_sve_ldbsu_zsu, 5192 gen_helper_sve_ldhsu_be_zsu, 5193 gen_helper_sve_ldss_be_zsu, } }, 5194 { { gen_helper_sve_ldbss_zss, 5195 gen_helper_sve_ldhss_be_zss, 5196 NULL, }, 5197 { gen_helper_sve_ldbsu_zss, 5198 gen_helper_sve_ldhsu_be_zss, 5199 gen_helper_sve_ldss_be_zss, } } }, 5200 5201 /* First-fault */ 5202 { { { gen_helper_sve_ldffbss_zsu, 5203 gen_helper_sve_ldffhss_be_zsu, 5204 NULL, }, 5205 { gen_helper_sve_ldffbsu_zsu, 5206 gen_helper_sve_ldffhsu_be_zsu, 5207 gen_helper_sve_ldffss_be_zsu, } }, 5208 { { gen_helper_sve_ldffbss_zss, 5209 gen_helper_sve_ldffhss_be_zss, 5210 NULL, }, 5211 { gen_helper_sve_ldffbsu_zss, 5212 gen_helper_sve_ldffhsu_be_zss, 5213 gen_helper_sve_ldffss_be_zss, } } } } }, 5214 { /* MTE Active */ 5215 { /* Little-endian */ 5216 { { { gen_helper_sve_ldbss_zsu_mte, 5217 gen_helper_sve_ldhss_le_zsu_mte, 5218 NULL, }, 5219 { gen_helper_sve_ldbsu_zsu_mte, 5220 gen_helper_sve_ldhsu_le_zsu_mte, 5221 gen_helper_sve_ldss_le_zsu_mte, } }, 5222 { { gen_helper_sve_ldbss_zss_mte, 5223 gen_helper_sve_ldhss_le_zss_mte, 5224 NULL, }, 5225 { gen_helper_sve_ldbsu_zss_mte, 5226 gen_helper_sve_ldhsu_le_zss_mte, 5227 gen_helper_sve_ldss_le_zss_mte, } } }, 5228 5229 /* First-fault */ 5230 { { { gen_helper_sve_ldffbss_zsu_mte, 5231 gen_helper_sve_ldffhss_le_zsu_mte, 5232 NULL, }, 5233 { gen_helper_sve_ldffbsu_zsu_mte, 5234 gen_helper_sve_ldffhsu_le_zsu_mte, 5235 gen_helper_sve_ldffss_le_zsu_mte, } }, 5236 { { gen_helper_sve_ldffbss_zss_mte, 5237 gen_helper_sve_ldffhss_le_zss_mte, 5238 NULL, }, 5239 { gen_helper_sve_ldffbsu_zss_mte, 5240 gen_helper_sve_ldffhsu_le_zss_mte, 5241 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5242 5243 { /* Big-endian */ 5244 { { { gen_helper_sve_ldbss_zsu_mte, 5245 gen_helper_sve_ldhss_be_zsu_mte, 5246 NULL, }, 5247 { gen_helper_sve_ldbsu_zsu_mte, 5248 gen_helper_sve_ldhsu_be_zsu_mte, 5249 gen_helper_sve_ldss_be_zsu_mte, } }, 5250 { { gen_helper_sve_ldbss_zss_mte, 5251 gen_helper_sve_ldhss_be_zss_mte, 5252 NULL, }, 5253 { gen_helper_sve_ldbsu_zss_mte, 5254 gen_helper_sve_ldhsu_be_zss_mte, 5255 gen_helper_sve_ldss_be_zss_mte, } } }, 5256 5257 /* First-fault */ 5258 { { { gen_helper_sve_ldffbss_zsu_mte, 5259 gen_helper_sve_ldffhss_be_zsu_mte, 5260 NULL, }, 5261 { gen_helper_sve_ldffbsu_zsu_mte, 5262 gen_helper_sve_ldffhsu_be_zsu_mte, 5263 gen_helper_sve_ldffss_be_zsu_mte, } }, 5264 { { gen_helper_sve_ldffbss_zss_mte, 5265 gen_helper_sve_ldffhss_be_zss_mte, 5266 NULL, }, 5267 { gen_helper_sve_ldffbsu_zss_mte, 5268 gen_helper_sve_ldffhsu_be_zss_mte, 5269 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 5270 }; 5271 5272 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5273 static gen_helper_gvec_mem_scatter * const 5274 gather_load_fn64[2][2][2][3][2][4] = { 5275 { /* MTE Inactive */ 5276 { /* Little-endian */ 5277 { { { gen_helper_sve_ldbds_zsu, 5278 gen_helper_sve_ldhds_le_zsu, 5279 gen_helper_sve_ldsds_le_zsu, 5280 NULL, }, 5281 { gen_helper_sve_ldbdu_zsu, 5282 gen_helper_sve_ldhdu_le_zsu, 5283 gen_helper_sve_ldsdu_le_zsu, 5284 gen_helper_sve_lddd_le_zsu, } }, 5285 { { gen_helper_sve_ldbds_zss, 5286 gen_helper_sve_ldhds_le_zss, 5287 gen_helper_sve_ldsds_le_zss, 5288 NULL, }, 5289 { gen_helper_sve_ldbdu_zss, 5290 gen_helper_sve_ldhdu_le_zss, 5291 gen_helper_sve_ldsdu_le_zss, 5292 gen_helper_sve_lddd_le_zss, } }, 5293 { { gen_helper_sve_ldbds_zd, 5294 gen_helper_sve_ldhds_le_zd, 5295 gen_helper_sve_ldsds_le_zd, 5296 NULL, }, 5297 { gen_helper_sve_ldbdu_zd, 5298 gen_helper_sve_ldhdu_le_zd, 5299 gen_helper_sve_ldsdu_le_zd, 5300 gen_helper_sve_lddd_le_zd, } } }, 5301 5302 /* First-fault */ 5303 { { { gen_helper_sve_ldffbds_zsu, 5304 gen_helper_sve_ldffhds_le_zsu, 5305 gen_helper_sve_ldffsds_le_zsu, 5306 NULL, }, 5307 { gen_helper_sve_ldffbdu_zsu, 5308 gen_helper_sve_ldffhdu_le_zsu, 5309 gen_helper_sve_ldffsdu_le_zsu, 5310 gen_helper_sve_ldffdd_le_zsu, } }, 5311 { { gen_helper_sve_ldffbds_zss, 5312 gen_helper_sve_ldffhds_le_zss, 5313 gen_helper_sve_ldffsds_le_zss, 5314 NULL, }, 5315 { gen_helper_sve_ldffbdu_zss, 5316 gen_helper_sve_ldffhdu_le_zss, 5317 gen_helper_sve_ldffsdu_le_zss, 5318 gen_helper_sve_ldffdd_le_zss, } }, 5319 { { gen_helper_sve_ldffbds_zd, 5320 gen_helper_sve_ldffhds_le_zd, 5321 gen_helper_sve_ldffsds_le_zd, 5322 NULL, }, 5323 { gen_helper_sve_ldffbdu_zd, 5324 gen_helper_sve_ldffhdu_le_zd, 5325 gen_helper_sve_ldffsdu_le_zd, 5326 gen_helper_sve_ldffdd_le_zd, } } } }, 5327 { /* Big-endian */ 5328 { { { gen_helper_sve_ldbds_zsu, 5329 gen_helper_sve_ldhds_be_zsu, 5330 gen_helper_sve_ldsds_be_zsu, 5331 NULL, }, 5332 { gen_helper_sve_ldbdu_zsu, 5333 gen_helper_sve_ldhdu_be_zsu, 5334 gen_helper_sve_ldsdu_be_zsu, 5335 gen_helper_sve_lddd_be_zsu, } }, 5336 { { gen_helper_sve_ldbds_zss, 5337 gen_helper_sve_ldhds_be_zss, 5338 gen_helper_sve_ldsds_be_zss, 5339 NULL, }, 5340 { gen_helper_sve_ldbdu_zss, 5341 gen_helper_sve_ldhdu_be_zss, 5342 gen_helper_sve_ldsdu_be_zss, 5343 gen_helper_sve_lddd_be_zss, } }, 5344 { { gen_helper_sve_ldbds_zd, 5345 gen_helper_sve_ldhds_be_zd, 5346 gen_helper_sve_ldsds_be_zd, 5347 NULL, }, 5348 { gen_helper_sve_ldbdu_zd, 5349 gen_helper_sve_ldhdu_be_zd, 5350 gen_helper_sve_ldsdu_be_zd, 5351 gen_helper_sve_lddd_be_zd, } } }, 5352 5353 /* First-fault */ 5354 { { { gen_helper_sve_ldffbds_zsu, 5355 gen_helper_sve_ldffhds_be_zsu, 5356 gen_helper_sve_ldffsds_be_zsu, 5357 NULL, }, 5358 { gen_helper_sve_ldffbdu_zsu, 5359 gen_helper_sve_ldffhdu_be_zsu, 5360 gen_helper_sve_ldffsdu_be_zsu, 5361 gen_helper_sve_ldffdd_be_zsu, } }, 5362 { { gen_helper_sve_ldffbds_zss, 5363 gen_helper_sve_ldffhds_be_zss, 5364 gen_helper_sve_ldffsds_be_zss, 5365 NULL, }, 5366 { gen_helper_sve_ldffbdu_zss, 5367 gen_helper_sve_ldffhdu_be_zss, 5368 gen_helper_sve_ldffsdu_be_zss, 5369 gen_helper_sve_ldffdd_be_zss, } }, 5370 { { gen_helper_sve_ldffbds_zd, 5371 gen_helper_sve_ldffhds_be_zd, 5372 gen_helper_sve_ldffsds_be_zd, 5373 NULL, }, 5374 { gen_helper_sve_ldffbdu_zd, 5375 gen_helper_sve_ldffhdu_be_zd, 5376 gen_helper_sve_ldffsdu_be_zd, 5377 gen_helper_sve_ldffdd_be_zd, } } } } }, 5378 { /* MTE Active */ 5379 { /* Little-endian */ 5380 { { { gen_helper_sve_ldbds_zsu_mte, 5381 gen_helper_sve_ldhds_le_zsu_mte, 5382 gen_helper_sve_ldsds_le_zsu_mte, 5383 NULL, }, 5384 { gen_helper_sve_ldbdu_zsu_mte, 5385 gen_helper_sve_ldhdu_le_zsu_mte, 5386 gen_helper_sve_ldsdu_le_zsu_mte, 5387 gen_helper_sve_lddd_le_zsu_mte, } }, 5388 { { gen_helper_sve_ldbds_zss_mte, 5389 gen_helper_sve_ldhds_le_zss_mte, 5390 gen_helper_sve_ldsds_le_zss_mte, 5391 NULL, }, 5392 { gen_helper_sve_ldbdu_zss_mte, 5393 gen_helper_sve_ldhdu_le_zss_mte, 5394 gen_helper_sve_ldsdu_le_zss_mte, 5395 gen_helper_sve_lddd_le_zss_mte, } }, 5396 { { gen_helper_sve_ldbds_zd_mte, 5397 gen_helper_sve_ldhds_le_zd_mte, 5398 gen_helper_sve_ldsds_le_zd_mte, 5399 NULL, }, 5400 { gen_helper_sve_ldbdu_zd_mte, 5401 gen_helper_sve_ldhdu_le_zd_mte, 5402 gen_helper_sve_ldsdu_le_zd_mte, 5403 gen_helper_sve_lddd_le_zd_mte, } } }, 5404 5405 /* First-fault */ 5406 { { { gen_helper_sve_ldffbds_zsu_mte, 5407 gen_helper_sve_ldffhds_le_zsu_mte, 5408 gen_helper_sve_ldffsds_le_zsu_mte, 5409 NULL, }, 5410 { gen_helper_sve_ldffbdu_zsu_mte, 5411 gen_helper_sve_ldffhdu_le_zsu_mte, 5412 gen_helper_sve_ldffsdu_le_zsu_mte, 5413 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5414 { { gen_helper_sve_ldffbds_zss_mte, 5415 gen_helper_sve_ldffhds_le_zss_mte, 5416 gen_helper_sve_ldffsds_le_zss_mte, 5417 NULL, }, 5418 { gen_helper_sve_ldffbdu_zss_mte, 5419 gen_helper_sve_ldffhdu_le_zss_mte, 5420 gen_helper_sve_ldffsdu_le_zss_mte, 5421 gen_helper_sve_ldffdd_le_zss_mte, } }, 5422 { { gen_helper_sve_ldffbds_zd_mte, 5423 gen_helper_sve_ldffhds_le_zd_mte, 5424 gen_helper_sve_ldffsds_le_zd_mte, 5425 NULL, }, 5426 { gen_helper_sve_ldffbdu_zd_mte, 5427 gen_helper_sve_ldffhdu_le_zd_mte, 5428 gen_helper_sve_ldffsdu_le_zd_mte, 5429 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5430 { /* Big-endian */ 5431 { { { gen_helper_sve_ldbds_zsu_mte, 5432 gen_helper_sve_ldhds_be_zsu_mte, 5433 gen_helper_sve_ldsds_be_zsu_mte, 5434 NULL, }, 5435 { gen_helper_sve_ldbdu_zsu_mte, 5436 gen_helper_sve_ldhdu_be_zsu_mte, 5437 gen_helper_sve_ldsdu_be_zsu_mte, 5438 gen_helper_sve_lddd_be_zsu_mte, } }, 5439 { { gen_helper_sve_ldbds_zss_mte, 5440 gen_helper_sve_ldhds_be_zss_mte, 5441 gen_helper_sve_ldsds_be_zss_mte, 5442 NULL, }, 5443 { gen_helper_sve_ldbdu_zss_mte, 5444 gen_helper_sve_ldhdu_be_zss_mte, 5445 gen_helper_sve_ldsdu_be_zss_mte, 5446 gen_helper_sve_lddd_be_zss_mte, } }, 5447 { { gen_helper_sve_ldbds_zd_mte, 5448 gen_helper_sve_ldhds_be_zd_mte, 5449 gen_helper_sve_ldsds_be_zd_mte, 5450 NULL, }, 5451 { gen_helper_sve_ldbdu_zd_mte, 5452 gen_helper_sve_ldhdu_be_zd_mte, 5453 gen_helper_sve_ldsdu_be_zd_mte, 5454 gen_helper_sve_lddd_be_zd_mte, } } }, 5455 5456 /* First-fault */ 5457 { { { gen_helper_sve_ldffbds_zsu_mte, 5458 gen_helper_sve_ldffhds_be_zsu_mte, 5459 gen_helper_sve_ldffsds_be_zsu_mte, 5460 NULL, }, 5461 { gen_helper_sve_ldffbdu_zsu_mte, 5462 gen_helper_sve_ldffhdu_be_zsu_mte, 5463 gen_helper_sve_ldffsdu_be_zsu_mte, 5464 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5465 { { gen_helper_sve_ldffbds_zss_mte, 5466 gen_helper_sve_ldffhds_be_zss_mte, 5467 gen_helper_sve_ldffsds_be_zss_mte, 5468 NULL, }, 5469 { gen_helper_sve_ldffbdu_zss_mte, 5470 gen_helper_sve_ldffhdu_be_zss_mte, 5471 gen_helper_sve_ldffsdu_be_zss_mte, 5472 gen_helper_sve_ldffdd_be_zss_mte, } }, 5473 { { gen_helper_sve_ldffbds_zd_mte, 5474 gen_helper_sve_ldffhds_be_zd_mte, 5475 gen_helper_sve_ldffsds_be_zd_mte, 5476 NULL, }, 5477 { gen_helper_sve_ldffbdu_zd_mte, 5478 gen_helper_sve_ldffhdu_be_zd_mte, 5479 gen_helper_sve_ldffsdu_be_zd_mte, 5480 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 5481 }; 5482 5483 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 5484 { 5485 gen_helper_gvec_mem_scatter *fn = NULL; 5486 bool be = s->be_data == MO_BE; 5487 bool mte = s->mte_active[0]; 5488 5489 if (!dc_isar_feature(aa64_sve, s)) { 5490 return false; 5491 } 5492 s->is_nonstreaming = true; 5493 if (!sve_access_check(s)) { 5494 return true; 5495 } 5496 5497 switch (a->esz) { 5498 case MO_32: 5499 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 5500 break; 5501 case MO_64: 5502 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 5503 break; 5504 } 5505 assert(fn != NULL); 5506 5507 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5508 cpu_reg_sp(s, a->rn), a->msz, false, fn); 5509 return true; 5510 } 5511 5512 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 5513 { 5514 gen_helper_gvec_mem_scatter *fn = NULL; 5515 bool be = s->be_data == MO_BE; 5516 bool mte = s->mte_active[0]; 5517 5518 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { 5519 return false; 5520 } 5521 if (!dc_isar_feature(aa64_sve, s)) { 5522 return false; 5523 } 5524 s->is_nonstreaming = true; 5525 if (!sve_access_check(s)) { 5526 return true; 5527 } 5528 5529 switch (a->esz) { 5530 case MO_32: 5531 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 5532 break; 5533 case MO_64: 5534 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 5535 break; 5536 } 5537 assert(fn != NULL); 5538 5539 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 5540 * by loading the immediate into the scalar parameter. 5541 */ 5542 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5543 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 5544 return true; 5545 } 5546 5547 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 5548 { 5549 gen_helper_gvec_mem_scatter *fn = NULL; 5550 bool be = s->be_data == MO_BE; 5551 bool mte = s->mte_active[0]; 5552 5553 if (a->esz < a->msz + !a->u) { 5554 return false; 5555 } 5556 if (!dc_isar_feature(aa64_sve2, s)) { 5557 return false; 5558 } 5559 s->is_nonstreaming = true; 5560 if (!sve_access_check(s)) { 5561 return true; 5562 } 5563 5564 switch (a->esz) { 5565 case MO_32: 5566 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 5567 break; 5568 case MO_64: 5569 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 5570 break; 5571 } 5572 assert(fn != NULL); 5573 5574 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5575 cpu_reg(s, a->rm), a->msz, false, fn); 5576 return true; 5577 } 5578 5579 /* Indexed by [mte][be][xs][msz]. */ 5580 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { 5581 { /* MTE Inactive */ 5582 { /* Little-endian */ 5583 { gen_helper_sve_stbs_zsu, 5584 gen_helper_sve_sths_le_zsu, 5585 gen_helper_sve_stss_le_zsu, }, 5586 { gen_helper_sve_stbs_zss, 5587 gen_helper_sve_sths_le_zss, 5588 gen_helper_sve_stss_le_zss, } }, 5589 { /* Big-endian */ 5590 { gen_helper_sve_stbs_zsu, 5591 gen_helper_sve_sths_be_zsu, 5592 gen_helper_sve_stss_be_zsu, }, 5593 { gen_helper_sve_stbs_zss, 5594 gen_helper_sve_sths_be_zss, 5595 gen_helper_sve_stss_be_zss, } } }, 5596 { /* MTE Active */ 5597 { /* Little-endian */ 5598 { gen_helper_sve_stbs_zsu_mte, 5599 gen_helper_sve_sths_le_zsu_mte, 5600 gen_helper_sve_stss_le_zsu_mte, }, 5601 { gen_helper_sve_stbs_zss_mte, 5602 gen_helper_sve_sths_le_zss_mte, 5603 gen_helper_sve_stss_le_zss_mte, } }, 5604 { /* Big-endian */ 5605 { gen_helper_sve_stbs_zsu_mte, 5606 gen_helper_sve_sths_be_zsu_mte, 5607 gen_helper_sve_stss_be_zsu_mte, }, 5608 { gen_helper_sve_stbs_zss_mte, 5609 gen_helper_sve_sths_be_zss_mte, 5610 gen_helper_sve_stss_be_zss_mte, } } }, 5611 }; 5612 5613 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5614 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { 5615 { /* MTE Inactive */ 5616 { /* Little-endian */ 5617 { gen_helper_sve_stbd_zsu, 5618 gen_helper_sve_sthd_le_zsu, 5619 gen_helper_sve_stsd_le_zsu, 5620 gen_helper_sve_stdd_le_zsu, }, 5621 { gen_helper_sve_stbd_zss, 5622 gen_helper_sve_sthd_le_zss, 5623 gen_helper_sve_stsd_le_zss, 5624 gen_helper_sve_stdd_le_zss, }, 5625 { gen_helper_sve_stbd_zd, 5626 gen_helper_sve_sthd_le_zd, 5627 gen_helper_sve_stsd_le_zd, 5628 gen_helper_sve_stdd_le_zd, } }, 5629 { /* Big-endian */ 5630 { gen_helper_sve_stbd_zsu, 5631 gen_helper_sve_sthd_be_zsu, 5632 gen_helper_sve_stsd_be_zsu, 5633 gen_helper_sve_stdd_be_zsu, }, 5634 { gen_helper_sve_stbd_zss, 5635 gen_helper_sve_sthd_be_zss, 5636 gen_helper_sve_stsd_be_zss, 5637 gen_helper_sve_stdd_be_zss, }, 5638 { gen_helper_sve_stbd_zd, 5639 gen_helper_sve_sthd_be_zd, 5640 gen_helper_sve_stsd_be_zd, 5641 gen_helper_sve_stdd_be_zd, } } }, 5642 { /* MTE Inactive */ 5643 { /* Little-endian */ 5644 { gen_helper_sve_stbd_zsu_mte, 5645 gen_helper_sve_sthd_le_zsu_mte, 5646 gen_helper_sve_stsd_le_zsu_mte, 5647 gen_helper_sve_stdd_le_zsu_mte, }, 5648 { gen_helper_sve_stbd_zss_mte, 5649 gen_helper_sve_sthd_le_zss_mte, 5650 gen_helper_sve_stsd_le_zss_mte, 5651 gen_helper_sve_stdd_le_zss_mte, }, 5652 { gen_helper_sve_stbd_zd_mte, 5653 gen_helper_sve_sthd_le_zd_mte, 5654 gen_helper_sve_stsd_le_zd_mte, 5655 gen_helper_sve_stdd_le_zd_mte, } }, 5656 { /* Big-endian */ 5657 { gen_helper_sve_stbd_zsu_mte, 5658 gen_helper_sve_sthd_be_zsu_mte, 5659 gen_helper_sve_stsd_be_zsu_mte, 5660 gen_helper_sve_stdd_be_zsu_mte, }, 5661 { gen_helper_sve_stbd_zss_mte, 5662 gen_helper_sve_sthd_be_zss_mte, 5663 gen_helper_sve_stsd_be_zss_mte, 5664 gen_helper_sve_stdd_be_zss_mte, }, 5665 { gen_helper_sve_stbd_zd_mte, 5666 gen_helper_sve_sthd_be_zd_mte, 5667 gen_helper_sve_stsd_be_zd_mte, 5668 gen_helper_sve_stdd_be_zd_mte, } } }, 5669 }; 5670 5671 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) 5672 { 5673 gen_helper_gvec_mem_scatter *fn; 5674 bool be = s->be_data == MO_BE; 5675 bool mte = s->mte_active[0]; 5676 5677 if (a->esz < a->msz || (a->msz == 0 && a->scale)) { 5678 return false; 5679 } 5680 if (!dc_isar_feature(aa64_sve, s)) { 5681 return false; 5682 } 5683 s->is_nonstreaming = true; 5684 if (!sve_access_check(s)) { 5685 return true; 5686 } 5687 switch (a->esz) { 5688 case MO_32: 5689 fn = scatter_store_fn32[mte][be][a->xs][a->msz]; 5690 break; 5691 case MO_64: 5692 fn = scatter_store_fn64[mte][be][a->xs][a->msz]; 5693 break; 5694 default: 5695 g_assert_not_reached(); 5696 } 5697 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5698 cpu_reg_sp(s, a->rn), a->msz, true, fn); 5699 return true; 5700 } 5701 5702 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) 5703 { 5704 gen_helper_gvec_mem_scatter *fn = NULL; 5705 bool be = s->be_data == MO_BE; 5706 bool mte = s->mte_active[0]; 5707 5708 if (a->esz < a->msz) { 5709 return false; 5710 } 5711 if (!dc_isar_feature(aa64_sve, s)) { 5712 return false; 5713 } 5714 s->is_nonstreaming = true; 5715 if (!sve_access_check(s)) { 5716 return true; 5717 } 5718 5719 switch (a->esz) { 5720 case MO_32: 5721 fn = scatter_store_fn32[mte][be][0][a->msz]; 5722 break; 5723 case MO_64: 5724 fn = scatter_store_fn64[mte][be][2][a->msz]; 5725 break; 5726 } 5727 assert(fn != NULL); 5728 5729 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) 5730 * by loading the immediate into the scalar parameter. 5731 */ 5732 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5733 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn); 5734 return true; 5735 } 5736 5737 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) 5738 { 5739 gen_helper_gvec_mem_scatter *fn; 5740 bool be = s->be_data == MO_BE; 5741 bool mte = s->mte_active[0]; 5742 5743 if (a->esz < a->msz) { 5744 return false; 5745 } 5746 if (!dc_isar_feature(aa64_sve2, s)) { 5747 return false; 5748 } 5749 s->is_nonstreaming = true; 5750 if (!sve_access_check(s)) { 5751 return true; 5752 } 5753 5754 switch (a->esz) { 5755 case MO_32: 5756 fn = scatter_store_fn32[mte][be][0][a->msz]; 5757 break; 5758 case MO_64: 5759 fn = scatter_store_fn64[mte][be][2][a->msz]; 5760 break; 5761 default: 5762 g_assert_not_reached(); 5763 } 5764 5765 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5766 cpu_reg(s, a->rm), a->msz, true, fn); 5767 return true; 5768 } 5769 5770 /* 5771 * Prefetches 5772 */ 5773 5774 static bool trans_PRF(DisasContext *s, arg_PRF *a) 5775 { 5776 if (!dc_isar_feature(aa64_sve, s)) { 5777 return false; 5778 } 5779 /* Prefetch is a nop within QEMU. */ 5780 (void)sve_access_check(s); 5781 return true; 5782 } 5783 5784 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) 5785 { 5786 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 5787 return false; 5788 } 5789 /* Prefetch is a nop within QEMU. */ 5790 (void)sve_access_check(s); 5791 return true; 5792 } 5793 5794 static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) 5795 { 5796 if (!dc_isar_feature(aa64_sve, s)) { 5797 return false; 5798 } 5799 /* Prefetch is a nop within QEMU. */ 5800 s->is_nonstreaming = true; 5801 (void)sve_access_check(s); 5802 return true; 5803 } 5804 5805 /* 5806 * Move Prefix 5807 * 5808 * TODO: The implementation so far could handle predicated merging movprfx. 5809 * The helper functions as written take an extra source register to 5810 * use in the operation, but the result is only written when predication 5811 * succeeds. For unpredicated movprfx, we need to rearrange the helpers 5812 * to allow the final write back to the destination to be unconditional. 5813 * For predicated zeroing movprfx, we need to rearrange the helpers to 5814 * allow the final write back to zero inactives. 5815 * 5816 * In the meantime, just emit the moves. 5817 */ 5818 5819 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 5820 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 5821 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 5822 5823 /* 5824 * SVE2 Integer Multiply - Unpredicated 5825 */ 5826 5827 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 5828 5829 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 5830 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 5831 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 5832 }; 5833 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5834 smulh_zzz_fns[a->esz], a, 0) 5835 5836 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 5837 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 5838 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 5839 }; 5840 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5841 umulh_zzz_fns[a->esz], a, 0) 5842 5843 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5844 gen_helper_gvec_pmul_b, a, 0) 5845 5846 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { 5847 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 5848 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 5849 }; 5850 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5851 sqdmulh_zzz_fns[a->esz], a, 0) 5852 5853 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 5854 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 5855 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 5856 }; 5857 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5858 sqrdmulh_zzz_fns[a->esz], a, 0) 5859 5860 /* 5861 * SVE2 Integer - Predicated 5862 */ 5863 5864 static gen_helper_gvec_4 * const sadlp_fns[4] = { 5865 NULL, gen_helper_sve2_sadalp_zpzz_h, 5866 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 5867 }; 5868 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5869 sadlp_fns[a->esz], a, 0) 5870 5871 static gen_helper_gvec_4 * const uadlp_fns[4] = { 5872 NULL, gen_helper_sve2_uadalp_zpzz_h, 5873 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 5874 }; 5875 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5876 uadlp_fns[a->esz], a, 0) 5877 5878 /* 5879 * SVE2 integer unary operations (predicated) 5880 */ 5881 5882 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 5883 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 5884 5885 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 5886 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0) 5887 5888 static gen_helper_gvec_3 * const sqabs_fns[4] = { 5889 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 5890 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 5891 }; 5892 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) 5893 5894 static gen_helper_gvec_3 * const sqneg_fns[4] = { 5895 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 5896 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 5897 }; 5898 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) 5899 5900 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) 5901 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) 5902 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) 5903 5904 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) 5905 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) 5906 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) 5907 5908 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) 5909 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) 5910 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) 5911 5912 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) 5913 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) 5914 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) 5915 5916 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) 5917 DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp) 5918 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) 5919 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) 5920 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) 5921 5922 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) 5923 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) 5924 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) 5925 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) 5926 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) 5927 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) 5928 5929 /* 5930 * SVE2 Widening Integer Arithmetic 5931 */ 5932 5933 static gen_helper_gvec_3 * const saddl_fns[4] = { 5934 NULL, gen_helper_sve2_saddl_h, 5935 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d, 5936 }; 5937 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5938 saddl_fns[a->esz], a, 0) 5939 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5940 saddl_fns[a->esz], a, 3) 5941 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 5942 saddl_fns[a->esz], a, 2) 5943 5944 static gen_helper_gvec_3 * const ssubl_fns[4] = { 5945 NULL, gen_helper_sve2_ssubl_h, 5946 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d, 5947 }; 5948 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5949 ssubl_fns[a->esz], a, 0) 5950 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5951 ssubl_fns[a->esz], a, 3) 5952 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 5953 ssubl_fns[a->esz], a, 2) 5954 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz, 5955 ssubl_fns[a->esz], a, 1) 5956 5957 static gen_helper_gvec_3 * const sabdl_fns[4] = { 5958 NULL, gen_helper_sve2_sabdl_h, 5959 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d, 5960 }; 5961 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5962 sabdl_fns[a->esz], a, 0) 5963 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5964 sabdl_fns[a->esz], a, 3) 5965 5966 static gen_helper_gvec_3 * const uaddl_fns[4] = { 5967 NULL, gen_helper_sve2_uaddl_h, 5968 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d, 5969 }; 5970 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5971 uaddl_fns[a->esz], a, 0) 5972 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5973 uaddl_fns[a->esz], a, 3) 5974 5975 static gen_helper_gvec_3 * const usubl_fns[4] = { 5976 NULL, gen_helper_sve2_usubl_h, 5977 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d, 5978 }; 5979 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5980 usubl_fns[a->esz], a, 0) 5981 TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5982 usubl_fns[a->esz], a, 3) 5983 5984 static gen_helper_gvec_3 * const uabdl_fns[4] = { 5985 NULL, gen_helper_sve2_uabdl_h, 5986 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d, 5987 }; 5988 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5989 uabdl_fns[a->esz], a, 0) 5990 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5991 uabdl_fns[a->esz], a, 3) 5992 5993 static gen_helper_gvec_3 * const sqdmull_fns[4] = { 5994 NULL, gen_helper_sve2_sqdmull_zzz_h, 5995 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d, 5996 }; 5997 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5998 sqdmull_fns[a->esz], a, 0) 5999 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6000 sqdmull_fns[a->esz], a, 3) 6001 6002 static gen_helper_gvec_3 * const smull_fns[4] = { 6003 NULL, gen_helper_sve2_smull_zzz_h, 6004 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d, 6005 }; 6006 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6007 smull_fns[a->esz], a, 0) 6008 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6009 smull_fns[a->esz], a, 3) 6010 6011 static gen_helper_gvec_3 * const umull_fns[4] = { 6012 NULL, gen_helper_sve2_umull_zzz_h, 6013 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d, 6014 }; 6015 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6016 umull_fns[a->esz], a, 0) 6017 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6018 umull_fns[a->esz], a, 3) 6019 6020 static gen_helper_gvec_3 * const eoril_fns[4] = { 6021 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6022 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 6023 }; 6024 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2) 6025 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1) 6026 6027 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6028 { 6029 static gen_helper_gvec_3 * const fns[4] = { 6030 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6031 NULL, gen_helper_sve2_pmull_d, 6032 }; 6033 6034 if (a->esz == 0) { 6035 if (!dc_isar_feature(aa64_sve2_pmull128, s)) { 6036 return false; 6037 } 6038 s->is_nonstreaming = true; 6039 } else if (!dc_isar_feature(aa64_sve, s)) { 6040 return false; 6041 } 6042 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); 6043 } 6044 6045 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false) 6046 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true) 6047 6048 static gen_helper_gvec_3 * const saddw_fns[4] = { 6049 NULL, gen_helper_sve2_saddw_h, 6050 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d, 6051 }; 6052 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0) 6053 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1) 6054 6055 static gen_helper_gvec_3 * const ssubw_fns[4] = { 6056 NULL, gen_helper_sve2_ssubw_h, 6057 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d, 6058 }; 6059 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0) 6060 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1) 6061 6062 static gen_helper_gvec_3 * const uaddw_fns[4] = { 6063 NULL, gen_helper_sve2_uaddw_h, 6064 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d, 6065 }; 6066 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0) 6067 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1) 6068 6069 static gen_helper_gvec_3 * const usubw_fns[4] = { 6070 NULL, gen_helper_sve2_usubw_h, 6071 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d, 6072 }; 6073 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0) 6074 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1) 6075 6076 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6077 { 6078 int top = imm & 1; 6079 int shl = imm >> 1; 6080 int halfbits = 4 << vece; 6081 6082 if (top) { 6083 if (shl == halfbits) { 6084 TCGv_vec t = tcg_temp_new_vec_matching(d); 6085 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6086 tcg_gen_and_vec(vece, d, n, t); 6087 } else { 6088 tcg_gen_sari_vec(vece, d, n, halfbits); 6089 tcg_gen_shli_vec(vece, d, d, shl); 6090 } 6091 } else { 6092 tcg_gen_shli_vec(vece, d, n, halfbits); 6093 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 6094 } 6095 } 6096 6097 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 6098 { 6099 int halfbits = 4 << vece; 6100 int top = imm & 1; 6101 int shl = (imm >> 1); 6102 int shift; 6103 uint64_t mask; 6104 6105 mask = MAKE_64BIT_MASK(0, halfbits); 6106 mask <<= shl; 6107 mask = dup_const(vece, mask); 6108 6109 shift = shl - top * halfbits; 6110 if (shift < 0) { 6111 tcg_gen_shri_i64(d, n, -shift); 6112 } else { 6113 tcg_gen_shli_i64(d, n, shift); 6114 } 6115 tcg_gen_andi_i64(d, d, mask); 6116 } 6117 6118 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6119 { 6120 gen_ushll_i64(MO_16, d, n, imm); 6121 } 6122 6123 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6124 { 6125 gen_ushll_i64(MO_32, d, n, imm); 6126 } 6127 6128 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6129 { 6130 gen_ushll_i64(MO_64, d, n, imm); 6131 } 6132 6133 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6134 { 6135 int halfbits = 4 << vece; 6136 int top = imm & 1; 6137 int shl = imm >> 1; 6138 6139 if (top) { 6140 if (shl == halfbits) { 6141 TCGv_vec t = tcg_temp_new_vec_matching(d); 6142 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6143 tcg_gen_and_vec(vece, d, n, t); 6144 } else { 6145 tcg_gen_shri_vec(vece, d, n, halfbits); 6146 tcg_gen_shli_vec(vece, d, d, shl); 6147 } 6148 } else { 6149 if (shl == 0) { 6150 TCGv_vec t = tcg_temp_new_vec_matching(d); 6151 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6152 tcg_gen_and_vec(vece, d, n, t); 6153 } else { 6154 tcg_gen_shli_vec(vece, d, n, halfbits); 6155 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 6156 } 6157 } 6158 } 6159 6160 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a, 6161 const GVecGen2i ops[3], bool sel) 6162 { 6163 6164 if (a->esz < 0 || a->esz > 2) { 6165 return false; 6166 } 6167 if (sve_access_check(s)) { 6168 unsigned vsz = vec_full_reg_size(s); 6169 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6170 vec_full_reg_offset(s, a->rn), 6171 vsz, vsz, (a->imm << 1) | sel, 6172 &ops[a->esz]); 6173 } 6174 return true; 6175 } 6176 6177 static const TCGOpcode sshll_list[] = { 6178 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 6179 }; 6180 static const GVecGen2i sshll_ops[3] = { 6181 { .fniv = gen_sshll_vec, 6182 .opt_opc = sshll_list, 6183 .fno = gen_helper_sve2_sshll_h, 6184 .vece = MO_16 }, 6185 { .fniv = gen_sshll_vec, 6186 .opt_opc = sshll_list, 6187 .fno = gen_helper_sve2_sshll_s, 6188 .vece = MO_32 }, 6189 { .fniv = gen_sshll_vec, 6190 .opt_opc = sshll_list, 6191 .fno = gen_helper_sve2_sshll_d, 6192 .vece = MO_64 } 6193 }; 6194 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false) 6195 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true) 6196 6197 static const TCGOpcode ushll_list[] = { 6198 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 6199 }; 6200 static const GVecGen2i ushll_ops[3] = { 6201 { .fni8 = gen_ushll16_i64, 6202 .fniv = gen_ushll_vec, 6203 .opt_opc = ushll_list, 6204 .fno = gen_helper_sve2_ushll_h, 6205 .vece = MO_16 }, 6206 { .fni8 = gen_ushll32_i64, 6207 .fniv = gen_ushll_vec, 6208 .opt_opc = ushll_list, 6209 .fno = gen_helper_sve2_ushll_s, 6210 .vece = MO_32 }, 6211 { .fni8 = gen_ushll64_i64, 6212 .fniv = gen_ushll_vec, 6213 .opt_opc = ushll_list, 6214 .fno = gen_helper_sve2_ushll_d, 6215 .vece = MO_64 }, 6216 }; 6217 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false) 6218 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true) 6219 6220 static gen_helper_gvec_3 * const bext_fns[4] = { 6221 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 6222 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 6223 }; 6224 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6225 bext_fns[a->esz], a, 0) 6226 6227 static gen_helper_gvec_3 * const bdep_fns[4] = { 6228 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 6229 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 6230 }; 6231 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6232 bdep_fns[a->esz], a, 0) 6233 6234 static gen_helper_gvec_3 * const bgrp_fns[4] = { 6235 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 6236 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 6237 }; 6238 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6239 bgrp_fns[a->esz], a, 0) 6240 6241 static gen_helper_gvec_3 * const cadd_fns[4] = { 6242 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 6243 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d, 6244 }; 6245 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6246 cadd_fns[a->esz], a, 0) 6247 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6248 cadd_fns[a->esz], a, 1) 6249 6250 static gen_helper_gvec_3 * const sqcadd_fns[4] = { 6251 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 6252 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d, 6253 }; 6254 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6255 sqcadd_fns[a->esz], a, 0) 6256 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6257 sqcadd_fns[a->esz], a, 1) 6258 6259 static gen_helper_gvec_4 * const sabal_fns[4] = { 6260 NULL, gen_helper_sve2_sabal_h, 6261 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d, 6262 }; 6263 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0) 6264 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1) 6265 6266 static gen_helper_gvec_4 * const uabal_fns[4] = { 6267 NULL, gen_helper_sve2_uabal_h, 6268 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d, 6269 }; 6270 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0) 6271 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1) 6272 6273 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 6274 { 6275 static gen_helper_gvec_4 * const fns[2] = { 6276 gen_helper_sve2_adcl_s, 6277 gen_helper_sve2_adcl_d, 6278 }; 6279 /* 6280 * Note that in this case the ESZ field encodes both size and sign. 6281 * Split out 'subtract' into bit 1 of the data field for the helper. 6282 */ 6283 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 6284 } 6285 6286 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 6287 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 6288 6289 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 6290 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 6291 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 6292 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 6293 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 6294 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 6295 6296 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 6297 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 6298 6299 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 6300 const GVecGen2 ops[3]) 6301 { 6302 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 6303 return false; 6304 } 6305 if (sve_access_check(s)) { 6306 unsigned vsz = vec_full_reg_size(s); 6307 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 6308 vec_full_reg_offset(s, a->rn), 6309 vsz, vsz, &ops[a->esz]); 6310 } 6311 return true; 6312 } 6313 6314 static const TCGOpcode sqxtn_list[] = { 6315 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 6316 }; 6317 6318 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6319 { 6320 TCGv_vec t = tcg_temp_new_vec_matching(d); 6321 int halfbits = 4 << vece; 6322 int64_t mask = (1ull << halfbits) - 1; 6323 int64_t min = -1ull << (halfbits - 1); 6324 int64_t max = -min - 1; 6325 6326 tcg_gen_dupi_vec(vece, t, min); 6327 tcg_gen_smax_vec(vece, d, n, t); 6328 tcg_gen_dupi_vec(vece, t, max); 6329 tcg_gen_smin_vec(vece, d, d, t); 6330 tcg_gen_dupi_vec(vece, t, mask); 6331 tcg_gen_and_vec(vece, d, d, t); 6332 } 6333 6334 static const GVecGen2 sqxtnb_ops[3] = { 6335 { .fniv = gen_sqxtnb_vec, 6336 .opt_opc = sqxtn_list, 6337 .fno = gen_helper_sve2_sqxtnb_h, 6338 .vece = MO_16 }, 6339 { .fniv = gen_sqxtnb_vec, 6340 .opt_opc = sqxtn_list, 6341 .fno = gen_helper_sve2_sqxtnb_s, 6342 .vece = MO_32 }, 6343 { .fniv = gen_sqxtnb_vec, 6344 .opt_opc = sqxtn_list, 6345 .fno = gen_helper_sve2_sqxtnb_d, 6346 .vece = MO_64 }, 6347 }; 6348 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 6349 6350 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6351 { 6352 TCGv_vec t = tcg_temp_new_vec_matching(d); 6353 int halfbits = 4 << vece; 6354 int64_t mask = (1ull << halfbits) - 1; 6355 int64_t min = -1ull << (halfbits - 1); 6356 int64_t max = -min - 1; 6357 6358 tcg_gen_dupi_vec(vece, t, min); 6359 tcg_gen_smax_vec(vece, n, n, t); 6360 tcg_gen_dupi_vec(vece, t, max); 6361 tcg_gen_smin_vec(vece, n, n, t); 6362 tcg_gen_shli_vec(vece, n, n, halfbits); 6363 tcg_gen_dupi_vec(vece, t, mask); 6364 tcg_gen_bitsel_vec(vece, d, t, d, n); 6365 } 6366 6367 static const GVecGen2 sqxtnt_ops[3] = { 6368 { .fniv = gen_sqxtnt_vec, 6369 .opt_opc = sqxtn_list, 6370 .load_dest = true, 6371 .fno = gen_helper_sve2_sqxtnt_h, 6372 .vece = MO_16 }, 6373 { .fniv = gen_sqxtnt_vec, 6374 .opt_opc = sqxtn_list, 6375 .load_dest = true, 6376 .fno = gen_helper_sve2_sqxtnt_s, 6377 .vece = MO_32 }, 6378 { .fniv = gen_sqxtnt_vec, 6379 .opt_opc = sqxtn_list, 6380 .load_dest = true, 6381 .fno = gen_helper_sve2_sqxtnt_d, 6382 .vece = MO_64 }, 6383 }; 6384 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 6385 6386 static const TCGOpcode uqxtn_list[] = { 6387 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 6388 }; 6389 6390 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6391 { 6392 TCGv_vec t = tcg_temp_new_vec_matching(d); 6393 int halfbits = 4 << vece; 6394 int64_t max = (1ull << halfbits) - 1; 6395 6396 tcg_gen_dupi_vec(vece, t, max); 6397 tcg_gen_umin_vec(vece, d, n, t); 6398 } 6399 6400 static const GVecGen2 uqxtnb_ops[3] = { 6401 { .fniv = gen_uqxtnb_vec, 6402 .opt_opc = uqxtn_list, 6403 .fno = gen_helper_sve2_uqxtnb_h, 6404 .vece = MO_16 }, 6405 { .fniv = gen_uqxtnb_vec, 6406 .opt_opc = uqxtn_list, 6407 .fno = gen_helper_sve2_uqxtnb_s, 6408 .vece = MO_32 }, 6409 { .fniv = gen_uqxtnb_vec, 6410 .opt_opc = uqxtn_list, 6411 .fno = gen_helper_sve2_uqxtnb_d, 6412 .vece = MO_64 }, 6413 }; 6414 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) 6415 6416 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6417 { 6418 TCGv_vec t = tcg_temp_new_vec_matching(d); 6419 int halfbits = 4 << vece; 6420 int64_t max = (1ull << halfbits) - 1; 6421 6422 tcg_gen_dupi_vec(vece, t, max); 6423 tcg_gen_umin_vec(vece, n, n, t); 6424 tcg_gen_shli_vec(vece, n, n, halfbits); 6425 tcg_gen_bitsel_vec(vece, d, t, d, n); 6426 } 6427 6428 static const GVecGen2 uqxtnt_ops[3] = { 6429 { .fniv = gen_uqxtnt_vec, 6430 .opt_opc = uqxtn_list, 6431 .load_dest = true, 6432 .fno = gen_helper_sve2_uqxtnt_h, 6433 .vece = MO_16 }, 6434 { .fniv = gen_uqxtnt_vec, 6435 .opt_opc = uqxtn_list, 6436 .load_dest = true, 6437 .fno = gen_helper_sve2_uqxtnt_s, 6438 .vece = MO_32 }, 6439 { .fniv = gen_uqxtnt_vec, 6440 .opt_opc = uqxtn_list, 6441 .load_dest = true, 6442 .fno = gen_helper_sve2_uqxtnt_d, 6443 .vece = MO_64 }, 6444 }; 6445 TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops) 6446 6447 static const TCGOpcode sqxtun_list[] = { 6448 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 6449 }; 6450 6451 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6452 { 6453 TCGv_vec t = tcg_temp_new_vec_matching(d); 6454 int halfbits = 4 << vece; 6455 int64_t max = (1ull << halfbits) - 1; 6456 6457 tcg_gen_dupi_vec(vece, t, 0); 6458 tcg_gen_smax_vec(vece, d, n, t); 6459 tcg_gen_dupi_vec(vece, t, max); 6460 tcg_gen_umin_vec(vece, d, d, t); 6461 } 6462 6463 static const GVecGen2 sqxtunb_ops[3] = { 6464 { .fniv = gen_sqxtunb_vec, 6465 .opt_opc = sqxtun_list, 6466 .fno = gen_helper_sve2_sqxtunb_h, 6467 .vece = MO_16 }, 6468 { .fniv = gen_sqxtunb_vec, 6469 .opt_opc = sqxtun_list, 6470 .fno = gen_helper_sve2_sqxtunb_s, 6471 .vece = MO_32 }, 6472 { .fniv = gen_sqxtunb_vec, 6473 .opt_opc = sqxtun_list, 6474 .fno = gen_helper_sve2_sqxtunb_d, 6475 .vece = MO_64 }, 6476 }; 6477 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) 6478 6479 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6480 { 6481 TCGv_vec t = tcg_temp_new_vec_matching(d); 6482 int halfbits = 4 << vece; 6483 int64_t max = (1ull << halfbits) - 1; 6484 6485 tcg_gen_dupi_vec(vece, t, 0); 6486 tcg_gen_smax_vec(vece, n, n, t); 6487 tcg_gen_dupi_vec(vece, t, max); 6488 tcg_gen_umin_vec(vece, n, n, t); 6489 tcg_gen_shli_vec(vece, n, n, halfbits); 6490 tcg_gen_bitsel_vec(vece, d, t, d, n); 6491 } 6492 6493 static const GVecGen2 sqxtunt_ops[3] = { 6494 { .fniv = gen_sqxtunt_vec, 6495 .opt_opc = sqxtun_list, 6496 .load_dest = true, 6497 .fno = gen_helper_sve2_sqxtunt_h, 6498 .vece = MO_16 }, 6499 { .fniv = gen_sqxtunt_vec, 6500 .opt_opc = sqxtun_list, 6501 .load_dest = true, 6502 .fno = gen_helper_sve2_sqxtunt_s, 6503 .vece = MO_32 }, 6504 { .fniv = gen_sqxtunt_vec, 6505 .opt_opc = sqxtun_list, 6506 .load_dest = true, 6507 .fno = gen_helper_sve2_sqxtunt_d, 6508 .vece = MO_64 }, 6509 }; 6510 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops) 6511 6512 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a, 6513 const GVecGen2i ops[3]) 6514 { 6515 if (a->esz < 0 || a->esz > MO_32) { 6516 return false; 6517 } 6518 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 6519 if (sve_access_check(s)) { 6520 unsigned vsz = vec_full_reg_size(s); 6521 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6522 vec_full_reg_offset(s, a->rn), 6523 vsz, vsz, a->imm, &ops[a->esz]); 6524 } 6525 return true; 6526 } 6527 6528 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6529 { 6530 int halfbits = 4 << vece; 6531 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6532 6533 tcg_gen_shri_i64(d, n, shr); 6534 tcg_gen_andi_i64(d, d, mask); 6535 } 6536 6537 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6538 { 6539 gen_shrnb_i64(MO_16, d, n, shr); 6540 } 6541 6542 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6543 { 6544 gen_shrnb_i64(MO_32, d, n, shr); 6545 } 6546 6547 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6548 { 6549 gen_shrnb_i64(MO_64, d, n, shr); 6550 } 6551 6552 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6553 { 6554 TCGv_vec t = tcg_temp_new_vec_matching(d); 6555 int halfbits = 4 << vece; 6556 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6557 6558 tcg_gen_shri_vec(vece, n, n, shr); 6559 tcg_gen_dupi_vec(vece, t, mask); 6560 tcg_gen_and_vec(vece, d, n, t); 6561 } 6562 6563 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; 6564 static const GVecGen2i shrnb_ops[3] = { 6565 { .fni8 = gen_shrnb16_i64, 6566 .fniv = gen_shrnb_vec, 6567 .opt_opc = shrnb_vec_list, 6568 .fno = gen_helper_sve2_shrnb_h, 6569 .vece = MO_16 }, 6570 { .fni8 = gen_shrnb32_i64, 6571 .fniv = gen_shrnb_vec, 6572 .opt_opc = shrnb_vec_list, 6573 .fno = gen_helper_sve2_shrnb_s, 6574 .vece = MO_32 }, 6575 { .fni8 = gen_shrnb64_i64, 6576 .fniv = gen_shrnb_vec, 6577 .opt_opc = shrnb_vec_list, 6578 .fno = gen_helper_sve2_shrnb_d, 6579 .vece = MO_64 }, 6580 }; 6581 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops) 6582 6583 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6584 { 6585 int halfbits = 4 << vece; 6586 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6587 6588 tcg_gen_shli_i64(n, n, halfbits - shr); 6589 tcg_gen_andi_i64(n, n, ~mask); 6590 tcg_gen_andi_i64(d, d, mask); 6591 tcg_gen_or_i64(d, d, n); 6592 } 6593 6594 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6595 { 6596 gen_shrnt_i64(MO_16, d, n, shr); 6597 } 6598 6599 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6600 { 6601 gen_shrnt_i64(MO_32, d, n, shr); 6602 } 6603 6604 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6605 { 6606 tcg_gen_shri_i64(n, n, shr); 6607 tcg_gen_deposit_i64(d, d, n, 32, 32); 6608 } 6609 6610 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6611 { 6612 TCGv_vec t = tcg_temp_new_vec_matching(d); 6613 int halfbits = 4 << vece; 6614 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6615 6616 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 6617 tcg_gen_dupi_vec(vece, t, mask); 6618 tcg_gen_bitsel_vec(vece, d, t, d, n); 6619 } 6620 6621 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; 6622 static const GVecGen2i shrnt_ops[3] = { 6623 { .fni8 = gen_shrnt16_i64, 6624 .fniv = gen_shrnt_vec, 6625 .opt_opc = shrnt_vec_list, 6626 .load_dest = true, 6627 .fno = gen_helper_sve2_shrnt_h, 6628 .vece = MO_16 }, 6629 { .fni8 = gen_shrnt32_i64, 6630 .fniv = gen_shrnt_vec, 6631 .opt_opc = shrnt_vec_list, 6632 .load_dest = true, 6633 .fno = gen_helper_sve2_shrnt_s, 6634 .vece = MO_32 }, 6635 { .fni8 = gen_shrnt64_i64, 6636 .fniv = gen_shrnt_vec, 6637 .opt_opc = shrnt_vec_list, 6638 .load_dest = true, 6639 .fno = gen_helper_sve2_shrnt_d, 6640 .vece = MO_64 }, 6641 }; 6642 TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops) 6643 6644 static const GVecGen2i rshrnb_ops[3] = { 6645 { .fno = gen_helper_sve2_rshrnb_h }, 6646 { .fno = gen_helper_sve2_rshrnb_s }, 6647 { .fno = gen_helper_sve2_rshrnb_d }, 6648 }; 6649 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops) 6650 6651 static const GVecGen2i rshrnt_ops[3] = { 6652 { .fno = gen_helper_sve2_rshrnt_h }, 6653 { .fno = gen_helper_sve2_rshrnt_s }, 6654 { .fno = gen_helper_sve2_rshrnt_d }, 6655 }; 6656 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) 6657 6658 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 6659 TCGv_vec n, int64_t shr) 6660 { 6661 TCGv_vec t = tcg_temp_new_vec_matching(d); 6662 int halfbits = 4 << vece; 6663 6664 tcg_gen_sari_vec(vece, n, n, shr); 6665 tcg_gen_dupi_vec(vece, t, 0); 6666 tcg_gen_smax_vec(vece, n, n, t); 6667 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6668 tcg_gen_umin_vec(vece, d, n, t); 6669 } 6670 6671 static const TCGOpcode sqshrunb_vec_list[] = { 6672 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6673 }; 6674 static const GVecGen2i sqshrunb_ops[3] = { 6675 { .fniv = gen_sqshrunb_vec, 6676 .opt_opc = sqshrunb_vec_list, 6677 .fno = gen_helper_sve2_sqshrunb_h, 6678 .vece = MO_16 }, 6679 { .fniv = gen_sqshrunb_vec, 6680 .opt_opc = sqshrunb_vec_list, 6681 .fno = gen_helper_sve2_sqshrunb_s, 6682 .vece = MO_32 }, 6683 { .fniv = gen_sqshrunb_vec, 6684 .opt_opc = sqshrunb_vec_list, 6685 .fno = gen_helper_sve2_sqshrunb_d, 6686 .vece = MO_64 }, 6687 }; 6688 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) 6689 6690 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 6691 TCGv_vec n, int64_t shr) 6692 { 6693 TCGv_vec t = tcg_temp_new_vec_matching(d); 6694 int halfbits = 4 << vece; 6695 6696 tcg_gen_sari_vec(vece, n, n, shr); 6697 tcg_gen_dupi_vec(vece, t, 0); 6698 tcg_gen_smax_vec(vece, n, n, t); 6699 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6700 tcg_gen_umin_vec(vece, n, n, t); 6701 tcg_gen_shli_vec(vece, n, n, halfbits); 6702 tcg_gen_bitsel_vec(vece, d, t, d, n); 6703 } 6704 6705 static const TCGOpcode sqshrunt_vec_list[] = { 6706 INDEX_op_shli_vec, INDEX_op_sari_vec, 6707 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6708 }; 6709 static const GVecGen2i sqshrunt_ops[3] = { 6710 { .fniv = gen_sqshrunt_vec, 6711 .opt_opc = sqshrunt_vec_list, 6712 .load_dest = true, 6713 .fno = gen_helper_sve2_sqshrunt_h, 6714 .vece = MO_16 }, 6715 { .fniv = gen_sqshrunt_vec, 6716 .opt_opc = sqshrunt_vec_list, 6717 .load_dest = true, 6718 .fno = gen_helper_sve2_sqshrunt_s, 6719 .vece = MO_32 }, 6720 { .fniv = gen_sqshrunt_vec, 6721 .opt_opc = sqshrunt_vec_list, 6722 .load_dest = true, 6723 .fno = gen_helper_sve2_sqshrunt_d, 6724 .vece = MO_64 }, 6725 }; 6726 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops) 6727 6728 static const GVecGen2i sqrshrunb_ops[3] = { 6729 { .fno = gen_helper_sve2_sqrshrunb_h }, 6730 { .fno = gen_helper_sve2_sqrshrunb_s }, 6731 { .fno = gen_helper_sve2_sqrshrunb_d }, 6732 }; 6733 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops) 6734 6735 static const GVecGen2i sqrshrunt_ops[3] = { 6736 { .fno = gen_helper_sve2_sqrshrunt_h }, 6737 { .fno = gen_helper_sve2_sqrshrunt_s }, 6738 { .fno = gen_helper_sve2_sqrshrunt_d }, 6739 }; 6740 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops) 6741 6742 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 6743 TCGv_vec n, int64_t shr) 6744 { 6745 TCGv_vec t = tcg_temp_new_vec_matching(d); 6746 int halfbits = 4 << vece; 6747 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6748 int64_t min = -max - 1; 6749 6750 tcg_gen_sari_vec(vece, n, n, shr); 6751 tcg_gen_dupi_vec(vece, t, min); 6752 tcg_gen_smax_vec(vece, n, n, t); 6753 tcg_gen_dupi_vec(vece, t, max); 6754 tcg_gen_smin_vec(vece, n, n, t); 6755 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6756 tcg_gen_and_vec(vece, d, n, t); 6757 } 6758 6759 static const TCGOpcode sqshrnb_vec_list[] = { 6760 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6761 }; 6762 static const GVecGen2i sqshrnb_ops[3] = { 6763 { .fniv = gen_sqshrnb_vec, 6764 .opt_opc = sqshrnb_vec_list, 6765 .fno = gen_helper_sve2_sqshrnb_h, 6766 .vece = MO_16 }, 6767 { .fniv = gen_sqshrnb_vec, 6768 .opt_opc = sqshrnb_vec_list, 6769 .fno = gen_helper_sve2_sqshrnb_s, 6770 .vece = MO_32 }, 6771 { .fniv = gen_sqshrnb_vec, 6772 .opt_opc = sqshrnb_vec_list, 6773 .fno = gen_helper_sve2_sqshrnb_d, 6774 .vece = MO_64 }, 6775 }; 6776 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) 6777 6778 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 6779 TCGv_vec n, int64_t shr) 6780 { 6781 TCGv_vec t = tcg_temp_new_vec_matching(d); 6782 int halfbits = 4 << vece; 6783 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6784 int64_t min = -max - 1; 6785 6786 tcg_gen_sari_vec(vece, n, n, shr); 6787 tcg_gen_dupi_vec(vece, t, min); 6788 tcg_gen_smax_vec(vece, n, n, t); 6789 tcg_gen_dupi_vec(vece, t, max); 6790 tcg_gen_smin_vec(vece, n, n, t); 6791 tcg_gen_shli_vec(vece, n, n, halfbits); 6792 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6793 tcg_gen_bitsel_vec(vece, d, t, d, n); 6794 } 6795 6796 static const TCGOpcode sqshrnt_vec_list[] = { 6797 INDEX_op_shli_vec, INDEX_op_sari_vec, 6798 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6799 }; 6800 static const GVecGen2i sqshrnt_ops[3] = { 6801 { .fniv = gen_sqshrnt_vec, 6802 .opt_opc = sqshrnt_vec_list, 6803 .load_dest = true, 6804 .fno = gen_helper_sve2_sqshrnt_h, 6805 .vece = MO_16 }, 6806 { .fniv = gen_sqshrnt_vec, 6807 .opt_opc = sqshrnt_vec_list, 6808 .load_dest = true, 6809 .fno = gen_helper_sve2_sqshrnt_s, 6810 .vece = MO_32 }, 6811 { .fniv = gen_sqshrnt_vec, 6812 .opt_opc = sqshrnt_vec_list, 6813 .load_dest = true, 6814 .fno = gen_helper_sve2_sqshrnt_d, 6815 .vece = MO_64 }, 6816 }; 6817 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops) 6818 6819 static const GVecGen2i sqrshrnb_ops[3] = { 6820 { .fno = gen_helper_sve2_sqrshrnb_h }, 6821 { .fno = gen_helper_sve2_sqrshrnb_s }, 6822 { .fno = gen_helper_sve2_sqrshrnb_d }, 6823 }; 6824 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops) 6825 6826 static const GVecGen2i sqrshrnt_ops[3] = { 6827 { .fno = gen_helper_sve2_sqrshrnt_h }, 6828 { .fno = gen_helper_sve2_sqrshrnt_s }, 6829 { .fno = gen_helper_sve2_sqrshrnt_d }, 6830 }; 6831 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) 6832 6833 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 6834 TCGv_vec n, int64_t shr) 6835 { 6836 TCGv_vec t = tcg_temp_new_vec_matching(d); 6837 int halfbits = 4 << vece; 6838 6839 tcg_gen_shri_vec(vece, n, n, shr); 6840 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6841 tcg_gen_umin_vec(vece, d, n, t); 6842 } 6843 6844 static const TCGOpcode uqshrnb_vec_list[] = { 6845 INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6846 }; 6847 static const GVecGen2i uqshrnb_ops[3] = { 6848 { .fniv = gen_uqshrnb_vec, 6849 .opt_opc = uqshrnb_vec_list, 6850 .fno = gen_helper_sve2_uqshrnb_h, 6851 .vece = MO_16 }, 6852 { .fniv = gen_uqshrnb_vec, 6853 .opt_opc = uqshrnb_vec_list, 6854 .fno = gen_helper_sve2_uqshrnb_s, 6855 .vece = MO_32 }, 6856 { .fniv = gen_uqshrnb_vec, 6857 .opt_opc = uqshrnb_vec_list, 6858 .fno = gen_helper_sve2_uqshrnb_d, 6859 .vece = MO_64 }, 6860 }; 6861 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) 6862 6863 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, 6864 TCGv_vec n, int64_t shr) 6865 { 6866 TCGv_vec t = tcg_temp_new_vec_matching(d); 6867 int halfbits = 4 << vece; 6868 6869 tcg_gen_shri_vec(vece, n, n, shr); 6870 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6871 tcg_gen_umin_vec(vece, n, n, t); 6872 tcg_gen_shli_vec(vece, n, n, halfbits); 6873 tcg_gen_bitsel_vec(vece, d, t, d, n); 6874 } 6875 6876 static const TCGOpcode uqshrnt_vec_list[] = { 6877 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6878 }; 6879 static const GVecGen2i uqshrnt_ops[3] = { 6880 { .fniv = gen_uqshrnt_vec, 6881 .opt_opc = uqshrnt_vec_list, 6882 .load_dest = true, 6883 .fno = gen_helper_sve2_uqshrnt_h, 6884 .vece = MO_16 }, 6885 { .fniv = gen_uqshrnt_vec, 6886 .opt_opc = uqshrnt_vec_list, 6887 .load_dest = true, 6888 .fno = gen_helper_sve2_uqshrnt_s, 6889 .vece = MO_32 }, 6890 { .fniv = gen_uqshrnt_vec, 6891 .opt_opc = uqshrnt_vec_list, 6892 .load_dest = true, 6893 .fno = gen_helper_sve2_uqshrnt_d, 6894 .vece = MO_64 }, 6895 }; 6896 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops) 6897 6898 static const GVecGen2i uqrshrnb_ops[3] = { 6899 { .fno = gen_helper_sve2_uqrshrnb_h }, 6900 { .fno = gen_helper_sve2_uqrshrnb_s }, 6901 { .fno = gen_helper_sve2_uqrshrnb_d }, 6902 }; 6903 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops) 6904 6905 static const GVecGen2i uqrshrnt_ops[3] = { 6906 { .fno = gen_helper_sve2_uqrshrnt_h }, 6907 { .fno = gen_helper_sve2_uqrshrnt_s }, 6908 { .fno = gen_helper_sve2_uqrshrnt_d }, 6909 }; 6910 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops) 6911 6912 #define DO_SVE2_ZZZ_NARROW(NAME, name) \ 6913 static gen_helper_gvec_3 * const name##_fns[4] = { \ 6914 NULL, gen_helper_sve2_##name##_h, \ 6915 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 6916 }; \ 6917 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \ 6918 name##_fns[a->esz], a, 0) 6919 6920 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) 6921 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt) 6922 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb) 6923 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt) 6924 6925 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb) 6926 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt) 6927 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb) 6928 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) 6929 6930 static gen_helper_gvec_flags_4 * const match_fns[4] = { 6931 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL 6932 }; 6933 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) 6934 6935 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = { 6936 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL 6937 }; 6938 TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) 6939 6940 static gen_helper_gvec_4 * const histcnt_fns[4] = { 6941 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d 6942 }; 6943 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, 6944 histcnt_fns[a->esz], a, 0) 6945 6946 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, 6947 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) 6948 6949 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) 6950 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) 6951 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) 6952 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) 6953 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) 6954 6955 /* 6956 * SVE Integer Multiply-Add (unpredicated) 6957 */ 6958 6959 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, 6960 gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, 6961 0, FPST_FPCR) 6962 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, 6963 gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, 6964 0, FPST_FPCR) 6965 6966 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { 6967 NULL, gen_helper_sve2_sqdmlal_zzzw_h, 6968 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, 6969 }; 6970 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6971 sqdmlal_zzzw_fns[a->esz], a, 0) 6972 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6973 sqdmlal_zzzw_fns[a->esz], a, 3) 6974 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 6975 sqdmlal_zzzw_fns[a->esz], a, 2) 6976 6977 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = { 6978 NULL, gen_helper_sve2_sqdmlsl_zzzw_h, 6979 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d, 6980 }; 6981 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6982 sqdmlsl_zzzw_fns[a->esz], a, 0) 6983 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 6984 sqdmlsl_zzzw_fns[a->esz], a, 3) 6985 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 6986 sqdmlsl_zzzw_fns[a->esz], a, 2) 6987 6988 static gen_helper_gvec_4 * const sqrdmlah_fns[] = { 6989 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h, 6990 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d, 6991 }; 6992 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 6993 sqrdmlah_fns[a->esz], a, 0) 6994 6995 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = { 6996 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h, 6997 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d, 6998 }; 6999 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7000 sqrdmlsh_fns[a->esz], a, 0) 7001 7002 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = { 7003 NULL, gen_helper_sve2_smlal_zzzw_h, 7004 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d, 7005 }; 7006 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7007 smlal_zzzw_fns[a->esz], a, 0) 7008 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7009 smlal_zzzw_fns[a->esz], a, 1) 7010 7011 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = { 7012 NULL, gen_helper_sve2_umlal_zzzw_h, 7013 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d, 7014 }; 7015 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7016 umlal_zzzw_fns[a->esz], a, 0) 7017 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7018 umlal_zzzw_fns[a->esz], a, 1) 7019 7020 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = { 7021 NULL, gen_helper_sve2_smlsl_zzzw_h, 7022 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d, 7023 }; 7024 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7025 smlsl_zzzw_fns[a->esz], a, 0) 7026 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7027 smlsl_zzzw_fns[a->esz], a, 1) 7028 7029 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = { 7030 NULL, gen_helper_sve2_umlsl_zzzw_h, 7031 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, 7032 }; 7033 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7034 umlsl_zzzw_fns[a->esz], a, 0) 7035 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7036 umlsl_zzzw_fns[a->esz], a, 1) 7037 7038 static gen_helper_gvec_4 * const cmla_fns[] = { 7039 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, 7040 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, 7041 }; 7042 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7043 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7044 7045 static gen_helper_gvec_4 * const cdot_fns[] = { 7046 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d 7047 }; 7048 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7049 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7050 7051 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { 7052 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, 7053 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, 7054 }; 7055 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7056 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7057 7058 TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7059 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0) 7060 7061 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, 7062 gen_helper_crypto_aesmc, a->rd, a->rd, 0) 7063 TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz, 7064 gen_helper_crypto_aesimc, a->rd, a->rd, 0) 7065 7066 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7067 gen_helper_crypto_aese, a, 0) 7068 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7069 gen_helper_crypto_aesd, a, 0) 7070 7071 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7072 gen_helper_crypto_sm4e, a, 0) 7073 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7074 gen_helper_crypto_sm4ekey, a, 0) 7075 7076 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, 7077 gen_gvec_rax1, a) 7078 7079 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, 7080 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR) 7081 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, 7082 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR) 7083 7084 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 7085 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR) 7086 7087 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, 7088 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR) 7089 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, 7090 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR) 7091 7092 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, 7093 FPROUNDING_ODD, gen_helper_sve_fcvt_ds) 7094 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a, 7095 FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds) 7096 7097 static gen_helper_gvec_3_ptr * const flogb_fns[] = { 7098 NULL, gen_helper_flogb_h, 7099 gen_helper_flogb_s, gen_helper_flogb_d 7100 }; 7101 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], 7102 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 7103 7104 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) 7105 { 7106 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s, 7107 a->rd, a->rn, a->rm, a->ra, 7108 (sel << 1) | sub, tcg_env); 7109 } 7110 7111 TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false) 7112 TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true) 7113 TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false) 7114 TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true) 7115 7116 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) 7117 { 7118 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s, 7119 a->rd, a->rn, a->rm, a->ra, 7120 (a->index << 2) | (sel << 1) | sub, tcg_env); 7121 } 7122 7123 TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false) 7124 TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true) 7125 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false) 7126 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true) 7127 7128 TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7129 gen_helper_gvec_smmla_b, a, 0) 7130 TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7131 gen_helper_gvec_usmmla_b, a, 0) 7132 TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7133 gen_helper_gvec_ummla_b, a, 0) 7134 7135 TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz, 7136 gen_helper_gvec_bfdot, a, 0) 7137 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz, 7138 gen_helper_gvec_bfdot_idx, a) 7139 7140 TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, 7141 gen_helper_gvec_bfmmla, a, 0) 7142 7143 static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) 7144 { 7145 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, 7146 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR); 7147 } 7148 7149 TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) 7150 TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true) 7151 7152 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) 7153 { 7154 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, 7155 a->rd, a->rn, a->rm, a->ra, 7156 (a->index << 1) | sel, FPST_FPCR); 7157 } 7158 7159 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) 7160 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) 7161 7162 static bool trans_PSEL(DisasContext *s, arg_psel *a) 7163 { 7164 int vl = vec_full_reg_size(s); 7165 int pl = pred_gvec_reg_size(s); 7166 int elements = vl >> a->esz; 7167 TCGv_i64 tmp, didx, dbit; 7168 TCGv_ptr ptr; 7169 7170 if (!dc_isar_feature(aa64_sme, s)) { 7171 return false; 7172 } 7173 if (!sve_access_check(s)) { 7174 return true; 7175 } 7176 7177 tmp = tcg_temp_new_i64(); 7178 dbit = tcg_temp_new_i64(); 7179 didx = tcg_temp_new_i64(); 7180 ptr = tcg_temp_new_ptr(); 7181 7182 /* Compute the predicate element. */ 7183 tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm); 7184 if (is_power_of_2(elements)) { 7185 tcg_gen_andi_i64(tmp, tmp, elements - 1); 7186 } else { 7187 tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements)); 7188 } 7189 7190 /* Extract the predicate byte and bit indices. */ 7191 tcg_gen_shli_i64(tmp, tmp, a->esz); 7192 tcg_gen_andi_i64(dbit, tmp, 7); 7193 tcg_gen_shri_i64(didx, tmp, 3); 7194 if (HOST_BIG_ENDIAN) { 7195 tcg_gen_xori_i64(didx, didx, 7); 7196 } 7197 7198 /* Load the predicate word. */ 7199 tcg_gen_trunc_i64_ptr(ptr, didx); 7200 tcg_gen_add_ptr(ptr, ptr, tcg_env); 7201 tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm)); 7202 7203 /* Extract the predicate bit and replicate to MO_64. */ 7204 tcg_gen_shr_i64(tmp, tmp, dbit); 7205 tcg_gen_andi_i64(tmp, tmp, 1); 7206 tcg_gen_neg_i64(tmp, tmp); 7207 7208 /* Apply to either copy the source, or write zeros. */ 7209 tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), 7210 pred_full_reg_offset(s, a->pn), tmp, pl, pl); 7211 return true; 7212 } 7213 7214 static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7215 { 7216 tcg_gen_smax_i32(d, a, n); 7217 tcg_gen_smin_i32(d, d, m); 7218 } 7219 7220 static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7221 { 7222 tcg_gen_smax_i64(d, a, n); 7223 tcg_gen_smin_i64(d, d, m); 7224 } 7225 7226 static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7227 TCGv_vec m, TCGv_vec a) 7228 { 7229 tcg_gen_smax_vec(vece, d, a, n); 7230 tcg_gen_smin_vec(vece, d, d, m); 7231 } 7232 7233 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7234 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7235 { 7236 static const TCGOpcode vecop[] = { 7237 INDEX_op_smin_vec, INDEX_op_smax_vec, 0 7238 }; 7239 static const GVecGen4 ops[4] = { 7240 { .fniv = gen_sclamp_vec, 7241 .fno = gen_helper_gvec_sclamp_b, 7242 .opt_opc = vecop, 7243 .vece = MO_8 }, 7244 { .fniv = gen_sclamp_vec, 7245 .fno = gen_helper_gvec_sclamp_h, 7246 .opt_opc = vecop, 7247 .vece = MO_16 }, 7248 { .fni4 = gen_sclamp_i32, 7249 .fniv = gen_sclamp_vec, 7250 .fno = gen_helper_gvec_sclamp_s, 7251 .opt_opc = vecop, 7252 .vece = MO_32 }, 7253 { .fni8 = gen_sclamp_i64, 7254 .fniv = gen_sclamp_vec, 7255 .fno = gen_helper_gvec_sclamp_d, 7256 .opt_opc = vecop, 7257 .vece = MO_64, 7258 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7259 }; 7260 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7261 } 7262 7263 TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) 7264 7265 static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7266 { 7267 tcg_gen_umax_i32(d, a, n); 7268 tcg_gen_umin_i32(d, d, m); 7269 } 7270 7271 static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7272 { 7273 tcg_gen_umax_i64(d, a, n); 7274 tcg_gen_umin_i64(d, d, m); 7275 } 7276 7277 static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7278 TCGv_vec m, TCGv_vec a) 7279 { 7280 tcg_gen_umax_vec(vece, d, a, n); 7281 tcg_gen_umin_vec(vece, d, d, m); 7282 } 7283 7284 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7285 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7286 { 7287 static const TCGOpcode vecop[] = { 7288 INDEX_op_umin_vec, INDEX_op_umax_vec, 0 7289 }; 7290 static const GVecGen4 ops[4] = { 7291 { .fniv = gen_uclamp_vec, 7292 .fno = gen_helper_gvec_uclamp_b, 7293 .opt_opc = vecop, 7294 .vece = MO_8 }, 7295 { .fniv = gen_uclamp_vec, 7296 .fno = gen_helper_gvec_uclamp_h, 7297 .opt_opc = vecop, 7298 .vece = MO_16 }, 7299 { .fni4 = gen_uclamp_i32, 7300 .fniv = gen_uclamp_vec, 7301 .fno = gen_helper_gvec_uclamp_s, 7302 .opt_opc = vecop, 7303 .vece = MO_32 }, 7304 { .fni8 = gen_uclamp_i64, 7305 .fniv = gen_uclamp_vec, 7306 .fno = gen_helper_gvec_uclamp_d, 7307 .opt_opc = vecop, 7308 .vece = MO_64, 7309 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7310 }; 7311 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7312 } 7313 7314 TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) 7315