1 /* 2 * AArch64 SVE translation 3 * 4 * Copyright (c) 2018 Linaro, Ltd 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "fpu/softfloat.h" 24 25 26 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, 27 TCGv_i64, uint32_t, uint32_t); 28 29 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, 30 TCGv_ptr, TCGv_i32); 31 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr, 32 TCGv_ptr, TCGv_ptr, TCGv_i32); 33 34 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i64); 35 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, 36 TCGv_ptr, TCGv_i64, TCGv_i64); 37 38 /* 39 * Helpers for extracting complex instruction fields. 40 */ 41 42 /* See e.g. ASR (immediate, predicated). 43 * Returns -1 for unallocated encoding; diagnose later. 44 */ 45 static int tszimm_esz(DisasContext *s, int x) 46 { 47 x >>= 3; /* discard imm3 */ 48 return 31 - clz32(x); 49 } 50 51 static int tszimm_shr(DisasContext *s, int x) 52 { 53 /* 54 * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the 55 * trans function will check for esz < 0), so we can return any 56 * value we like from here in that case as long as we avoid UB. 57 */ 58 int esz = tszimm_esz(s, x); 59 if (esz < 0) { 60 return esz; 61 } 62 return (16 << esz) - x; 63 } 64 65 /* See e.g. LSL (immediate, predicated). */ 66 static int tszimm_shl(DisasContext *s, int x) 67 { 68 /* As with tszimm_shr(), value will be unused if esz < 0 */ 69 int esz = tszimm_esz(s, x); 70 if (esz < 0) { 71 return esz; 72 } 73 return x - (8 << esz); 74 } 75 76 /* The SH bit is in bit 8. Extract the low 8 and shift. */ 77 static inline int expand_imm_sh8s(DisasContext *s, int x) 78 { 79 return (int8_t)x << (x & 0x100 ? 8 : 0); 80 } 81 82 static inline int expand_imm_sh8u(DisasContext *s, int x) 83 { 84 return (uint8_t)x << (x & 0x100 ? 8 : 0); 85 } 86 87 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype) 88 * with unsigned data. C.f. SVE Memory Contiguous Load Group. 89 */ 90 static inline int msz_dtype(DisasContext *s, int msz) 91 { 92 static const uint8_t dtype[5] = { 0, 5, 10, 15, 18 }; 93 return dtype[msz]; 94 } 95 96 /* 97 * Include the generated decoder. 98 */ 99 100 #include "decode-sve.c.inc" 101 102 /* 103 * Implement all of the translator functions referenced by the decoder. 104 */ 105 106 /* Invoke an out-of-line helper on 2 Zregs. 
*/ 107 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, 108 int rd, int rn, int data) 109 { 110 if (fn == NULL) { 111 return false; 112 } 113 if (sve_access_check(s)) { 114 unsigned vsz = vec_full_reg_size(s); 115 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 116 vec_full_reg_offset(s, rn), 117 vsz, vsz, data, fn); 118 } 119 return true; 120 } 121 122 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 123 int rd, int rn, int data, 124 ARMFPStatusFlavour flavour) 125 { 126 if (fn == NULL) { 127 return false; 128 } 129 if (sve_access_check(s)) { 130 unsigned vsz = vec_full_reg_size(s); 131 TCGv_ptr status = fpstatus_ptr(flavour); 132 133 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 134 vec_full_reg_offset(s, rn), 135 status, vsz, vsz, data, fn); 136 } 137 return true; 138 } 139 140 static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 141 arg_rr_esz *a, int data) 142 { 143 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, 144 select_ah_fpst(s, a->esz)); 145 } 146 147 /* Invoke an out-of-line helper on 3 Zregs. */ 148 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 149 int rd, int rn, int rm, int data) 150 { 151 if (fn == NULL) { 152 return false; 153 } 154 if (sve_access_check(s)) { 155 unsigned vsz = vec_full_reg_size(s); 156 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 157 vec_full_reg_offset(s, rn), 158 vec_full_reg_offset(s, rm), 159 vsz, vsz, data, fn); 160 } 161 return true; 162 } 163 164 static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 165 arg_rrr_esz *a, int data) 166 { 167 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); 168 } 169 170 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */ 171 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 172 int rd, int rn, int rm, 173 int data, ARMFPStatusFlavour flavour) 174 { 175 if (fn == NULL) { 176 return false; 177 } 178 if (sve_access_check(s)) { 179 unsigned vsz = vec_full_reg_size(s); 180 TCGv_ptr status = fpstatus_ptr(flavour); 181 182 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 183 vec_full_reg_offset(s, rn), 184 vec_full_reg_offset(s, rm), 185 status, vsz, vsz, data, fn); 186 } 187 return true; 188 } 189 190 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 191 arg_rrr_esz *a, int data) 192 { 193 /* These insns use MO_8 to encode BFloat16 */ 194 if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { 195 return false; 196 } 197 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 198 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 199 } 200 201 static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 202 arg_rrr_esz *a, int data) 203 { 204 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 205 select_ah_fpst(s, a->esz)); 206 } 207 208 /* Invoke an out-of-line helper on 4 Zregs. 
*/ 209 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 210 int rd, int rn, int rm, int ra, int data) 211 { 212 if (fn == NULL) { 213 return false; 214 } 215 if (sve_access_check(s)) { 216 unsigned vsz = vec_full_reg_size(s); 217 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 218 vec_full_reg_offset(s, rn), 219 vec_full_reg_offset(s, rm), 220 vec_full_reg_offset(s, ra), 221 vsz, vsz, data, fn); 222 } 223 return true; 224 } 225 226 static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 227 arg_rrrr_esz *a, int data) 228 { 229 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 230 } 231 232 static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn, 233 arg_rrxr_esz *a) 234 { 235 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 236 } 237 238 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */ 239 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 240 int rd, int rn, int rm, int ra, 241 int data, TCGv_ptr ptr) 242 { 243 if (fn == NULL) { 244 return false; 245 } 246 if (sve_access_check(s)) { 247 unsigned vsz = vec_full_reg_size(s); 248 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 249 vec_full_reg_offset(s, rn), 250 vec_full_reg_offset(s, rm), 251 vec_full_reg_offset(s, ra), 252 ptr, vsz, vsz, data, fn); 253 } 254 return true; 255 } 256 257 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 258 int rd, int rn, int rm, int ra, 259 int data, ARMFPStatusFlavour flavour) 260 { 261 TCGv_ptr status = fpstatus_ptr(flavour); 262 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status); 263 return ret; 264 } 265 266 static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 267 int rd, int rn, int rm, int ra, 268 int data) 269 { 270 return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env); 271 } 272 273 static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 274 arg_rrrr_esz *a, int data) 275 { 276 return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 277 } 278 279 static bool gen_gvec_env_arg_zzxz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 280 arg_rrxr_esz *a) 281 { 282 return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 283 } 284 285 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */ 286 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn, 287 int rd, int rn, int rm, int ra, int pg, 288 int data, ARMFPStatusFlavour flavour) 289 { 290 if (fn == NULL) { 291 return false; 292 } 293 if (sve_access_check(s)) { 294 unsigned vsz = vec_full_reg_size(s); 295 TCGv_ptr status = fpstatus_ptr(flavour); 296 297 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd), 298 vec_full_reg_offset(s, rn), 299 vec_full_reg_offset(s, rm), 300 vec_full_reg_offset(s, ra), 301 pred_full_reg_offset(s, pg), 302 status, vsz, vsz, data, fn); 303 } 304 return true; 305 } 306 307 /* Invoke an out-of-line helper on 2 Zregs and a predicate. 
*/ 308 static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, 309 int rd, int rn, int pg, int data) 310 { 311 if (fn == NULL) { 312 return false; 313 } 314 if (sve_access_check(s)) { 315 unsigned vsz = vec_full_reg_size(s); 316 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 317 vec_full_reg_offset(s, rn), 318 pred_full_reg_offset(s, pg), 319 vsz, vsz, data, fn); 320 } 321 return true; 322 } 323 324 static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn, 325 arg_rpr_esz *a, int data) 326 { 327 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data); 328 } 329 330 static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn, 331 arg_rpri_esz *a) 332 { 333 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); 334 } 335 336 static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn, 337 int rd, int rn, int pg, int data, 338 ARMFPStatusFlavour flavour) 339 { 340 if (fn == NULL) { 341 return false; 342 } 343 if (sve_access_check(s)) { 344 unsigned vsz = vec_full_reg_size(s); 345 TCGv_ptr status = fpstatus_ptr(flavour); 346 347 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 348 vec_full_reg_offset(s, rn), 349 pred_full_reg_offset(s, pg), 350 status, vsz, vsz, data, fn); 351 } 352 return true; 353 } 354 355 static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 356 arg_rpr_esz *a, int data, 357 ARMFPStatusFlavour flavour) 358 { 359 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour); 360 } 361 362 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 363 static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, 364 int rd, int rn, int rm, int pg, int data) 365 { 366 if (fn == NULL) { 367 return false; 368 } 369 if (sve_access_check(s)) { 370 unsigned vsz = vec_full_reg_size(s); 371 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 372 vec_full_reg_offset(s, rn), 373 vec_full_reg_offset(s, rm), 374 pred_full_reg_offset(s, pg), 375 vsz, vsz, data, fn); 376 } 377 return true; 378 } 379 380 static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn, 381 arg_rprr_esz *a, int data) 382 { 383 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data); 384 } 385 386 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 387 static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, 388 int rd, int rn, int rm, int pg, int data, 389 ARMFPStatusFlavour flavour) 390 { 391 if (fn == NULL) { 392 return false; 393 } 394 if (sve_access_check(s)) { 395 unsigned vsz = vec_full_reg_size(s); 396 TCGv_ptr status = fpstatus_ptr(flavour); 397 398 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 399 vec_full_reg_offset(s, rn), 400 vec_full_reg_offset(s, rm), 401 pred_full_reg_offset(s, pg), 402 status, vsz, vsz, data, fn); 403 } 404 return true; 405 } 406 407 static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 408 arg_rprr_esz *a) 409 { 410 /* These insns use MO_8 to encode BFloat16. */ 411 if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { 412 return false; 413 } 414 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, 415 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 416 } 417 418 /* Invoke a vector expander on two Zregs and an immediate. 
*/ 419 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 420 int esz, int rd, int rn, uint64_t imm) 421 { 422 if (gvec_fn == NULL) { 423 return false; 424 } 425 if (sve_access_check(s)) { 426 unsigned vsz = vec_full_reg_size(s); 427 gvec_fn(esz, vec_full_reg_offset(s, rd), 428 vec_full_reg_offset(s, rn), imm, vsz, vsz); 429 } 430 return true; 431 } 432 433 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 434 arg_rri_esz *a) 435 { 436 if (a->esz < 0) { 437 /* Invalid tsz encoding -- see tszimm_esz. */ 438 return false; 439 } 440 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm); 441 } 442 443 /* Invoke a vector expander on three Zregs. */ 444 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, 445 int esz, int rd, int rn, int rm) 446 { 447 if (gvec_fn == NULL) { 448 return false; 449 } 450 if (sve_access_check(s)) { 451 unsigned vsz = vec_full_reg_size(s); 452 gvec_fn(esz, vec_full_reg_offset(s, rd), 453 vec_full_reg_offset(s, rn), 454 vec_full_reg_offset(s, rm), vsz, vsz); 455 } 456 return true; 457 } 458 459 static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn, 460 arg_rrr_esz *a) 461 { 462 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); 463 } 464 465 /* Invoke a vector expander on four Zregs. */ 466 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, 467 arg_rrrr_esz *a) 468 { 469 if (gvec_fn == NULL) { 470 return false; 471 } 472 if (sve_access_check(s)) { 473 unsigned vsz = vec_full_reg_size(s); 474 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 475 vec_full_reg_offset(s, a->rn), 476 vec_full_reg_offset(s, a->rm), 477 vec_full_reg_offset(s, a->ra), vsz, vsz); 478 } 479 return true; 480 } 481 482 /* Invoke a vector move on two Zregs. */ 483 static bool do_mov_z(DisasContext *s, int rd, int rn) 484 { 485 if (sve_access_check(s)) { 486 unsigned vsz = vec_full_reg_size(s); 487 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd), 488 vec_full_reg_offset(s, rn), vsz, vsz); 489 } 490 return true; 491 } 492 493 /* Initialize a Zreg with replications of a 64-bit immediate. */ 494 static void do_dupi_z(DisasContext *s, int rd, uint64_t word) 495 { 496 unsigned vsz = vec_full_reg_size(s); 497 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); 498 } 499 500 /* Invoke a vector expander on three Pregs. */ 501 static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, 502 int rd, int rn, int rm) 503 { 504 if (sve_access_check(s)) { 505 unsigned psz = pred_gvec_reg_size(s); 506 gvec_fn(MO_64, pred_full_reg_offset(s, rd), 507 pred_full_reg_offset(s, rn), 508 pred_full_reg_offset(s, rm), psz, psz); 509 } 510 return true; 511 } 512 513 /* Invoke a vector move on two Pregs. */ 514 static bool do_mov_p(DisasContext *s, int rd, int rn) 515 { 516 if (sve_access_check(s)) { 517 unsigned psz = pred_gvec_reg_size(s); 518 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), 519 pred_full_reg_offset(s, rn), psz, psz); 520 } 521 return true; 522 } 523 524 /* Set the cpu flags as per a return from an SVE helper. */ 525 static void do_pred_flags(TCGv_i32 t) 526 { 527 tcg_gen_mov_i32(cpu_NF, t); 528 tcg_gen_andi_i32(cpu_ZF, t, 2); 529 tcg_gen_andi_i32(cpu_CF, t, 1); 530 tcg_gen_movi_i32(cpu_VF, 0); 531 } 532 533 /* Subroutines computing the ARM PredTest psuedofunction. 
*/ 534 static void do_predtest1(TCGv_i64 d, TCGv_i64 g) 535 { 536 TCGv_i32 t = tcg_temp_new_i32(); 537 538 gen_helper_sve_predtest1(t, d, g); 539 do_pred_flags(t); 540 } 541 542 static void do_predtest(DisasContext *s, int dofs, int gofs, int words) 543 { 544 TCGv_ptr dptr = tcg_temp_new_ptr(); 545 TCGv_ptr gptr = tcg_temp_new_ptr(); 546 TCGv_i32 t = tcg_temp_new_i32(); 547 548 tcg_gen_addi_ptr(dptr, tcg_env, dofs); 549 tcg_gen_addi_ptr(gptr, tcg_env, gofs); 550 551 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words)); 552 553 do_pred_flags(t); 554 } 555 556 /* For each element size, the bits within a predicate word that are active. */ 557 const uint64_t pred_esz_masks[5] = { 558 0xffffffffffffffffull, 0x5555555555555555ull, 559 0x1111111111111111ull, 0x0101010101010101ull, 560 0x0001000100010001ull, 561 }; 562 563 static bool trans_INVALID(DisasContext *s, arg_INVALID *a) 564 { 565 unallocated_encoding(s); 566 return true; 567 } 568 569 /* 570 *** SVE Logical - Unpredicated Group 571 */ 572 573 TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a) 574 TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) 575 TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) 576 TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) 577 578 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) 579 { 580 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { 581 return false; 582 } 583 if (sve_access_check(s)) { 584 unsigned vsz = vec_full_reg_size(s); 585 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd), 586 vec_full_reg_offset(s, a->rn), 587 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz); 588 } 589 return true; 590 } 591 592 TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a) 593 TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a) 594 595 static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 596 uint32_t a, uint32_t oprsz, uint32_t maxsz) 597 { 598 /* BSL differs from the generic bitsel in argument ordering. 
*/ 599 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz); 600 } 601 602 TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a) 603 604 static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 605 { 606 tcg_gen_andc_i64(n, k, n); 607 tcg_gen_andc_i64(m, m, k); 608 tcg_gen_or_i64(d, n, m); 609 } 610 611 static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 612 TCGv_vec m, TCGv_vec k) 613 { 614 tcg_gen_not_vec(vece, n, n); 615 tcg_gen_bitsel_vec(vece, d, k, n, m); 616 } 617 618 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 619 uint32_t a, uint32_t oprsz, uint32_t maxsz) 620 { 621 static const GVecGen4 op = { 622 .fni8 = gen_bsl1n_i64, 623 .fniv = gen_bsl1n_vec, 624 .fno = gen_helper_sve2_bsl1n, 625 .vece = MO_64, 626 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 627 }; 628 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 629 } 630 631 TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a) 632 633 static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 634 { 635 /* 636 * Z[dn] = (n & k) | (~m & ~k) 637 * = | ~(m | k) 638 */ 639 tcg_gen_and_i64(n, n, k); 640 if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { 641 tcg_gen_or_i64(m, m, k); 642 tcg_gen_orc_i64(d, n, m); 643 } else { 644 tcg_gen_nor_i64(m, m, k); 645 tcg_gen_or_i64(d, n, m); 646 } 647 } 648 649 static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 650 TCGv_vec m, TCGv_vec k) 651 { 652 tcg_gen_not_vec(vece, m, m); 653 tcg_gen_bitsel_vec(vece, d, k, n, m); 654 } 655 656 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 657 uint32_t a, uint32_t oprsz, uint32_t maxsz) 658 { 659 static const GVecGen4 op = { 660 .fni8 = gen_bsl2n_i64, 661 .fniv = gen_bsl2n_vec, 662 .fno = gen_helper_sve2_bsl2n, 663 .vece = MO_64, 664 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 665 }; 666 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 667 } 668 669 TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a) 670 671 static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 672 { 673 tcg_gen_and_i64(n, n, k); 674 tcg_gen_andc_i64(m, m, k); 675 tcg_gen_nor_i64(d, n, m); 676 } 677 678 static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 679 TCGv_vec m, TCGv_vec k) 680 { 681 tcg_gen_bitsel_vec(vece, d, k, n, m); 682 tcg_gen_not_vec(vece, d, d); 683 } 684 685 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 686 uint32_t a, uint32_t oprsz, uint32_t maxsz) 687 { 688 static const GVecGen4 op = { 689 .fni8 = gen_nbsl_i64, 690 .fniv = gen_nbsl_vec, 691 .fno = gen_helper_sve2_nbsl, 692 .vece = MO_64, 693 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 694 }; 695 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 696 } 697 698 TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a) 699 700 /* 701 *** SVE Integer Arithmetic - Unpredicated Group 702 */ 703 704 TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a) 705 TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a) 706 TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a) 707 TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a) 708 TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a) 709 TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a) 710 711 /* 712 *** SVE Integer Arithmetic - Binary Predicated Group 713 */ 714 715 /* Select active elememnts from Zn and inactive elements from Zm, 716 * storing the result in Zd. 
717 */ 718 static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) 719 { 720 static gen_helper_gvec_4 * const fns[4] = { 721 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, 722 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d 723 }; 724 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); 725 } 726 727 #define DO_ZPZZ(NAME, FEAT, name) \ 728 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \ 729 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \ 730 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \ 731 }; \ 732 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \ 733 name##_zpzz_fns[a->esz], a, 0) 734 735 DO_ZPZZ(AND_zpzz, aa64_sve, sve_and) 736 DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor) 737 DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr) 738 DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic) 739 740 DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add) 741 DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub) 742 743 DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax) 744 DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax) 745 DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin) 746 DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin) 747 DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd) 748 DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd) 749 750 DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul) 751 DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh) 752 DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh) 753 754 DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr) 755 DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr) 756 DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl) 757 758 static gen_helper_gvec_4 * const sdiv_fns[4] = { 759 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d 760 }; 761 TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0) 762 763 static gen_helper_gvec_4 * const udiv_fns[4] = { 764 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d 765 }; 766 TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0) 767 768 TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz) 769 770 /* 771 *** SVE Integer Arithmetic - Unary Predicated Group 772 */ 773 774 #define DO_ZPZ(NAME, FEAT, name) \ 775 static gen_helper_gvec_3 * const name##_fns[4] = { \ 776 gen_helper_##name##_b, gen_helper_##name##_h, \ 777 gen_helper_##name##_s, gen_helper_##name##_d, \ 778 }; \ 779 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0) 780 781 DO_ZPZ(CLS, aa64_sve, sve_cls) 782 DO_ZPZ(CLZ, aa64_sve, sve_clz) 783 DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz) 784 DO_ZPZ(CNOT, aa64_sve, sve_cnot) 785 DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz) 786 DO_ZPZ(ABS, aa64_sve, sve_abs) 787 DO_ZPZ(NEG, aa64_sve, sve_neg) 788 DO_ZPZ(RBIT, aa64_sve, sve_rbit) 789 DO_ZPZ(ORQV, aa64_sme2p1_or_sve2p1, sve2p1_orqv) 790 DO_ZPZ(EORQV, aa64_sme2p1_or_sve2p1, sve2p1_eorqv) 791 DO_ZPZ(ANDQV, aa64_sme2p1_or_sve2p1, sve2p1_andqv) 792 793 static gen_helper_gvec_3 * const fabs_fns[4] = { 794 NULL, gen_helper_sve_fabs_h, 795 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, 796 }; 797 static gen_helper_gvec_3 * const fabs_ah_fns[4] = { 798 NULL, gen_helper_sve_ah_fabs_h, 799 gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, 800 }; 801 TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, 802 s->fpcr_ah ? 
fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) 803 804 static gen_helper_gvec_3 * const fneg_fns[4] = { 805 NULL, gen_helper_sve_fneg_h, 806 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, 807 }; 808 static gen_helper_gvec_3 * const fneg_ah_fns[4] = { 809 NULL, gen_helper_sve_ah_fneg_h, 810 gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, 811 }; 812 TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, 813 s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) 814 815 static gen_helper_gvec_3 * const sxtb_fns[4] = { 816 NULL, gen_helper_sve_sxtb_h, 817 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d, 818 }; 819 TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0) 820 821 static gen_helper_gvec_3 * const uxtb_fns[4] = { 822 NULL, gen_helper_sve_uxtb_h, 823 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d, 824 }; 825 TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0) 826 827 static gen_helper_gvec_3 * const sxth_fns[4] = { 828 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d 829 }; 830 TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0) 831 832 static gen_helper_gvec_3 * const uxth_fns[4] = { 833 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d 834 }; 835 TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0) 836 837 TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, 838 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0) 839 TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, 840 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0) 841 842 static gen_helper_gvec_3 * const addqv_fns[4] = { 843 gen_helper_sve2p1_addqv_b, gen_helper_sve2p1_addqv_h, 844 gen_helper_sve2p1_addqv_s, gen_helper_sve2p1_addqv_d, 845 }; 846 TRANS_FEAT(ADDQV, aa64_sme2p1_or_sve2p1, 847 gen_gvec_ool_arg_zpz, addqv_fns[a->esz], a, 0) 848 849 static gen_helper_gvec_3 * const smaxqv_fns[4] = { 850 gen_helper_sve2p1_smaxqv_b, gen_helper_sve2p1_smaxqv_h, 851 gen_helper_sve2p1_smaxqv_s, gen_helper_sve2p1_smaxqv_d, 852 }; 853 TRANS_FEAT(SMAXQV, aa64_sme2p1_or_sve2p1, 854 gen_gvec_ool_arg_zpz, smaxqv_fns[a->esz], a, 0) 855 856 static gen_helper_gvec_3 * const sminqv_fns[4] = { 857 gen_helper_sve2p1_sminqv_b, gen_helper_sve2p1_sminqv_h, 858 gen_helper_sve2p1_sminqv_s, gen_helper_sve2p1_sminqv_d, 859 }; 860 TRANS_FEAT(SMINQV, aa64_sme2p1_or_sve2p1, 861 gen_gvec_ool_arg_zpz, sminqv_fns[a->esz], a, 0) 862 863 static gen_helper_gvec_3 * const umaxqv_fns[4] = { 864 gen_helper_sve2p1_umaxqv_b, gen_helper_sve2p1_umaxqv_h, 865 gen_helper_sve2p1_umaxqv_s, gen_helper_sve2p1_umaxqv_d, 866 }; 867 TRANS_FEAT(UMAXQV, aa64_sme2p1_or_sve2p1, 868 gen_gvec_ool_arg_zpz, umaxqv_fns[a->esz], a, 0) 869 870 static gen_helper_gvec_3 * const uminqv_fns[4] = { 871 gen_helper_sve2p1_uminqv_b, gen_helper_sve2p1_uminqv_h, 872 gen_helper_sve2p1_uminqv_s, gen_helper_sve2p1_uminqv_d, 873 }; 874 TRANS_FEAT(UMINQV, aa64_sme2p1_or_sve2p1, 875 gen_gvec_ool_arg_zpz, uminqv_fns[a->esz], a, 0) 876 877 /* 878 *** SVE Integer Reduction Group 879 */ 880 881 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32); 882 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a, 883 gen_helper_gvec_reduc *fn) 884 { 885 unsigned vsz = vec_full_reg_size(s); 886 TCGv_ptr t_zn, t_pg; 887 TCGv_i32 desc; 888 TCGv_i64 temp; 889 890 if (fn == NULL) { 891 return false; 892 } 893 if (!sve_access_check(s)) { 894 return true; 895 } 896 897 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 898 temp = tcg_temp_new_i64(); 899 t_zn = tcg_temp_new_ptr(); 900 t_pg = tcg_temp_new_ptr(); 901 902 
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 903 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 904 fn(temp, t_zn, t_pg, desc); 905 906 write_fp_dreg(s, a->rd, temp); 907 return true; 908 } 909 910 #define DO_VPZ(NAME, name) \ 911 static gen_helper_gvec_reduc * const name##_fns[4] = { \ 912 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ 913 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 914 }; \ 915 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz]) 916 917 DO_VPZ(ORV, orv) 918 DO_VPZ(ANDV, andv) 919 DO_VPZ(EORV, eorv) 920 921 DO_VPZ(UADDV, uaddv) 922 DO_VPZ(SMAXV, smaxv) 923 DO_VPZ(UMAXV, umaxv) 924 DO_VPZ(SMINV, sminv) 925 DO_VPZ(UMINV, uminv) 926 927 static gen_helper_gvec_reduc * const saddv_fns[4] = { 928 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h, 929 gen_helper_sve_saddv_s, NULL 930 }; 931 TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz]) 932 933 #undef DO_VPZ 934 935 /* 936 *** SVE Shift by Immediate - Predicated Group 937 */ 938 939 /* 940 * Copy Zn into Zd, storing zeros into inactive elements. 941 * If invert, store zeros into the active elements. 942 */ 943 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, 944 int esz, bool invert) 945 { 946 static gen_helper_gvec_3 * const fns[4] = { 947 gen_helper_sve_movz_b, gen_helper_sve_movz_h, 948 gen_helper_sve_movz_s, gen_helper_sve_movz_d, 949 }; 950 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); 951 } 952 953 static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr, 954 gen_helper_gvec_3 * const fns[4]) 955 { 956 int max; 957 958 if (a->esz < 0) { 959 /* Invalid tsz encoding -- see tszimm_esz. */ 960 return false; 961 } 962 963 /* 964 * Shift by element size is architecturally valid. 965 * For arithmetic right-shift, it's the same as by one less. 966 * For logical shifts and ASRD, it is a zeroing operation. 967 */ 968 max = 8 << a->esz; 969 if (a->imm >= max) { 970 if (asr) { 971 a->imm = max - 1; 972 } else { 973 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); 974 } 975 } 976 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a); 977 } 978 979 static gen_helper_gvec_3 * const asr_zpzi_fns[4] = { 980 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h, 981 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d, 982 }; 983 TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns) 984 985 static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = { 986 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h, 987 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d, 988 }; 989 TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns) 990 991 static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = { 992 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h, 993 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d, 994 }; 995 TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns) 996 997 static gen_helper_gvec_3 * const asrd_fns[4] = { 998 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h, 999 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d, 1000 }; 1001 TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns) 1002 1003 static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = { 1004 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h, 1005 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d, 1006 }; 1007 TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1008 a->esz < 0 ? 
NULL : sqshl_zpzi_fns[a->esz], a) 1009 1010 static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = { 1011 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h, 1012 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d, 1013 }; 1014 TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1015 a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a) 1016 1017 static gen_helper_gvec_3 * const srshr_fns[4] = { 1018 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h, 1019 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d, 1020 }; 1021 TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1022 a->esz < 0 ? NULL : srshr_fns[a->esz], a) 1023 1024 static gen_helper_gvec_3 * const urshr_fns[4] = { 1025 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h, 1026 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d, 1027 }; 1028 TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1029 a->esz < 0 ? NULL : urshr_fns[a->esz], a) 1030 1031 static gen_helper_gvec_3 * const sqshlu_fns[4] = { 1032 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h, 1033 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d, 1034 }; 1035 TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi, 1036 a->esz < 0 ? NULL : sqshlu_fns[a->esz], a) 1037 1038 /* 1039 *** SVE Bitwise Shift - Predicated Group 1040 */ 1041 1042 #define DO_ZPZW(NAME, name) \ 1043 static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \ 1044 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \ 1045 gen_helper_sve_##name##_zpzw_s, NULL \ 1046 }; \ 1047 TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \ 1048 a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0) 1049 1050 DO_ZPZW(ASR, asr) 1051 DO_ZPZW(LSR, lsr) 1052 DO_ZPZW(LSL, lsl) 1053 1054 #undef DO_ZPZW 1055 1056 /* 1057 *** SVE Bitwise Shift - Unpredicated Group 1058 */ 1059 1060 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr, 1061 void (*gvec_fn)(unsigned, uint32_t, uint32_t, 1062 int64_t, uint32_t, uint32_t)) 1063 { 1064 if (a->esz < 0) { 1065 /* Invalid tsz encoding -- see tszimm_esz. */ 1066 return false; 1067 } 1068 if (sve_access_check(s)) { 1069 unsigned vsz = vec_full_reg_size(s); 1070 /* Shift by element size is architecturally valid. For 1071 arithmetic right-shift, it's the same as by one less. 1072 Otherwise it is a zeroing operation. 
*/ 1073 if (a->imm >= 8 << a->esz) { 1074 if (asr) { 1075 a->imm = (8 << a->esz) - 1; 1076 } else { 1077 do_dupi_z(s, a->rd, 0); 1078 return true; 1079 } 1080 } 1081 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 1082 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 1083 } 1084 return true; 1085 } 1086 1087 TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari) 1088 TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri) 1089 TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli) 1090 1091 #define DO_ZZW(NAME, name) \ 1092 static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \ 1093 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \ 1094 gen_helper_sve_##name##_zzw_s, NULL \ 1095 }; \ 1096 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \ 1097 name##_zzw_fns[a->esz], a, 0) 1098 1099 DO_ZZW(ASR_zzw, asr) 1100 DO_ZZW(LSR_zzw, lsr) 1101 DO_ZZW(LSL_zzw, lsl) 1102 1103 #undef DO_ZZW 1104 1105 /* 1106 *** SVE Integer Multiply-Add Group 1107 */ 1108 1109 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a, 1110 gen_helper_gvec_5 *fn) 1111 { 1112 if (sve_access_check(s)) { 1113 unsigned vsz = vec_full_reg_size(s); 1114 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd), 1115 vec_full_reg_offset(s, a->ra), 1116 vec_full_reg_offset(s, a->rn), 1117 vec_full_reg_offset(s, a->rm), 1118 pred_full_reg_offset(s, a->pg), 1119 vsz, vsz, 0, fn); 1120 } 1121 return true; 1122 } 1123 1124 static gen_helper_gvec_5 * const mla_fns[4] = { 1125 gen_helper_sve_mla_b, gen_helper_sve_mla_h, 1126 gen_helper_sve_mla_s, gen_helper_sve_mla_d, 1127 }; 1128 TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz]) 1129 1130 static gen_helper_gvec_5 * const mls_fns[4] = { 1131 gen_helper_sve_mls_b, gen_helper_sve_mls_h, 1132 gen_helper_sve_mls_s, gen_helper_sve_mls_d, 1133 }; 1134 TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz]) 1135 1136 /* 1137 *** SVE Index Generation Group 1138 */ 1139 1140 static bool do_index(DisasContext *s, int esz, int rd, 1141 TCGv_i64 start, TCGv_i64 incr) 1142 { 1143 unsigned vsz; 1144 TCGv_i32 desc; 1145 TCGv_ptr t_zd; 1146 1147 if (!sve_access_check(s)) { 1148 return true; 1149 } 1150 1151 vsz = vec_full_reg_size(s); 1152 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1153 t_zd = tcg_temp_new_ptr(); 1154 1155 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 1156 if (esz == 3) { 1157 gen_helper_sve_index_d(t_zd, start, incr, desc); 1158 } else { 1159 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 1160 static index_fn * const fns[3] = { 1161 gen_helper_sve_index_b, 1162 gen_helper_sve_index_h, 1163 gen_helper_sve_index_s, 1164 }; 1165 TCGv_i32 s32 = tcg_temp_new_i32(); 1166 TCGv_i32 i32 = tcg_temp_new_i32(); 1167 1168 tcg_gen_extrl_i64_i32(s32, start); 1169 tcg_gen_extrl_i64_i32(i32, incr); 1170 fns[esz](t_zd, s32, i32, desc); 1171 } 1172 return true; 1173 } 1174 1175 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, 1176 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) 1177 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, 1178 tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) 1179 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, 1180 cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) 1181 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, 1182 cpu_reg(s, a->rn), cpu_reg(s, a->rm)) 1183 1184 /* 1185 *** SVE Stack Allocation Group 1186 */ 1187 1188 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) 1189 { 1190 if (!dc_isar_feature(aa64_sve, s)) { 1191 return 
false; 1192 } 1193 if (sve_access_check(s)) { 1194 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1195 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1196 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); 1197 } 1198 return true; 1199 } 1200 1201 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) 1202 { 1203 if (!dc_isar_feature(aa64_sme, s)) { 1204 return false; 1205 } 1206 if (sme_enabled_check(s)) { 1207 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1208 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1209 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); 1210 } 1211 return true; 1212 } 1213 1214 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) 1215 { 1216 if (!dc_isar_feature(aa64_sve, s)) { 1217 return false; 1218 } 1219 if (sve_access_check(s)) { 1220 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1221 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1222 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); 1223 } 1224 return true; 1225 } 1226 1227 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) 1228 { 1229 if (!dc_isar_feature(aa64_sme, s)) { 1230 return false; 1231 } 1232 if (sme_enabled_check(s)) { 1233 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1234 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1235 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); 1236 } 1237 return true; 1238 } 1239 1240 static bool trans_RDVL(DisasContext *s, arg_RDVL *a) 1241 { 1242 if (!dc_isar_feature(aa64_sve, s)) { 1243 return false; 1244 } 1245 if (sve_access_check(s)) { 1246 TCGv_i64 reg = cpu_reg(s, a->rd); 1247 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); 1248 } 1249 return true; 1250 } 1251 1252 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) 1253 { 1254 if (!dc_isar_feature(aa64_sme, s)) { 1255 return false; 1256 } 1257 if (sme_enabled_check(s)) { 1258 TCGv_i64 reg = cpu_reg(s, a->rd); 1259 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); 1260 } 1261 return true; 1262 } 1263 1264 /* 1265 *** SVE Compute Vector Address Group 1266 */ 1267 1268 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) 1269 { 1270 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); 1271 } 1272 1273 TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) 1274 TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) 1275 TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) 1276 TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32) 1277 1278 /* 1279 *** SVE Integer Misc - Unpredicated Group 1280 */ 1281 1282 static gen_helper_gvec_2 * const fexpa_fns[4] = { 1283 NULL, gen_helper_sve_fexpa_h, 1284 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, 1285 }; 1286 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, 1287 fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) 1288 1289 static gen_helper_gvec_3 * const ftssel_fns[4] = { 1290 NULL, gen_helper_sve_ftssel_h, 1291 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, 1292 }; 1293 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, 1294 ftssel_fns[a->esz], a, s->fpcr_ah) 1295 1296 /* 1297 *** SVE Predicate Logical Operations Group 1298 */ 1299 1300 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, 1301 const GVecGen4 *gvec_op) 1302 { 1303 if (!sve_access_check(s)) { 1304 return true; 1305 } 1306 1307 unsigned psz = pred_gvec_reg_size(s); 1308 int dofs = pred_full_reg_offset(s, a->rd); 1309 int nofs = pred_full_reg_offset(s, a->rn); 1310 int mofs = pred_full_reg_offset(s, a->rm); 1311 int gofs = pred_full_reg_offset(s, a->pg); 1312 1313 if (!a->s) { 
1314 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1315 return true; 1316 } 1317 1318 if (psz == 8) { 1319 /* Do the operation and the flags generation in temps. */ 1320 TCGv_i64 pd = tcg_temp_new_i64(); 1321 TCGv_i64 pn = tcg_temp_new_i64(); 1322 TCGv_i64 pm = tcg_temp_new_i64(); 1323 TCGv_i64 pg = tcg_temp_new_i64(); 1324 1325 tcg_gen_ld_i64(pn, tcg_env, nofs); 1326 tcg_gen_ld_i64(pm, tcg_env, mofs); 1327 tcg_gen_ld_i64(pg, tcg_env, gofs); 1328 1329 gvec_op->fni8(pd, pn, pm, pg); 1330 tcg_gen_st_i64(pd, tcg_env, dofs); 1331 1332 do_predtest1(pd, pg); 1333 } else { 1334 /* The operation and flags generation is large. The computation 1335 * of the flags depends on the original contents of the guarding 1336 * predicate. If the destination overwrites the guarding predicate, 1337 * then the easiest way to get this right is to save a copy. 1338 */ 1339 int tofs = gofs; 1340 if (a->rd == a->pg) { 1341 tofs = offsetof(CPUARMState, vfp.preg_tmp); 1342 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz); 1343 } 1344 1345 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1346 do_predtest(s, dofs, tofs, psz / 8); 1347 } 1348 return true; 1349 } 1350 1351 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1352 { 1353 tcg_gen_and_i64(pd, pn, pm); 1354 tcg_gen_and_i64(pd, pd, pg); 1355 } 1356 1357 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1358 TCGv_vec pm, TCGv_vec pg) 1359 { 1360 tcg_gen_and_vec(vece, pd, pn, pm); 1361 tcg_gen_and_vec(vece, pd, pd, pg); 1362 } 1363 1364 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) 1365 { 1366 static const GVecGen4 op = { 1367 .fni8 = gen_and_pg_i64, 1368 .fniv = gen_and_pg_vec, 1369 .fno = gen_helper_sve_and_pppp, 1370 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1371 }; 1372 1373 if (!dc_isar_feature(aa64_sve, s)) { 1374 return false; 1375 } 1376 if (!a->s) { 1377 if (a->rn == a->rm) { 1378 if (a->pg == a->rn) { 1379 return do_mov_p(s, a->rd, a->rn); 1380 } 1381 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); 1382 } else if (a->pg == a->rn || a->pg == a->rm) { 1383 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); 1384 } 1385 } 1386 return do_pppp_flags(s, a, &op); 1387 } 1388 1389 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1390 { 1391 tcg_gen_andc_i64(pd, pn, pm); 1392 tcg_gen_and_i64(pd, pd, pg); 1393 } 1394 1395 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1396 TCGv_vec pm, TCGv_vec pg) 1397 { 1398 tcg_gen_andc_vec(vece, pd, pn, pm); 1399 tcg_gen_and_vec(vece, pd, pd, pg); 1400 } 1401 1402 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) 1403 { 1404 static const GVecGen4 op = { 1405 .fni8 = gen_bic_pg_i64, 1406 .fniv = gen_bic_pg_vec, 1407 .fno = gen_helper_sve_bic_pppp, 1408 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1409 }; 1410 1411 if (!dc_isar_feature(aa64_sve, s)) { 1412 return false; 1413 } 1414 if (!a->s && a->pg == a->rn) { 1415 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); 1416 } 1417 return do_pppp_flags(s, a, &op); 1418 } 1419 1420 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1421 { 1422 tcg_gen_xor_i64(pd, pn, pm); 1423 tcg_gen_and_i64(pd, pd, pg); 1424 } 1425 1426 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1427 TCGv_vec pm, TCGv_vec pg) 1428 { 1429 tcg_gen_xor_vec(vece, pd, pn, pm); 1430 tcg_gen_and_vec(vece, pd, pd, pg); 1431 } 1432 1433 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) 
1434 { 1435 static const GVecGen4 op = { 1436 .fni8 = gen_eor_pg_i64, 1437 .fniv = gen_eor_pg_vec, 1438 .fno = gen_helper_sve_eor_pppp, 1439 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1440 }; 1441 1442 if (!dc_isar_feature(aa64_sve, s)) { 1443 return false; 1444 } 1445 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ 1446 if (!a->s && a->pg == a->rm) { 1447 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); 1448 } 1449 return do_pppp_flags(s, a, &op); 1450 } 1451 1452 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) 1453 { 1454 if (a->s || !dc_isar_feature(aa64_sve, s)) { 1455 return false; 1456 } 1457 if (sve_access_check(s)) { 1458 unsigned psz = pred_gvec_reg_size(s); 1459 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), 1460 pred_full_reg_offset(s, a->pg), 1461 pred_full_reg_offset(s, a->rn), 1462 pred_full_reg_offset(s, a->rm), psz, psz); 1463 } 1464 return true; 1465 } 1466 1467 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1468 { 1469 tcg_gen_or_i64(pd, pn, pm); 1470 tcg_gen_and_i64(pd, pd, pg); 1471 } 1472 1473 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1474 TCGv_vec pm, TCGv_vec pg) 1475 { 1476 tcg_gen_or_vec(vece, pd, pn, pm); 1477 tcg_gen_and_vec(vece, pd, pd, pg); 1478 } 1479 1480 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) 1481 { 1482 static const GVecGen4 op = { 1483 .fni8 = gen_orr_pg_i64, 1484 .fniv = gen_orr_pg_vec, 1485 .fno = gen_helper_sve_orr_pppp, 1486 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1487 }; 1488 1489 if (!dc_isar_feature(aa64_sve, s)) { 1490 return false; 1491 } 1492 if (!a->s && a->pg == a->rn && a->rn == a->rm) { 1493 return do_mov_p(s, a->rd, a->rn); 1494 } 1495 return do_pppp_flags(s, a, &op); 1496 } 1497 1498 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1499 { 1500 tcg_gen_orc_i64(pd, pn, pm); 1501 tcg_gen_and_i64(pd, pd, pg); 1502 } 1503 1504 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1505 TCGv_vec pm, TCGv_vec pg) 1506 { 1507 tcg_gen_orc_vec(vece, pd, pn, pm); 1508 tcg_gen_and_vec(vece, pd, pd, pg); 1509 } 1510 1511 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) 1512 { 1513 static const GVecGen4 op = { 1514 .fni8 = gen_orn_pg_i64, 1515 .fniv = gen_orn_pg_vec, 1516 .fno = gen_helper_sve_orn_pppp, 1517 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1518 }; 1519 1520 if (!dc_isar_feature(aa64_sve, s)) { 1521 return false; 1522 } 1523 return do_pppp_flags(s, a, &op); 1524 } 1525 1526 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1527 { 1528 tcg_gen_or_i64(pd, pn, pm); 1529 tcg_gen_andc_i64(pd, pg, pd); 1530 } 1531 1532 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1533 TCGv_vec pm, TCGv_vec pg) 1534 { 1535 tcg_gen_or_vec(vece, pd, pn, pm); 1536 tcg_gen_andc_vec(vece, pd, pg, pd); 1537 } 1538 1539 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) 1540 { 1541 static const GVecGen4 op = { 1542 .fni8 = gen_nor_pg_i64, 1543 .fniv = gen_nor_pg_vec, 1544 .fno = gen_helper_sve_nor_pppp, 1545 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1546 }; 1547 1548 if (!dc_isar_feature(aa64_sve, s)) { 1549 return false; 1550 } 1551 return do_pppp_flags(s, a, &op); 1552 } 1553 1554 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1555 { 1556 tcg_gen_and_i64(pd, pn, pm); 1557 tcg_gen_andc_i64(pd, pg, pd); 1558 } 1559 1560 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1561 TCGv_vec pm, 
TCGv_vec pg) 1562 { 1563 tcg_gen_and_vec(vece, pd, pn, pm); 1564 tcg_gen_andc_vec(vece, pd, pg, pd); 1565 } 1566 1567 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) 1568 { 1569 static const GVecGen4 op = { 1570 .fni8 = gen_nand_pg_i64, 1571 .fniv = gen_nand_pg_vec, 1572 .fno = gen_helper_sve_nand_pppp, 1573 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1574 }; 1575 1576 if (!dc_isar_feature(aa64_sve, s)) { 1577 return false; 1578 } 1579 return do_pppp_flags(s, a, &op); 1580 } 1581 1582 /* 1583 *** SVE Predicate Misc Group 1584 */ 1585 1586 static bool trans_PTEST(DisasContext *s, arg_PTEST *a) 1587 { 1588 if (!dc_isar_feature(aa64_sve, s)) { 1589 return false; 1590 } 1591 if (sve_access_check(s)) { 1592 int nofs = pred_full_reg_offset(s, a->rn); 1593 int gofs = pred_full_reg_offset(s, a->pg); 1594 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8); 1595 1596 if (words == 1) { 1597 TCGv_i64 pn = tcg_temp_new_i64(); 1598 TCGv_i64 pg = tcg_temp_new_i64(); 1599 1600 tcg_gen_ld_i64(pn, tcg_env, nofs); 1601 tcg_gen_ld_i64(pg, tcg_env, gofs); 1602 do_predtest1(pn, pg); 1603 } else { 1604 do_predtest(s, nofs, gofs, words); 1605 } 1606 } 1607 return true; 1608 } 1609 1610 /* See the ARM pseudocode DecodePredCount. */ 1611 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz) 1612 { 1613 unsigned elements = fullsz >> esz; 1614 unsigned bound; 1615 1616 switch (pattern) { 1617 case 0x0: /* POW2 */ 1618 return pow2floor(elements); 1619 case 0x1: /* VL1 */ 1620 case 0x2: /* VL2 */ 1621 case 0x3: /* VL3 */ 1622 case 0x4: /* VL4 */ 1623 case 0x5: /* VL5 */ 1624 case 0x6: /* VL6 */ 1625 case 0x7: /* VL7 */ 1626 case 0x8: /* VL8 */ 1627 bound = pattern; 1628 break; 1629 case 0x9: /* VL16 */ 1630 case 0xa: /* VL32 */ 1631 case 0xb: /* VL64 */ 1632 case 0xc: /* VL128 */ 1633 case 0xd: /* VL256 */ 1634 bound = 16 << (pattern - 9); 1635 break; 1636 case 0x1d: /* MUL4 */ 1637 return elements - elements % 4; 1638 case 0x1e: /* MUL3 */ 1639 return elements - elements % 3; 1640 case 0x1f: /* ALL */ 1641 return elements; 1642 default: /* #uimm5 */ 1643 return 0; 1644 } 1645 return elements >= bound ? bound : 0; 1646 } 1647 1648 /* This handles all of the predicate initialization instructions, 1649 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32 1650 * so that decode_pred_count returns 0. For SETFFR, we will have 1651 * set RD == 16 == FFR. 1652 */ 1653 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) 1654 { 1655 if (!sve_access_check(s)) { 1656 return true; 1657 } 1658 1659 unsigned fullsz = vec_full_reg_size(s); 1660 unsigned ofs = pred_full_reg_offset(s, rd); 1661 unsigned numelem, setsz, i; 1662 uint64_t word, lastword; 1663 TCGv_i64 t; 1664 1665 numelem = decode_pred_count(fullsz, pat, esz); 1666 1667 /* Determine what we must store into each bit, and how many. 
*/ 1668 if (numelem == 0) { 1669 lastword = word = 0; 1670 setsz = fullsz; 1671 } else { 1672 setsz = numelem << esz; 1673 lastword = word = pred_esz_masks[esz]; 1674 if (setsz % 64) { 1675 lastword &= MAKE_64BIT_MASK(0, setsz % 64); 1676 } 1677 } 1678 1679 t = tcg_temp_new_i64(); 1680 if (fullsz <= 64) { 1681 tcg_gen_movi_i64(t, lastword); 1682 tcg_gen_st_i64(t, tcg_env, ofs); 1683 goto done; 1684 } 1685 1686 if (word == lastword) { 1687 unsigned maxsz = size_for_gvec(fullsz / 8); 1688 unsigned oprsz = size_for_gvec(setsz / 8); 1689 1690 if (oprsz * 8 == setsz) { 1691 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word); 1692 goto done; 1693 } 1694 } 1695 1696 setsz /= 8; 1697 fullsz /= 8; 1698 1699 tcg_gen_movi_i64(t, word); 1700 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) { 1701 tcg_gen_st_i64(t, tcg_env, ofs + i); 1702 } 1703 if (lastword != word) { 1704 tcg_gen_movi_i64(t, lastword); 1705 tcg_gen_st_i64(t, tcg_env, ofs + i); 1706 i += 8; 1707 } 1708 if (i < fullsz) { 1709 tcg_gen_movi_i64(t, 0); 1710 for (; i < fullsz; i += 8) { 1711 tcg_gen_st_i64(t, tcg_env, ofs + i); 1712 } 1713 } 1714 1715 done: 1716 /* PTRUES */ 1717 if (setflag) { 1718 tcg_gen_movi_i32(cpu_NF, -(word != 0)); 1719 tcg_gen_movi_i32(cpu_CF, word == 0); 1720 tcg_gen_movi_i32(cpu_VF, 0); 1721 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1722 } 1723 return true; 1724 } 1725 1726 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) 1727 1728 static bool trans_PTRUE_cnt(DisasContext *s, arg_PTRUE_cnt *a) 1729 { 1730 if (!dc_isar_feature(aa64_sme2_or_sve2p1, s)) { 1731 return false; 1732 } 1733 if (sve_access_check(s)) { 1734 /* Canonical TRUE is 0 count, invert bit, plus element size. */ 1735 int val = (1 << 15) | (1 << a->esz); 1736 1737 /* Write val to the first uint64_t; clear all of the rest. */ 1738 tcg_gen_gvec_dup_imm(MO_64, pred_full_reg_offset(s, a->rd), 1739 8, size_for_gvec(pred_full_reg_size(s)), val); 1740 } 1741 return true; 1742 } 1743 1744 /* Note pat == 31 is #all, to set all elements. */ 1745 TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, 1746 do_predset, 0, FFR_PRED_NUM, 31, false) 1747 1748 /* Note pat == 32 is #unimp, to set no elements. */ 1749 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) 1750 1751 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) 1752 { 1753 /* The path through do_pppp_flags is complicated enough to want to avoid 1754 * duplication. Frob the arguments into the form of a predicated AND. 
1755 */ 1756 arg_rprr_s alt_a = { 1757 .rd = a->rd, .pg = a->pg, .s = a->s, 1758 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM, 1759 }; 1760 1761 s->is_nonstreaming = true; 1762 return trans_AND_pppp(s, &alt_a); 1763 } 1764 1765 TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) 1766 TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) 1767 1768 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, 1769 void (*gen_fn)(TCGv_i32, TCGv_ptr, 1770 TCGv_ptr, TCGv_i32)) 1771 { 1772 if (!sve_access_check(s)) { 1773 return true; 1774 } 1775 1776 TCGv_ptr t_pd = tcg_temp_new_ptr(); 1777 TCGv_ptr t_pg = tcg_temp_new_ptr(); 1778 TCGv_i32 t; 1779 unsigned desc = 0; 1780 1781 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 1782 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 1783 1784 tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd)); 1785 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn)); 1786 t = tcg_temp_new_i32(); 1787 1788 gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc)); 1789 1790 do_pred_flags(t); 1791 return true; 1792 } 1793 1794 TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst) 1795 TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext) 1796 1797 /* 1798 *** SVE Element Count Group 1799 */ 1800 1801 /* Perform an inline saturating addition of a 32-bit value within 1802 * a 64-bit register. The second operand is known to be positive, 1803 * which halves the comparisons we must perform to bound the result. 1804 */ 1805 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1806 { 1807 int64_t ibound; 1808 1809 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 1810 if (u) { 1811 tcg_gen_ext32u_i64(reg, reg); 1812 } else { 1813 tcg_gen_ext32s_i64(reg, reg); 1814 } 1815 if (d) { 1816 tcg_gen_sub_i64(reg, reg, val); 1817 ibound = (u ? 0 : INT32_MIN); 1818 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound)); 1819 } else { 1820 tcg_gen_add_i64(reg, reg, val); 1821 ibound = (u ? UINT32_MAX : INT32_MAX); 1822 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound)); 1823 } 1824 } 1825 1826 /* Similarly with 64-bit values. */ 1827 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1828 { 1829 TCGv_i64 t0 = tcg_temp_new_i64(); 1830 TCGv_i64 t2; 1831 1832 if (u) { 1833 if (d) { 1834 tcg_gen_sub_i64(t0, reg, val); 1835 t2 = tcg_constant_i64(0); 1836 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0); 1837 } else { 1838 tcg_gen_add_i64(t0, reg, val); 1839 t2 = tcg_constant_i64(-1); 1840 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0); 1841 } 1842 } else { 1843 TCGv_i64 t1 = tcg_temp_new_i64(); 1844 if (d) { 1845 /* Detect signed overflow for subtraction. */ 1846 tcg_gen_xor_i64(t0, reg, val); 1847 tcg_gen_sub_i64(t1, reg, val); 1848 tcg_gen_xor_i64(reg, reg, t1); 1849 tcg_gen_and_i64(t0, t0, reg); 1850 1851 /* Bound the result. */ 1852 tcg_gen_movi_i64(reg, INT64_MIN); 1853 t2 = tcg_constant_i64(0); 1854 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1); 1855 } else { 1856 /* Detect signed overflow for addition. */ 1857 tcg_gen_xor_i64(t0, reg, val); 1858 tcg_gen_add_i64(reg, reg, val); 1859 tcg_gen_xor_i64(t1, reg, val); 1860 tcg_gen_andc_i64(t0, t1, t0); 1861 1862 /* Bound the result. */ 1863 tcg_gen_movi_i64(t1, INT64_MAX); 1864 t2 = tcg_constant_i64(0); 1865 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); 1866 } 1867 } 1868 } 1869 1870 /* Similarly with a vector and a scalar operand. 
*/ 1871 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1872 TCGv_i64 val, bool u, bool d) 1873 { 1874 unsigned vsz = vec_full_reg_size(s); 1875 TCGv_ptr dptr, nptr; 1876 TCGv_i32 t32, desc; 1877 TCGv_i64 t64; 1878 1879 dptr = tcg_temp_new_ptr(); 1880 nptr = tcg_temp_new_ptr(); 1881 tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd)); 1882 tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn)); 1883 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1884 1885 switch (esz) { 1886 case MO_8: 1887 t32 = tcg_temp_new_i32(); 1888 tcg_gen_extrl_i64_i32(t32, val); 1889 if (d) { 1890 tcg_gen_neg_i32(t32, t32); 1891 } 1892 if (u) { 1893 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1894 } else { 1895 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1896 } 1897 break; 1898 1899 case MO_16: 1900 t32 = tcg_temp_new_i32(); 1901 tcg_gen_extrl_i64_i32(t32, val); 1902 if (d) { 1903 tcg_gen_neg_i32(t32, t32); 1904 } 1905 if (u) { 1906 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1907 } else { 1908 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1909 } 1910 break; 1911 1912 case MO_32: 1913 t64 = tcg_temp_new_i64(); 1914 if (d) { 1915 tcg_gen_neg_i64(t64, val); 1916 } else { 1917 tcg_gen_mov_i64(t64, val); 1918 } 1919 if (u) { 1920 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1921 } else { 1922 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1923 } 1924 break; 1925 1926 case MO_64: 1927 if (u) { 1928 if (d) { 1929 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1930 } else { 1931 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1932 } 1933 } else if (d) { 1934 t64 = tcg_temp_new_i64(); 1935 tcg_gen_neg_i64(t64, val); 1936 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1937 } else { 1938 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1939 } 1940 break; 1941 1942 default: 1943 g_assert_not_reached(); 1944 } 1945 } 1946 1947 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1948 { 1949 if (!dc_isar_feature(aa64_sve, s)) { 1950 return false; 1951 } 1952 if (sve_access_check(s)) { 1953 unsigned fullsz = vec_full_reg_size(s); 1954 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1955 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 1956 } 1957 return true; 1958 } 1959 1960 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 1961 { 1962 if (!dc_isar_feature(aa64_sve, s)) { 1963 return false; 1964 } 1965 if (sve_access_check(s)) { 1966 unsigned fullsz = vec_full_reg_size(s); 1967 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1968 int inc = numelem * a->imm * (a->d ? -1 : 1); 1969 TCGv_i64 reg = cpu_reg(s, a->rd); 1970 1971 tcg_gen_addi_i64(reg, reg, inc); 1972 } 1973 return true; 1974 } 1975 1976 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 1977 { 1978 if (!dc_isar_feature(aa64_sve, s)) { 1979 return false; 1980 } 1981 if (!sve_access_check(s)) { 1982 return true; 1983 } 1984 1985 unsigned fullsz = vec_full_reg_size(s); 1986 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1987 int inc = numelem * a->imm; 1988 TCGv_i64 reg = cpu_reg(s, a->rd); 1989 1990 /* Use normal 64-bit arithmetic to detect 32-bit overflow. 
*/ 1991 if (inc == 0) { 1992 if (a->u) { 1993 tcg_gen_ext32u_i64(reg, reg); 1994 } else { 1995 tcg_gen_ext32s_i64(reg, reg); 1996 } 1997 } else { 1998 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 1999 } 2000 return true; 2001 } 2002 2003 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2004 { 2005 if (!dc_isar_feature(aa64_sve, s)) { 2006 return false; 2007 } 2008 if (!sve_access_check(s)) { 2009 return true; 2010 } 2011 2012 unsigned fullsz = vec_full_reg_size(s); 2013 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2014 int inc = numelem * a->imm; 2015 TCGv_i64 reg = cpu_reg(s, a->rd); 2016 2017 if (inc != 0) { 2018 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2019 } 2020 return true; 2021 } 2022 2023 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2024 { 2025 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2026 return false; 2027 } 2028 2029 unsigned fullsz = vec_full_reg_size(s); 2030 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2031 int inc = numelem * a->imm; 2032 2033 if (inc != 0) { 2034 if (sve_access_check(s)) { 2035 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2036 vec_full_reg_offset(s, a->rn), 2037 tcg_constant_i64(a->d ? -inc : inc), 2038 fullsz, fullsz); 2039 } 2040 } else { 2041 do_mov_z(s, a->rd, a->rn); 2042 } 2043 return true; 2044 } 2045 2046 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2047 { 2048 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2049 return false; 2050 } 2051 2052 unsigned fullsz = vec_full_reg_size(s); 2053 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2054 int inc = numelem * a->imm; 2055 2056 if (inc != 0) { 2057 if (sve_access_check(s)) { 2058 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2059 tcg_constant_i64(inc), a->u, a->d); 2060 } 2061 } else { 2062 do_mov_z(s, a->rd, a->rn); 2063 } 2064 return true; 2065 } 2066 2067 /* 2068 *** SVE Bitwise Immediate Group 2069 */ 2070 2071 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2072 { 2073 uint64_t imm; 2074 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2075 extract32(a->dbm, 0, 6), 2076 extract32(a->dbm, 6, 6))) { 2077 return false; 2078 } 2079 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2080 } 2081 2082 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2083 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2084 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2085 2086 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2087 { 2088 uint64_t imm; 2089 2090 if (!dc_isar_feature(aa64_sve, s)) { 2091 return false; 2092 } 2093 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2094 extract32(a->dbm, 0, 6), 2095 extract32(a->dbm, 6, 6))) { 2096 return false; 2097 } 2098 if (sve_access_check(s)) { 2099 do_dupi_z(s, a->rd, imm); 2100 } 2101 return true; 2102 } 2103 2104 /* 2105 *** SVE Integer Wide Immediate - Predicated Group 2106 */ 2107 2108 /* Implement all merging copies. This is used for CPY (immediate), 2109 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 
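 * Active elements of Zd are set to VAL; inactive elements are taken
 * unchanged from Zn.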
2110 */ 2111 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2112 TCGv_i64 val) 2113 { 2114 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2115 static gen_cpy * const fns[4] = { 2116 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2117 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2118 }; 2119 unsigned vsz = vec_full_reg_size(s); 2120 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2121 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2122 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2123 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2124 2125 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 2126 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn)); 2127 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2128 2129 fns[esz](t_zd, t_zn, t_pg, val, desc); 2130 } 2131 2132 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2133 { 2134 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2135 return false; 2136 } 2137 if (sve_access_check(s)) { 2138 /* Decode the VFP immediate. */ 2139 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2140 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2141 } 2142 return true; 2143 } 2144 2145 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2146 { 2147 if (!dc_isar_feature(aa64_sve, s)) { 2148 return false; 2149 } 2150 if (sve_access_check(s)) { 2151 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2152 } 2153 return true; 2154 } 2155 2156 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2157 { 2158 static gen_helper_gvec_2i * const fns[4] = { 2159 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2160 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2161 }; 2162 2163 if (!dc_isar_feature(aa64_sve, s)) { 2164 return false; 2165 } 2166 if (sve_access_check(s)) { 2167 unsigned vsz = vec_full_reg_size(s); 2168 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2169 pred_full_reg_offset(s, a->pg), 2170 tcg_constant_i64(a->imm), 2171 vsz, vsz, 0, fns[a->esz]); 2172 } 2173 return true; 2174 } 2175 2176 /* 2177 *** SVE Permute Extract Group 2178 */ 2179 2180 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2181 { 2182 if (!sve_access_check(s)) { 2183 return true; 2184 } 2185 2186 unsigned vsz = vec_full_reg_size(s); 2187 unsigned n_ofs = imm >= vsz ? 0 : imm; 2188 unsigned n_siz = vsz - n_ofs; 2189 unsigned d = vec_full_reg_offset(s, rd); 2190 unsigned n = vec_full_reg_offset(s, rn); 2191 unsigned m = vec_full_reg_offset(s, rm); 2192 2193 /* Use host vector move insns if we have appropriate sizes 2194 * and no unfortunate overlap. 
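 * The result is a VSZ-byte window into the concatenation Zm:Zn
 * starting at byte N_OFS of Zn: bytes [0, n_siz) of Zd come from
 * Zn[n_ofs..] and bytes [n_siz, vsz) come from Zm[0..n_ofs).  If
 * either piece has a size the gvec moves cannot express, or the
 * operands overlap in a way the two moves cannot handle, fall back
 * to the out-of-line helper.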
2195 */ 2196 if (m != d 2197 && n_ofs == size_for_gvec(n_ofs) 2198 && n_siz == size_for_gvec(n_siz) 2199 && (d != n || n_siz <= n_ofs)) { 2200 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2201 if (n_ofs != 0) { 2202 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2203 } 2204 } else { 2205 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2206 } 2207 return true; 2208 } 2209 2210 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2211 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2212 2213 static bool trans_EXTQ(DisasContext *s, arg_EXTQ *a) 2214 { 2215 unsigned vl, dofs, sofs0, sofs1, sofs2, imm; 2216 2217 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 2218 return false; 2219 } 2220 if (!sve_access_check(s)) { 2221 return true; 2222 } 2223 2224 imm = a->imm; 2225 if (imm == 0) { 2226 /* So far we never optimize Zdn with MOVPRFX, so zd = zn is a nop. */ 2227 return true; 2228 } 2229 2230 vl = vec_full_reg_size(s); 2231 dofs = vec_full_reg_offset(s, a->rd); 2232 sofs2 = vec_full_reg_offset(s, a->rn); 2233 2234 if (imm & 8) { 2235 sofs0 = dofs + 8; 2236 sofs1 = sofs2; 2237 sofs2 += 8; 2238 } else { 2239 sofs0 = dofs; 2240 sofs1 = dofs + 8; 2241 } 2242 imm = (imm & 7) << 3; 2243 2244 for (unsigned i = 0; i < vl; i += 16) { 2245 TCGv_i64 s0 = tcg_temp_new_i64(); 2246 TCGv_i64 s1 = tcg_temp_new_i64(); 2247 TCGv_i64 s2 = tcg_temp_new_i64(); 2248 2249 tcg_gen_ld_i64(s0, tcg_env, sofs0 + i); 2250 tcg_gen_ld_i64(s1, tcg_env, sofs1 + i); 2251 tcg_gen_ld_i64(s2, tcg_env, sofs2 + i); 2252 2253 tcg_gen_extract2_i64(s0, s0, s1, imm); 2254 tcg_gen_extract2_i64(s1, s1, s2, imm); 2255 2256 tcg_gen_st_i64(s0, tcg_env, dofs + i); 2257 tcg_gen_st_i64(s1, tcg_env, dofs + i + 8); 2258 } 2259 return true; 2260 } 2261 2262 /* 2263 *** SVE Permute - Unpredicated Group 2264 */ 2265 2266 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2267 { 2268 if (!dc_isar_feature(aa64_sve, s)) { 2269 return false; 2270 } 2271 if (sve_access_check(s)) { 2272 unsigned vsz = vec_full_reg_size(s); 2273 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2274 vsz, vsz, cpu_reg_sp(s, a->rn)); 2275 } 2276 return true; 2277 } 2278 2279 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2280 { 2281 if (!dc_isar_feature(aa64_sve, s)) { 2282 return false; 2283 } 2284 if ((a->imm & 0x1f) == 0) { 2285 return false; 2286 } 2287 if (sve_access_check(s)) { 2288 unsigned vsz = vec_full_reg_size(s); 2289 unsigned dofs = vec_full_reg_offset(s, a->rd); 2290 unsigned esz, index; 2291 2292 esz = ctz32(a->imm); 2293 index = a->imm >> (esz + 1); 2294 2295 if ((index << esz) < vsz) { 2296 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2297 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2298 } else { 2299 /* 2300 * While dup_mem handles 128-bit elements, dup_imm does not. 2301 * Thankfully element size doesn't matter for splatting zero. 
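 * Per the architecture, an element index outside the current vector
 * length yields a zero result.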
2302 */ 2303 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2304 } 2305 } 2306 return true; 2307 } 2308 2309 static bool trans_DUPQ(DisasContext *s, arg_DUPQ *a) 2310 { 2311 unsigned vl, dofs, nofs; 2312 2313 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 2314 return false; 2315 } 2316 if (!sve_access_check(s)) { 2317 return true; 2318 } 2319 2320 vl = vec_full_reg_size(s); 2321 dofs = vec_full_reg_offset(s, a->rd); 2322 nofs = vec_reg_offset(s, a->rn, a->imm, a->esz); 2323 2324 for (unsigned i = 0; i < vl; i += 16) { 2325 tcg_gen_gvec_dup_mem(a->esz, dofs + i, nofs + i, 16, 16); 2326 } 2327 return true; 2328 } 2329 2330 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2331 { 2332 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2333 static gen_insr * const fns[4] = { 2334 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2335 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2336 }; 2337 unsigned vsz = vec_full_reg_size(s); 2338 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2339 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2340 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2341 2342 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd)); 2343 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2344 2345 fns[a->esz](t_zd, t_zn, val, desc); 2346 } 2347 2348 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2349 { 2350 if (!dc_isar_feature(aa64_sve, s)) { 2351 return false; 2352 } 2353 if (sve_access_check(s)) { 2354 TCGv_i64 t = tcg_temp_new_i64(); 2355 tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2356 do_insr_i64(s, a, t); 2357 } 2358 return true; 2359 } 2360 2361 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2362 { 2363 if (!dc_isar_feature(aa64_sve, s)) { 2364 return false; 2365 } 2366 if (sve_access_check(s)) { 2367 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2368 } 2369 return true; 2370 } 2371 2372 static gen_helper_gvec_2 * const rev_fns[4] = { 2373 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2374 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2375 }; 2376 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2377 2378 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2379 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2380 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2381 }; 2382 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0) 2383 2384 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2385 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2386 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2387 }; 2388 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2389 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2390 2391 static gen_helper_gvec_3 * const tblq_fns[4] = { 2392 gen_helper_sve2p1_tblq_b, gen_helper_sve2p1_tblq_h, 2393 gen_helper_sve2p1_tblq_s, gen_helper_sve2p1_tblq_d 2394 }; 2395 TRANS_FEAT(TBLQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2396 tblq_fns[a->esz], a, 0) 2397 2398 static gen_helper_gvec_3 * const tbx_fns[4] = { 2399 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2400 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2401 }; 2402 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2403 2404 static gen_helper_gvec_3 * const tbxq_fns[4] = { 2405 gen_helper_sve2p1_tbxq_b, gen_helper_sve2p1_tbxq_h, 2406 gen_helper_sve2p1_tbxq_s, gen_helper_sve2p1_tbxq_d 2407 }; 2408 TRANS_FEAT(TBXQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2409 tbxq_fns[a->esz], a, 0) 2410 2411 static bool trans_PMOV_pv(DisasContext *s, arg_PMOV_pv 
*a) 2412 { 2413 static gen_helper_gvec_2 * const fns[4] = { 2414 NULL, gen_helper_pmov_pv_h, 2415 gen_helper_pmov_pv_s, gen_helper_pmov_pv_d 2416 }; 2417 unsigned vl, pl, vofs, pofs; 2418 TCGv_i64 tmp; 2419 2420 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 2421 return false; 2422 } 2423 if (!sve_access_check(s)) { 2424 return true; 2425 } 2426 2427 vl = vec_full_reg_size(s); 2428 if (a->esz != MO_8) { 2429 tcg_gen_gvec_2_ool(pred_full_reg_offset(s, a->rd), 2430 vec_full_reg_offset(s, a->rn), 2431 vl, vl, a->imm, fns[a->esz]); 2432 return true; 2433 } 2434 2435 /* 2436 * Copy the low PL bytes from vector Zn, zero-extending to a 2437 * multiple of 8 bytes, so that Pd is properly cleared. 2438 */ 2439 2440 pl = vl / 8; 2441 pofs = pred_full_reg_offset(s, a->rd); 2442 vofs = vec_full_reg_offset(s, a->rn); 2443 2444 QEMU_BUILD_BUG_ON(sizeof(ARMPredicateReg) != 32); 2445 for (unsigned i = 32; i >= 8; i >>= 1) { 2446 if (pl & i) { 2447 tcg_gen_gvec_mov(MO_64, pofs, vofs, i, i); 2448 pofs += i; 2449 vofs += i; 2450 } 2451 } 2452 switch (pl & 7) { 2453 case 0: 2454 return true; 2455 case 2: 2456 tmp = tcg_temp_new_i64(); 2457 tcg_gen_ld16u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 6 : 0)); 2458 break; 2459 case 4: 2460 tmp = tcg_temp_new_i64(); 2461 tcg_gen_ld32u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 4 : 0)); 2462 break; 2463 case 6: 2464 tmp = tcg_temp_new_i64(); 2465 tcg_gen_ld_i64(tmp, tcg_env, vofs); 2466 tcg_gen_extract_i64(tmp, tmp, 0, 48); 2467 break; 2468 default: 2469 g_assert_not_reached(); 2470 } 2471 tcg_gen_st_i64(tmp, tcg_env, pofs); 2472 return true; 2473 } 2474 2475 static bool trans_PMOV_vp(DisasContext *s, arg_PMOV_pv *a) 2476 { 2477 static gen_helper_gvec_2 * const fns[4] = { 2478 NULL, gen_helper_pmov_vp_h, 2479 gen_helper_pmov_vp_s, gen_helper_pmov_vp_d 2480 }; 2481 unsigned vl; 2482 2483 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 2484 return false; 2485 } 2486 if (!sve_access_check(s)) { 2487 return true; 2488 } 2489 2490 vl = vec_full_reg_size(s); 2491 2492 if (a->esz == MO_8) { 2493 /* 2494 * The low PL bytes are copied from Pn to Zd unchanged. 2495 * We know that the unused portion of Pn is zero, and 2496 * that imm == 0, so the balance of Zd must be zeroed. 2497 */ 2498 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, a->rd), 2499 pred_full_reg_offset(s, a->rn), 2500 size_for_gvec(vl / 8), vl); 2501 } else { 2502 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2503 pred_full_reg_offset(s, a->rn), 2504 vl, vl, a->imm, fns[a->esz]); 2505 } 2506 return true; 2507 } 2508 2509 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2510 { 2511 static gen_helper_gvec_2 * const fns[4][2] = { 2512 { NULL, NULL }, 2513 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2514 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2515 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2516 }; 2517 2518 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2519 return false; 2520 } 2521 if (sve_access_check(s)) { 2522 unsigned vsz = vec_full_reg_size(s); 2523 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2524 vec_full_reg_offset(s, a->rn) 2525 + (a->h ? 
vsz / 2 : 0), 2526 vsz, vsz, 0, fns[a->esz][a->u]); 2527 } 2528 return true; 2529 } 2530 2531 /* 2532 *** SVE Permute - Predicates Group 2533 */ 2534 2535 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2536 gen_helper_gvec_3 *fn) 2537 { 2538 if (!sve_access_check(s)) { 2539 return true; 2540 } 2541 2542 unsigned vsz = pred_full_reg_size(s); 2543 2544 TCGv_ptr t_d = tcg_temp_new_ptr(); 2545 TCGv_ptr t_n = tcg_temp_new_ptr(); 2546 TCGv_ptr t_m = tcg_temp_new_ptr(); 2547 uint32_t desc = 0; 2548 2549 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2550 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2551 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2552 2553 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2554 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2555 tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm)); 2556 2557 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2558 return true; 2559 } 2560 2561 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2562 gen_helper_gvec_2 *fn) 2563 { 2564 if (!sve_access_check(s)) { 2565 return true; 2566 } 2567 2568 unsigned vsz = pred_full_reg_size(s); 2569 TCGv_ptr t_d = tcg_temp_new_ptr(); 2570 TCGv_ptr t_n = tcg_temp_new_ptr(); 2571 uint32_t desc = 0; 2572 2573 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2574 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2575 2576 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2577 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2578 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2579 2580 fn(t_d, t_n, tcg_constant_i32(desc)); 2581 return true; 2582 } 2583 2584 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2585 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2586 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2587 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2588 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2589 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2590 2591 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2592 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2593 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2594 2595 /* 2596 *** SVE Permute - Interleaving Group 2597 */ 2598 2599 static bool do_interleave_q(DisasContext *s, gen_helper_gvec_3 *fn, 2600 arg_rrr_esz *a, int data) 2601 { 2602 if (sve_access_check(s)) { 2603 unsigned vsz = vec_full_reg_size(s); 2604 if (vsz < 32) { 2605 unallocated_encoding(s); 2606 } else { 2607 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), 2608 vec_full_reg_offset(s, a->rn), 2609 vec_full_reg_offset(s, a->rm), 2610 vsz, vsz, data, fn); 2611 } 2612 } 2613 return true; 2614 } 2615 2616 static gen_helper_gvec_3 * const zip_fns[4] = { 2617 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2618 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2619 }; 2620 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2621 zip_fns[a->esz], a, 0) 2622 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2623 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2624 2625 TRANS_FEAT_NONSTREAMING(ZIP1_q, aa64_sve_f64mm, do_interleave_q, 2626 gen_helper_sve2_zip_q, a, 0) 2627 TRANS_FEAT_NONSTREAMING(ZIP2_q, aa64_sve_f64mm, do_interleave_q, 2628 gen_helper_sve2_zip_q, a, 2629 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2630 2631 static gen_helper_gvec_3 
* const zipq_fns[4] = { 2632 gen_helper_sve2p1_zipq_b, gen_helper_sve2p1_zipq_h, 2633 gen_helper_sve2p1_zipq_s, gen_helper_sve2p1_zipq_d, 2634 }; 2635 TRANS_FEAT(ZIPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2636 zipq_fns[a->esz], a, 0) 2637 TRANS_FEAT(ZIPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2638 zipq_fns[a->esz], a, 16 / 2) 2639 2640 static gen_helper_gvec_3 * const uzp_fns[4] = { 2641 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2642 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2643 }; 2644 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2645 uzp_fns[a->esz], a, 0) 2646 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2647 uzp_fns[a->esz], a, 1 << a->esz) 2648 2649 TRANS_FEAT_NONSTREAMING(UZP1_q, aa64_sve_f64mm, do_interleave_q, 2650 gen_helper_sve2_uzp_q, a, 0) 2651 TRANS_FEAT_NONSTREAMING(UZP2_q, aa64_sve_f64mm, do_interleave_q, 2652 gen_helper_sve2_uzp_q, a, 16) 2653 2654 static gen_helper_gvec_3 * const uzpq_fns[4] = { 2655 gen_helper_sve2p1_uzpq_b, gen_helper_sve2p1_uzpq_h, 2656 gen_helper_sve2p1_uzpq_s, gen_helper_sve2p1_uzpq_d, 2657 }; 2658 TRANS_FEAT(UZPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2659 uzpq_fns[a->esz], a, 0) 2660 TRANS_FEAT(UZPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, 2661 uzpq_fns[a->esz], a, 1 << a->esz) 2662 2663 static gen_helper_gvec_3 * const trn_fns[4] = { 2664 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2665 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2666 }; 2667 2668 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2669 trn_fns[a->esz], a, 0) 2670 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2671 trn_fns[a->esz], a, 1 << a->esz) 2672 2673 TRANS_FEAT_NONSTREAMING(TRN1_q, aa64_sve_f64mm, do_interleave_q, 2674 gen_helper_sve2_trn_q, a, 0) 2675 TRANS_FEAT_NONSTREAMING(TRN2_q, aa64_sve_f64mm, do_interleave_q, 2676 gen_helper_sve2_trn_q, a, 16) 2677 2678 /* 2679 *** SVE Permute Vector - Predicated Group 2680 */ 2681 2682 static gen_helper_gvec_3 * const compact_fns[4] = { 2683 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2684 }; 2685 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2686 compact_fns[a->esz], a, 0) 2687 2688 /* Call the helper that computes the ARM LastActiveElement pseudocode 2689 * function, scaled by the element size. This includes the not found 2690 * indication; e.g. not found for esz=3 is -8. 2691 */ 2692 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2693 { 2694 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2695 * round up, as we do elsewhere, because we need the exact size. 2696 */ 2697 TCGv_ptr t_p = tcg_temp_new_ptr(); 2698 unsigned desc = 0; 2699 2700 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2701 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2702 2703 tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg)); 2704 2705 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2706 } 2707 2708 /* Increment LAST to the offset of the next element in the vector, 2709 * wrapping around to 0. 
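 * That is, last = (last + (1 << esz)) % vsz, computed without a
 * division: a power-of-two vector size uses a mask, otherwise a
 * compare-and-select against VSZ wraps the offset back to zero.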
2710 */ 2711 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2712 { 2713 unsigned vsz = vec_full_reg_size(s); 2714 2715 tcg_gen_addi_i32(last, last, 1 << esz); 2716 if (is_power_of_2(vsz)) { 2717 tcg_gen_andi_i32(last, last, vsz - 1); 2718 } else { 2719 TCGv_i32 max = tcg_constant_i32(vsz); 2720 TCGv_i32 zero = tcg_constant_i32(0); 2721 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2722 } 2723 } 2724 2725 /* If LAST < 0, set LAST to the offset of the last element in the vector. */ 2726 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2727 { 2728 unsigned vsz = vec_full_reg_size(s); 2729 2730 if (is_power_of_2(vsz)) { 2731 tcg_gen_andi_i32(last, last, vsz - 1); 2732 } else { 2733 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2734 TCGv_i32 zero = tcg_constant_i32(0); 2735 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2736 } 2737 } 2738 2739 /* Load an unsigned element of ESZ from BASE+OFS. */ 2740 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2741 { 2742 TCGv_i64 r = tcg_temp_new_i64(); 2743 2744 switch (esz) { 2745 case 0: 2746 tcg_gen_ld8u_i64(r, base, ofs); 2747 break; 2748 case 1: 2749 tcg_gen_ld16u_i64(r, base, ofs); 2750 break; 2751 case 2: 2752 tcg_gen_ld32u_i64(r, base, ofs); 2753 break; 2754 case 3: 2755 tcg_gen_ld_i64(r, base, ofs); 2756 break; 2757 default: 2758 g_assert_not_reached(); 2759 } 2760 return r; 2761 } 2762 2763 /* Load an unsigned element of ESZ from RM[LAST]. */ 2764 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2765 int rm, int esz) 2766 { 2767 TCGv_ptr p = tcg_temp_new_ptr(); 2768 2769 /* Convert offset into vector into offset into ENV. 2770 * The final adjustment for the vector register base 2771 * is added via constant offset to the load. 2772 */ 2773 #if HOST_BIG_ENDIAN 2774 /* Adjust for element ordering. See vec_reg_offset. */ 2775 if (esz < 3) { 2776 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2777 } 2778 #endif 2779 tcg_gen_ext_i32_ptr(p, last); 2780 tcg_gen_add_ptr(p, p, tcg_env); 2781 2782 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2783 } 2784 2785 /* Compute CLAST for a Zreg. */ 2786 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2787 { 2788 TCGv_i32 last; 2789 TCGLabel *over; 2790 TCGv_i64 ele; 2791 unsigned vsz, esz = a->esz; 2792 2793 if (!sve_access_check(s)) { 2794 return true; 2795 } 2796 2797 last = tcg_temp_new_i32(); 2798 over = gen_new_label(); 2799 2800 find_last_active(s, last, esz, a->pg); 2801 2802 /* There is of course no movcond for a 2048-bit vector, 2803 * so we must branch over the actual store. 2804 */ 2805 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2806 2807 if (!before) { 2808 incr_last_active(s, last, esz); 2809 } 2810 2811 ele = load_last_active(s, last, a->rm, esz); 2812 2813 vsz = vec_full_reg_size(s); 2814 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2815 2816 /* If this insn used MOVPRFX, we may need a second move. */ 2817 if (a->rd != a->rn) { 2818 TCGLabel *done = gen_new_label(); 2819 tcg_gen_br(done); 2820 2821 gen_set_label(over); 2822 do_mov_z(s, a->rd, a->rn); 2823 2824 gen_set_label(done); 2825 } else { 2826 gen_set_label(over); 2827 } 2828 return true; 2829 } 2830 2831 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2832 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2833 2834 /* Compute CLAST for a scalar. 
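 * The result is the last active element of Zm (CLASTB) or the element
 * following it, with wraparound (CLASTA); if the governing predicate
 * has no active element, REG_VAL is returned unchanged.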
*/ 2835 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2836 bool before, TCGv_i64 reg_val) 2837 { 2838 TCGv_i32 last = tcg_temp_new_i32(); 2839 TCGv_i64 ele, cmp; 2840 2841 find_last_active(s, last, esz, pg); 2842 2843 /* Extend the original value of last prior to incrementing. */ 2844 cmp = tcg_temp_new_i64(); 2845 tcg_gen_ext_i32_i64(cmp, last); 2846 2847 if (!before) { 2848 incr_last_active(s, last, esz); 2849 } 2850 2851 /* The conceit here is that while last < 0 indicates not found, after 2852 * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address 2853 * from which we can load garbage. We then discard the garbage with 2854 * a conditional move. 2855 */ 2856 ele = load_last_active(s, last, rm, esz); 2857 2858 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2859 ele, reg_val); 2860 } 2861 2862 /* Compute CLAST for a Vreg. */ 2863 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2864 { 2865 if (sve_access_check(s)) { 2866 int esz = a->esz; 2867 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2868 TCGv_i64 reg = load_esz(tcg_env, ofs, esz); 2869 2870 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2871 write_fp_dreg(s, a->rd, reg); 2872 } 2873 return true; 2874 } 2875 2876 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2877 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2878 2879 /* Compute CLAST for a Xreg. */ 2880 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2881 { 2882 TCGv_i64 reg; 2883 2884 if (!sve_access_check(s)) { 2885 return true; 2886 } 2887 2888 reg = cpu_reg(s, a->rd); 2889 switch (a->esz) { 2890 case 0: 2891 tcg_gen_ext8u_i64(reg, reg); 2892 break; 2893 case 1: 2894 tcg_gen_ext16u_i64(reg, reg); 2895 break; 2896 case 2: 2897 tcg_gen_ext32u_i64(reg, reg); 2898 break; 2899 case 3: 2900 break; 2901 default: 2902 g_assert_not_reached(); 2903 } 2904 2905 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2906 return true; 2907 } 2908 2909 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2910 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2911 2912 /* Compute LAST for a scalar. */ 2913 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2914 int pg, int rm, bool before) 2915 { 2916 TCGv_i32 last = tcg_temp_new_i32(); 2917 2918 find_last_active(s, last, esz, pg); 2919 if (before) { 2920 wrap_last_active(s, last, esz); 2921 } else { 2922 incr_last_active(s, last, esz); 2923 } 2924 2925 return load_last_active(s, last, rm, esz); 2926 } 2927 2928 /* Compute LAST for a Vreg. */ 2929 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2930 { 2931 if (sve_access_check(s)) { 2932 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2933 write_fp_dreg(s, a->rd, val); 2934 } 2935 return true; 2936 } 2937 2938 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2939 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2940 2941 /* Compute LAST for a Xreg. 
*/ 2942 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2943 { 2944 if (sve_access_check(s)) { 2945 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2946 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2947 } 2948 return true; 2949 } 2950 2951 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2952 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2953 2954 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2955 { 2956 if (!dc_isar_feature(aa64_sve, s)) { 2957 return false; 2958 } 2959 if (sve_access_check(s)) { 2960 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2961 } 2962 return true; 2963 } 2964 2965 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2966 { 2967 if (!dc_isar_feature(aa64_sve, s)) { 2968 return false; 2969 } 2970 if (sve_access_check(s)) { 2971 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2972 TCGv_i64 t = load_esz(tcg_env, ofs, a->esz); 2973 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2974 } 2975 return true; 2976 } 2977 2978 static gen_helper_gvec_3 * const revb_fns[4] = { 2979 NULL, gen_helper_sve_revb_h, 2980 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2981 }; 2982 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2983 2984 static gen_helper_gvec_3 * const revh_fns[4] = { 2985 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2986 }; 2987 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2988 2989 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2990 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2991 2992 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2993 2994 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2995 gen_helper_sve_splice, a, a->esz) 2996 2997 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2998 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2999 3000 /* 3001 *** SVE Integer Compare - Vectors Group 3002 */ 3003 3004 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 3005 gen_helper_gvec_flags_4 *gen_fn) 3006 { 3007 TCGv_ptr pd, zn, zm, pg; 3008 unsigned vsz; 3009 TCGv_i32 t; 3010 3011 if (gen_fn == NULL) { 3012 return false; 3013 } 3014 if (!sve_access_check(s)) { 3015 return true; 3016 } 3017 3018 vsz = vec_full_reg_size(s); 3019 t = tcg_temp_new_i32(); 3020 pd = tcg_temp_new_ptr(); 3021 zn = tcg_temp_new_ptr(); 3022 zm = tcg_temp_new_ptr(); 3023 pg = tcg_temp_new_ptr(); 3024 3025 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 3026 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3027 tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm)); 3028 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3029 3030 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 3031 3032 do_pred_flags(t); 3033 return true; 3034 } 3035 3036 #define DO_PPZZ(NAME, name) \ 3037 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 3038 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 3039 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 3040 }; \ 3041 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 3042 a, name##_ppzz_fns[a->esz]) 3043 3044 DO_PPZZ(CMPEQ, cmpeq) 3045 DO_PPZZ(CMPNE, cmpne) 3046 DO_PPZZ(CMPGT, cmpgt) 3047 DO_PPZZ(CMPGE, cmpge) 3048 DO_PPZZ(CMPHI, cmphi) 3049 DO_PPZZ(CMPHS, cmphs) 3050 3051 #undef DO_PPZZ 3052 3053 #define DO_PPZW(NAME, name) \ 3054 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 3055 
gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 3056 gen_helper_sve_##name##_ppzw_s, NULL \ 3057 }; \ 3058 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 3059 a, name##_ppzw_fns[a->esz]) 3060 3061 DO_PPZW(CMPEQ, cmpeq) 3062 DO_PPZW(CMPNE, cmpne) 3063 DO_PPZW(CMPGT, cmpgt) 3064 DO_PPZW(CMPGE, cmpge) 3065 DO_PPZW(CMPHI, cmphi) 3066 DO_PPZW(CMPHS, cmphs) 3067 DO_PPZW(CMPLT, cmplt) 3068 DO_PPZW(CMPLE, cmple) 3069 DO_PPZW(CMPLO, cmplo) 3070 DO_PPZW(CMPLS, cmpls) 3071 3072 #undef DO_PPZW 3073 3074 /* 3075 *** SVE Integer Compare - Immediate Groups 3076 */ 3077 3078 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 3079 gen_helper_gvec_flags_3 *gen_fn) 3080 { 3081 TCGv_ptr pd, zn, pg; 3082 unsigned vsz; 3083 TCGv_i32 t; 3084 3085 if (gen_fn == NULL) { 3086 return false; 3087 } 3088 if (!sve_access_check(s)) { 3089 return true; 3090 } 3091 3092 vsz = vec_full_reg_size(s); 3093 t = tcg_temp_new_i32(); 3094 pd = tcg_temp_new_ptr(); 3095 zn = tcg_temp_new_ptr(); 3096 pg = tcg_temp_new_ptr(); 3097 3098 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 3099 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3100 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3101 3102 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 3103 3104 do_pred_flags(t); 3105 return true; 3106 } 3107 3108 #define DO_PPZI(NAME, name) \ 3109 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 3110 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 3111 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 3112 }; \ 3113 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 3114 name##_ppzi_fns[a->esz]) 3115 3116 DO_PPZI(CMPEQ, cmpeq) 3117 DO_PPZI(CMPNE, cmpne) 3118 DO_PPZI(CMPGT, cmpgt) 3119 DO_PPZI(CMPGE, cmpge) 3120 DO_PPZI(CMPHI, cmphi) 3121 DO_PPZI(CMPHS, cmphs) 3122 DO_PPZI(CMPLT, cmplt) 3123 DO_PPZI(CMPLE, cmple) 3124 DO_PPZI(CMPLO, cmplo) 3125 DO_PPZI(CMPLS, cmpls) 3126 3127 #undef DO_PPZI 3128 3129 /* 3130 *** SVE Partition Break Group 3131 */ 3132 3133 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 3134 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 3135 { 3136 if (!sve_access_check(s)) { 3137 return true; 3138 } 3139 3140 unsigned vsz = pred_full_reg_size(s); 3141 3142 /* Predicate sizes may be smaller and cannot use simd_desc. */ 3143 TCGv_ptr d = tcg_temp_new_ptr(); 3144 TCGv_ptr n = tcg_temp_new_ptr(); 3145 TCGv_ptr m = tcg_temp_new_ptr(); 3146 TCGv_ptr g = tcg_temp_new_ptr(); 3147 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3148 3149 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 3150 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 3151 tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm)); 3152 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 3153 3154 if (a->s) { 3155 TCGv_i32 t = tcg_temp_new_i32(); 3156 fn_s(t, d, n, m, g, desc); 3157 do_pred_flags(t); 3158 } else { 3159 fn(d, n, m, g, desc); 3160 } 3161 return true; 3162 } 3163 3164 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 3165 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 3166 { 3167 if (!sve_access_check(s)) { 3168 return true; 3169 } 3170 3171 unsigned vsz = pred_full_reg_size(s); 3172 3173 /* Predicate sizes may be smaller and cannot use simd_desc. 
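 * Encode the predicate size directly in a PREDDESC descriptor instead.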
*/ 3174 TCGv_ptr d = tcg_temp_new_ptr(); 3175 TCGv_ptr n = tcg_temp_new_ptr(); 3176 TCGv_ptr g = tcg_temp_new_ptr(); 3177 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3178 3179 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 3180 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 3181 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 3182 3183 if (a->s) { 3184 TCGv_i32 t = tcg_temp_new_i32(); 3185 fn_s(t, d, n, g, desc); 3186 do_pred_flags(t); 3187 } else { 3188 fn(d, n, g, desc); 3189 } 3190 return true; 3191 } 3192 3193 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3194 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3195 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3196 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3197 3198 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3199 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3200 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3201 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3202 3203 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3204 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3205 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3206 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3207 3208 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3209 gen_helper_sve_brkn, gen_helper_sve_brkns) 3210 3211 /* 3212 *** SVE Predicate Count Group 3213 */ 3214 3215 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3216 { 3217 unsigned psz = pred_full_reg_size(s); 3218 3219 if (psz <= 8) { 3220 uint64_t psz_mask; 3221 3222 tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn)); 3223 if (pn != pg) { 3224 TCGv_i64 g = tcg_temp_new_i64(); 3225 tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg)); 3226 tcg_gen_and_i64(val, val, g); 3227 } 3228 3229 /* Reduce the pred_esz_masks value simply to reduce the 3230 * size of the code generated here. 3231 */ 3232 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3233 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3234 3235 tcg_gen_ctpop_i64(val, val); 3236 } else { 3237 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3238 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3239 unsigned desc = 0; 3240 3241 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3242 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3243 3244 tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn)); 3245 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3246 3247 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3248 } 3249 } 3250 3251 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3252 { 3253 if (!dc_isar_feature(aa64_sve, s)) { 3254 return false; 3255 } 3256 if (sve_access_check(s)) { 3257 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3258 } 3259 return true; 3260 } 3261 3262 static bool trans_CNTP_c(DisasContext *s, arg_CNTP_c *a) 3263 { 3264 TCGv_i32 t_png; 3265 uint32_t desc = 0; 3266 3267 if (dc_isar_feature(aa64_sve2p1, s)) { 3268 if (!sve_access_check(s)) { 3269 return true; 3270 } 3271 } else if (dc_isar_feature(aa64_sme2, s)) { 3272 if (!sme_sm_enabled_check(s)) { 3273 return true; 3274 } 3275 } else { 3276 return false; 3277 } 3278 3279 t_png = tcg_temp_new_i32(); 3280 tcg_gen_ld16u_i32(t_png, tcg_env, 3281 pred_full_reg_offset(s, a->rn) ^ 3282 (HOST_BIG_ENDIAN ? 
6 : 0)); 3283 3284 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 3285 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3286 desc = FIELD_DP32(desc, PREDDESC, DATA, a->vl); 3287 3288 gen_helper_sve2p1_cntp_c(cpu_reg(s, a->rd), t_png, tcg_constant_i32(desc)); 3289 return true; 3290 } 3291 3292 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3293 { 3294 if (!dc_isar_feature(aa64_sve, s)) { 3295 return false; 3296 } 3297 if (sve_access_check(s)) { 3298 TCGv_i64 reg = cpu_reg(s, a->rd); 3299 TCGv_i64 val = tcg_temp_new_i64(); 3300 3301 do_cntp(s, val, a->esz, a->pg, a->pg); 3302 if (a->d) { 3303 tcg_gen_sub_i64(reg, reg, val); 3304 } else { 3305 tcg_gen_add_i64(reg, reg, val); 3306 } 3307 } 3308 return true; 3309 } 3310 3311 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3312 { 3313 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3314 return false; 3315 } 3316 if (sve_access_check(s)) { 3317 unsigned vsz = vec_full_reg_size(s); 3318 TCGv_i64 val = tcg_temp_new_i64(); 3319 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3320 3321 do_cntp(s, val, a->esz, a->pg, a->pg); 3322 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3323 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3324 } 3325 return true; 3326 } 3327 3328 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3329 { 3330 if (!dc_isar_feature(aa64_sve, s)) { 3331 return false; 3332 } 3333 if (sve_access_check(s)) { 3334 TCGv_i64 reg = cpu_reg(s, a->rd); 3335 TCGv_i64 val = tcg_temp_new_i64(); 3336 3337 do_cntp(s, val, a->esz, a->pg, a->pg); 3338 do_sat_addsub_32(reg, val, a->u, a->d); 3339 } 3340 return true; 3341 } 3342 3343 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3344 { 3345 if (!dc_isar_feature(aa64_sve, s)) { 3346 return false; 3347 } 3348 if (sve_access_check(s)) { 3349 TCGv_i64 reg = cpu_reg(s, a->rd); 3350 TCGv_i64 val = tcg_temp_new_i64(); 3351 3352 do_cntp(s, val, a->esz, a->pg, a->pg); 3353 do_sat_addsub_64(reg, val, a->u, a->d); 3354 } 3355 return true; 3356 } 3357 3358 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3359 { 3360 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3361 return false; 3362 } 3363 if (sve_access_check(s)) { 3364 TCGv_i64 val = tcg_temp_new_i64(); 3365 do_cntp(s, val, a->esz, a->pg, a->pg); 3366 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3367 } 3368 return true; 3369 } 3370 3371 /* 3372 *** SVE Integer Compare Scalars Group 3373 */ 3374 3375 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3376 { 3377 if (!dc_isar_feature(aa64_sve, s)) { 3378 return false; 3379 } 3380 if (!sve_access_check(s)) { 3381 return true; 3382 } 3383 3384 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3385 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3386 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3387 TCGv_i64 cmp = tcg_temp_new_i64(); 3388 3389 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3390 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3391 3392 /* VF = !NF & !CF. */ 3393 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3394 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3395 3396 /* Both NF and VF actually look at bit 31. 
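 * Negating the 0/1 setcond results turns them into 0/-1, placing the
 * flag value in bit 31.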
*/ 3397 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3398 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3399 return true; 3400 } 3401 3402 typedef void gen_while_fn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); 3403 static bool do_WHILE(DisasContext *s, arg_while *a, 3404 bool lt, int scale, int data, gen_while_fn *fn) 3405 { 3406 TCGv_i64 op0, op1, t0, t1, tmax; 3407 TCGv_i32 t2; 3408 TCGv_ptr ptr; 3409 unsigned vsz = vec_full_reg_size(s); 3410 unsigned desc = 0; 3411 TCGCond cond; 3412 uint64_t maxval; 3413 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3414 bool eq = a->eq == lt; 3415 3416 if (!sve_access_check(s)) { 3417 return true; 3418 } 3419 3420 op0 = read_cpu_reg(s, a->rn, 1); 3421 op1 = read_cpu_reg(s, a->rm, 1); 3422 3423 if (!a->sf) { 3424 if (a->u) { 3425 tcg_gen_ext32u_i64(op0, op0); 3426 tcg_gen_ext32u_i64(op1, op1); 3427 } else { 3428 tcg_gen_ext32s_i64(op0, op0); 3429 tcg_gen_ext32s_i64(op1, op1); 3430 } 3431 } 3432 3433 /* For the helper, compress the different conditions into a computation 3434 * of how many iterations for which the condition is true. 3435 */ 3436 t0 = tcg_temp_new_i64(); 3437 t1 = tcg_temp_new_i64(); 3438 3439 if (lt) { 3440 tcg_gen_sub_i64(t0, op1, op0); 3441 if (a->u) { 3442 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3443 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3444 } else { 3445 maxval = a->sf ? INT64_MAX : INT32_MAX; 3446 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3447 } 3448 } else { 3449 tcg_gen_sub_i64(t0, op0, op1); 3450 if (a->u) { 3451 maxval = 0; 3452 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3453 } else { 3454 maxval = a->sf ? INT64_MIN : INT32_MIN; 3455 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3456 } 3457 } 3458 3459 tmax = tcg_constant_i64((vsz << scale) >> a->esz); 3460 if (eq) { 3461 /* Equality means one more iteration. */ 3462 tcg_gen_addi_i64(t0, t0, 1); 3463 3464 /* 3465 * For the less-than while, if op1 is maxval (and the only time 3466 * the addition above could overflow), then we produce an all-true 3467 * predicate by setting the count to the vector length. This is 3468 * because the pseudocode is described as an increment + compare 3469 * loop, and the maximum integer would always compare true. 3470 * Similarly, the greater-than while has the same issue with the 3471 * minimum integer due to the decrement + compare loop. 3472 */ 3473 tcg_gen_movi_i64(t1, maxval); 3474 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3475 } 3476 3477 /* Bound to the maximum. */ 3478 tcg_gen_umin_i64(t0, t0, tmax); 3479 3480 /* Set the count to zero if the condition is false. */ 3481 tcg_gen_movi_i64(t1, 0); 3482 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3483 3484 /* Since we're bounded, pass as a 32-bit type. 
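 * The iteration count in T0 has been clamped to TMAX, which is no
 * larger than the scaled vector length in elements, so it easily
 * fits in 32 bits.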
*/ 3485 t2 = tcg_temp_new_i32(); 3486 tcg_gen_extrl_i64_i32(t2, t0); 3487 3488 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3489 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3490 desc = FIELD_DP32(desc, PREDDESC, DATA, data); 3491 3492 ptr = tcg_temp_new_ptr(); 3493 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3494 3495 fn(t2, ptr, t2, tcg_constant_i32(desc)); 3496 3497 do_pred_flags(t2); 3498 return true; 3499 } 3500 3501 TRANS_FEAT(WHILE_lt, aa64_sve, do_WHILE, 3502 a, true, 0, 0, gen_helper_sve_whilel) 3503 TRANS_FEAT(WHILE_gt, aa64_sve2, do_WHILE, 3504 a, false, 0, 0, gen_helper_sve_whileg) 3505 3506 TRANS_FEAT(WHILE_lt_pair, aa64_sme2_or_sve2p1, do_WHILE, 3507 a, true, 1, 0, gen_helper_sve_while2l) 3508 TRANS_FEAT(WHILE_gt_pair, aa64_sme2_or_sve2p1, do_WHILE, 3509 a, false, 1, 0, gen_helper_sve_while2g) 3510 3511 TRANS_FEAT(WHILE_lt_cnt2, aa64_sme2_or_sve2p1, do_WHILE, 3512 a, true, 1, 1, gen_helper_sve_whilecl) 3513 TRANS_FEAT(WHILE_lt_cnt4, aa64_sme2_or_sve2p1, do_WHILE, 3514 a, true, 2, 2, gen_helper_sve_whilecl) 3515 TRANS_FEAT(WHILE_gt_cnt2, aa64_sme2_or_sve2p1, do_WHILE, 3516 a, false, 1, 1, gen_helper_sve_whilecg) 3517 TRANS_FEAT(WHILE_gt_cnt4, aa64_sme2_or_sve2p1, do_WHILE, 3518 a, false, 2, 2, gen_helper_sve_whilecg) 3519 3520 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3521 { 3522 TCGv_i64 op0, op1, diff, t1, tmax; 3523 TCGv_i32 t2; 3524 TCGv_ptr ptr; 3525 unsigned vsz = vec_full_reg_size(s); 3526 unsigned desc = 0; 3527 3528 if (!dc_isar_feature(aa64_sve2, s)) { 3529 return false; 3530 } 3531 if (!sve_access_check(s)) { 3532 return true; 3533 } 3534 3535 op0 = read_cpu_reg(s, a->rn, 1); 3536 op1 = read_cpu_reg(s, a->rm, 1); 3537 3538 tmax = tcg_constant_i64(vsz >> a->esz); 3539 diff = tcg_temp_new_i64(); 3540 3541 if (a->rw) { 3542 /* WHILERW */ 3543 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3544 t1 = tcg_temp_new_i64(); 3545 tcg_gen_sub_i64(diff, op0, op1); 3546 tcg_gen_sub_i64(t1, op1, op0); 3547 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3548 /* Divide, rounding down, by ESIZE. */ 3549 tcg_gen_shri_i64(diff, diff, a->esz); 3550 /* If op1 == op0, diff == 0, and the condition is always true. */ 3551 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3552 } else { 3553 /* WHILEWR */ 3554 tcg_gen_sub_i64(diff, op1, op0); 3555 /* Divide, rounding down, by ESIZE. */ 3556 tcg_gen_shri_i64(diff, diff, a->esz); 3557 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3558 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3559 } 3560 3561 /* Bound to the maximum. */ 3562 tcg_gen_umin_i64(diff, diff, tmax); 3563 3564 /* Since we're bounded, pass as a 32-bit type. */ 3565 t2 = tcg_temp_new_i32(); 3566 tcg_gen_extrl_i64_i32(t2, diff); 3567 3568 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3569 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3570 3571 ptr = tcg_temp_new_ptr(); 3572 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3573 3574 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3575 do_pred_flags(t2); 3576 return true; 3577 } 3578 3579 static bool do_pext(DisasContext *s, arg_pext *a, int n) 3580 { 3581 TCGv_i32 t_png; 3582 TCGv_ptr t_pd; 3583 int pl; 3584 3585 if (!sve_access_check(s)) { 3586 return true; 3587 } 3588 3589 t_png = tcg_temp_new_i32(); 3590 tcg_gen_ld16u_i32(t_png, tcg_env, 3591 pred_full_reg_offset(s, a->rn) ^ 3592 (HOST_BIG_ENDIAN ? 
6 : 0)); 3593 3594 t_pd = tcg_temp_new_ptr(); 3595 pl = pred_full_reg_size(s); 3596 3597 for (int i = 0; i < n; ++i) { 3598 int rd = (a->rd + i) % 16; 3599 int part = a->imm * n + i; 3600 unsigned desc = 0; 3601 3602 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pl); 3603 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3604 desc = FIELD_DP32(desc, PREDDESC, DATA, part); 3605 3606 tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, rd)); 3607 gen_helper_pext(t_pd, t_png, tcg_constant_i32(desc)); 3608 } 3609 return true; 3610 } 3611 3612 TRANS_FEAT(PEXT_1, aa64_sme2_or_sve2p1, do_pext, a, 1) 3613 TRANS_FEAT(PEXT_2, aa64_sme2_or_sve2p1, do_pext, a, 2) 3614 3615 /* 3616 *** SVE Integer Wide Immediate - Unpredicated Group 3617 */ 3618 3619 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3620 { 3621 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3622 return false; 3623 } 3624 if (sve_access_check(s)) { 3625 unsigned vsz = vec_full_reg_size(s); 3626 int dofs = vec_full_reg_offset(s, a->rd); 3627 uint64_t imm; 3628 3629 /* Decode the VFP immediate. */ 3630 imm = vfp_expand_imm(a->esz, a->imm); 3631 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3632 } 3633 return true; 3634 } 3635 3636 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3637 { 3638 if (!dc_isar_feature(aa64_sve, s)) { 3639 return false; 3640 } 3641 if (sve_access_check(s)) { 3642 unsigned vsz = vec_full_reg_size(s); 3643 int dofs = vec_full_reg_offset(s, a->rd); 3644 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3645 } 3646 return true; 3647 } 3648 3649 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3650 3651 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3652 { 3653 a->imm = -a->imm; 3654 return trans_ADD_zzi(s, a); 3655 } 3656 3657 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3658 { 3659 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3660 static const GVecGen2s op[4] = { 3661 { .fni8 = tcg_gen_vec_sub8_i64, 3662 .fniv = tcg_gen_sub_vec, 3663 .fno = gen_helper_sve_subri_b, 3664 .opt_opc = vecop_list, 3665 .vece = MO_8, 3666 .scalar_first = true }, 3667 { .fni8 = tcg_gen_vec_sub16_i64, 3668 .fniv = tcg_gen_sub_vec, 3669 .fno = gen_helper_sve_subri_h, 3670 .opt_opc = vecop_list, 3671 .vece = MO_16, 3672 .scalar_first = true }, 3673 { .fni4 = tcg_gen_sub_i32, 3674 .fniv = tcg_gen_sub_vec, 3675 .fno = gen_helper_sve_subri_s, 3676 .opt_opc = vecop_list, 3677 .vece = MO_32, 3678 .scalar_first = true }, 3679 { .fni8 = tcg_gen_sub_i64, 3680 .fniv = tcg_gen_sub_vec, 3681 .fno = gen_helper_sve_subri_d, 3682 .opt_opc = vecop_list, 3683 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3684 .vece = MO_64, 3685 .scalar_first = true } 3686 }; 3687 3688 if (!dc_isar_feature(aa64_sve, s)) { 3689 return false; 3690 } 3691 if (sve_access_check(s)) { 3692 unsigned vsz = vec_full_reg_size(s); 3693 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3694 vec_full_reg_offset(s, a->rn), 3695 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3696 } 3697 return true; 3698 } 3699 3700 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3701 3702 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3703 { 3704 if (sve_access_check(s)) { 3705 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3706 tcg_constant_i64(a->imm), u, d); 3707 } 3708 return true; 3709 } 3710 3711 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3712 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3713 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, 
false, true) 3714 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3715 3716 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3717 { 3718 if (sve_access_check(s)) { 3719 unsigned vsz = vec_full_reg_size(s); 3720 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3721 vec_full_reg_offset(s, a->rn), 3722 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3723 } 3724 return true; 3725 } 3726 3727 #define DO_ZZI(NAME, name) \ 3728 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3729 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3730 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3731 }; \ 3732 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3733 3734 DO_ZZI(SMAX, smax) 3735 DO_ZZI(UMAX, umax) 3736 DO_ZZI(SMIN, smin) 3737 DO_ZZI(UMIN, umin) 3738 3739 #undef DO_ZZI 3740 3741 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3742 { gen_helper_gvec_sdot_4b, gen_helper_gvec_sdot_4h }, 3743 { gen_helper_gvec_udot_4b, gen_helper_gvec_udot_4h } 3744 }; 3745 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3746 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3747 3748 /* 3749 * SVE Multiply - Indexed 3750 */ 3751 3752 TRANS_FEAT(SDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz, 3753 gen_helper_gvec_sdot_idx_4b, a) 3754 TRANS_FEAT(SDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz, 3755 gen_helper_gvec_sdot_idx_4h, a) 3756 TRANS_FEAT(UDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz, 3757 gen_helper_gvec_udot_idx_4b, a) 3758 TRANS_FEAT(UDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz, 3759 gen_helper_gvec_udot_idx_4h, a) 3760 3761 TRANS_FEAT(SUDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3762 gen_helper_gvec_sudot_idx_4b, a) 3763 TRANS_FEAT(USDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3764 gen_helper_gvec_usdot_idx_4b, a) 3765 3766 TRANS_FEAT(SDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz, 3767 gen_helper_gvec_sdot_idx_2h, a) 3768 TRANS_FEAT(UDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz, 3769 gen_helper_gvec_udot_idx_2h, a) 3770 3771 #define DO_SVE2_RRX(NAME, FUNC) \ 3772 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3773 a->rd, a->rn, a->rm, a->index) 3774 3775 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3776 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3777 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3778 3779 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3780 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3781 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3782 3783 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3784 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3785 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3786 3787 #undef DO_SVE2_RRX 3788 3789 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3790 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3791 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3792 3793 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3794 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3795 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3796 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3797 3798 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3799 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3800 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3801 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3802 3803 
DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3804 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3805 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3806 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3807 3808 #undef DO_SVE2_RRX_TB 3809 3810 #define DO_SVE2_RRXR(NAME, FUNC) \ 3811 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3812 3813 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3814 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s) 3815 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3816 3817 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3818 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3819 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3820 3821 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3822 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3823 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3824 3825 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3826 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3827 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3828 3829 #undef DO_SVE2_RRXR 3830 3831 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3832 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3833 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3834 3835 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3836 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3837 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3838 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3839 3840 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3841 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3842 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3843 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3844 3845 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3846 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3847 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3848 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3849 3850 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3851 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3852 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3853 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3854 3855 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3856 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3857 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3858 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3859 3860 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3861 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3862 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3863 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3864 3865 #undef DO_SVE2_RRXR_TB 3866 3867 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3868 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3869 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3870 3871 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3872 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3873 3874 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, 
gen_helper_sve2_sqrdcmlah_idx_h) 3875 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3876 3877 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3878 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3879 3880 #undef DO_SVE2_RRXR_ROT 3881 3882 /* 3883 *** SVE Floating Point Multiply-Add Indexed Group 3884 */ 3885 3886 static bool do_fmla_zzxz(DisasContext *s, arg_rrxr_esz *a, 3887 gen_helper_gvec_4_ptr *fn) 3888 { 3889 /* These insns use MO_8 to encode BFloat16 */ 3890 if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { 3891 return false; 3892 } 3893 return gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index, 3894 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 3895 } 3896 3897 static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { 3898 gen_helper_gvec_bfmla_idx, gen_helper_gvec_fmla_idx_h, 3899 gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d 3900 }; 3901 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_fmla_zzxz, a, fmla_idx_fns[a->esz]) 3902 3903 static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { 3904 { gen_helper_gvec_bfmls_idx, gen_helper_gvec_ah_bfmls_idx }, 3905 { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, 3906 { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, 3907 { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, 3908 }; 3909 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_fmla_zzxz, a, 3910 fmls_idx_fns[a->esz][s->fpcr_ah]) 3911 3912 /* 3913 *** SVE Floating Point Multiply Indexed Group 3914 */ 3915 3916 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3917 gen_helper_gvec_fmul_idx_b16, gen_helper_gvec_fmul_idx_h, 3918 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3919 }; 3920 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3921 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3922 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 3923 3924 /* 3925 *** SVE Floating Point Fast Reduction Group 3926 */ 3927 3928 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3929 TCGv_ptr, TCGv_i32); 3930 3931 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3932 gen_helper_fp_reduce *fn) 3933 { 3934 unsigned vsz, p2vsz; 3935 TCGv_i32 t_desc; 3936 TCGv_ptr t_zn, t_pg, status; 3937 TCGv_i64 temp; 3938 3939 if (fn == NULL) { 3940 return false; 3941 } 3942 if (!sve_access_check(s)) { 3943 return true; 3944 } 3945 3946 vsz = vec_full_reg_size(s); 3947 p2vsz = pow2ceil(vsz); 3948 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3949 temp = tcg_temp_new_i64(); 3950 t_zn = tcg_temp_new_ptr(); 3951 t_pg = tcg_temp_new_ptr(); 3952 3953 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3954 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3955 status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 3956 3957 fn(temp, t_zn, t_pg, status, t_desc); 3958 3959 write_fp_dreg(s, a->rd, temp); 3960 return true; 3961 } 3962 3963 #define DO_VPZ(NAME, name) \ 3964 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3965 NULL, gen_helper_sve_##name##_h, \ 3966 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3967 }; \ 3968 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3969 3970 #define DO_VPZ_AH(NAME, name) \ 3971 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3972 NULL, gen_helper_sve_##name##_h, \ 3973 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3974 }; \ 3975 static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ 3976 NULL, gen_helper_sve_ah_##name##_h, \ 3977 gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ 3978 }; \ 3979 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ 3980 s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) 3981 3982 DO_VPZ(FADDV, faddv) 3983 DO_VPZ(FMINNMV, fminnmv) 3984 DO_VPZ(FMAXNMV, fmaxnmv) 3985 DO_VPZ_AH(FMINV, fminv) 3986 DO_VPZ_AH(FMAXV, fmaxv) 3987 3988 #undef DO_VPZ 3989 3990 static gen_helper_gvec_3_ptr * const faddqv_fns[4] = { 3991 NULL, gen_helper_sve2p1_faddqv_h, 3992 gen_helper_sve2p1_faddqv_s, gen_helper_sve2p1_faddqv_d, 3993 }; 3994 TRANS_FEAT(FADDQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 3995 faddqv_fns[a->esz], a, 0, 3996 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 3997 3998 static gen_helper_gvec_3_ptr * const fmaxnmqv_fns[4] = { 3999 NULL, gen_helper_sve2p1_fmaxnmqv_h, 4000 gen_helper_sve2p1_fmaxnmqv_s, gen_helper_sve2p1_fmaxnmqv_d, 4001 }; 4002 TRANS_FEAT(FMAXNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 4003 fmaxnmqv_fns[a->esz], a, 0, 4004 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4005 4006 static gen_helper_gvec_3_ptr * const fminnmqv_fns[4] = { 4007 NULL, gen_helper_sve2p1_fminnmqv_h, 4008 gen_helper_sve2p1_fminnmqv_s, gen_helper_sve2p1_fminnmqv_d, 4009 }; 4010 TRANS_FEAT(FMINNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 4011 fminnmqv_fns[a->esz], a, 0, 4012 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4013 4014 static gen_helper_gvec_3_ptr * const fmaxqv_fns[4] = { 4015 NULL, gen_helper_sve2p1_fmaxqv_h, 4016 gen_helper_sve2p1_fmaxqv_s, gen_helper_sve2p1_fmaxqv_d, 4017 }; 4018 static gen_helper_gvec_3_ptr * const fmaxqv_ah_fns[4] = { 4019 NULL, gen_helper_sve2p1_ah_fmaxqv_h, 4020 gen_helper_sve2p1_ah_fmaxqv_s, gen_helper_sve2p1_ah_fmaxqv_d, 4021 }; 4022 TRANS_FEAT(FMAXQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 4023 (s->fpcr_ah ? fmaxqv_ah_fns : fmaxqv_fns)[a->esz], a, 0, 4024 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4025 4026 static gen_helper_gvec_3_ptr * const fminqv_fns[4] = { 4027 NULL, gen_helper_sve2p1_fminqv_h, 4028 gen_helper_sve2p1_fminqv_s, gen_helper_sve2p1_fminqv_d, 4029 }; 4030 static gen_helper_gvec_3_ptr * const fminqv_ah_fns[4] = { 4031 NULL, gen_helper_sve2p1_ah_fminqv_h, 4032 gen_helper_sve2p1_ah_fminqv_s, gen_helper_sve2p1_ah_fminqv_d, 4033 }; 4034 TRANS_FEAT(FMINQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, 4035 (s->fpcr_ah ? fminqv_ah_fns : fminqv_fns)[a->esz], a, 0, 4036 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64) 4037 4038 /* 4039 *** SVE Floating Point Unary Operations - Unpredicated Group 4040 */ 4041 4042 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 4043 NULL, gen_helper_gvec_frecpe_h, 4044 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 4045 }; 4046 static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { 4047 NULL, gen_helper_gvec_frecpe_h, 4048 gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, 4049 }; 4050 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, 4051 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 4052 frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) 4053 4054 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 4055 NULL, gen_helper_gvec_frsqrte_h, 4056 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 4057 }; 4058 static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { 4059 NULL, gen_helper_gvec_frsqrte_h, 4060 gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, 4061 }; 4062 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, 4063 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 4064 frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) 4065 4066 /* 4067 *** SVE Floating Point Compare with Zero Group 4068 */ 4069 4070 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 4071 gen_helper_gvec_3_ptr *fn) 4072 { 4073 if (fn == NULL) { 4074 return false; 4075 } 4076 if (sve_access_check(s)) { 4077 unsigned vsz = vec_full_reg_size(s); 4078 TCGv_ptr status = 4079 fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4080 4081 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 4082 vec_full_reg_offset(s, a->rn), 4083 pred_full_reg_offset(s, a->pg), 4084 status, vsz, vsz, 0, fn); 4085 } 4086 return true; 4087 } 4088 4089 #define DO_PPZ(NAME, name) \ 4090 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 4091 NULL, gen_helper_sve_##name##_h, \ 4092 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 4093 }; \ 4094 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 4095 4096 DO_PPZ(FCMGE_ppz0, fcmge0) 4097 DO_PPZ(FCMGT_ppz0, fcmgt0) 4098 DO_PPZ(FCMLE_ppz0, fcmle0) 4099 DO_PPZ(FCMLT_ppz0, fcmlt0) 4100 DO_PPZ(FCMEQ_ppz0, fcmeq0) 4101 DO_PPZ(FCMNE_ppz0, fcmne0) 4102 4103 #undef DO_PPZ 4104 4105 /* 4106 *** SVE floating-point trig multiply-add coefficient 4107 */ 4108 4109 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 4110 NULL, gen_helper_sve_ftmad_h, 4111 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 4112 }; 4113 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 4114 ftmad_fns[a->esz], a->rd, a->rn, a->rm, 4115 a->imm | (s->fpcr_ah << 3), 4116 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64) 4117 4118 /* 4119 *** SVE Floating Point Accumulating Reduction Group 4120 */ 4121 4122 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 4123 { 4124 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 4125 TCGv_ptr, TCGv_ptr, TCGv_i32); 4126 static fadda_fn * const fns[3] = { 4127 gen_helper_sve_fadda_h, 4128 gen_helper_sve_fadda_s, 4129 gen_helper_sve_fadda_d, 4130 }; 4131 unsigned vsz = vec_full_reg_size(s); 4132 TCGv_ptr t_rm, t_pg, t_fpst; 4133 TCGv_i64 t_val; 4134 TCGv_i32 t_desc; 4135 4136 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 4137 return false; 4138 } 4139 s->is_nonstreaming = true; 4140 if (!sve_access_check(s)) { 4141 return true; 4142 } 4143 4144 t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 4145 t_rm = tcg_temp_new_ptr(); 4146 t_pg = tcg_temp_new_ptr(); 4147 tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); 4148 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 4149 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4150 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 4151 4152 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 4153 4154 write_fp_dreg(s, a->rd, t_val); 4155 return true; 4156 } 4157 4158 /* 4159 *** SVE Floating Point Arithmetic - Unpredicated Group 4160 */ 4161 4162 #define DO_FP3(NAME, name) \ 4163 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 4164 gen_helper_gvec_##name##_b16, gen_helper_gvec_##name##_h, \ 4165 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 4166 }; \ 4167 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 4168 4169 #define DO_FP3_AH(NAME, name) \ 4170 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 4171 NULL, gen_helper_gvec_##name##_h, \ 4172 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 4173 }; \ 4174 static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ 4175 NULL, gen_helper_gvec_ah_##name##_h, \ 4176 gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ 4177 }; \ 4178 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ 4179 s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) 4180 4181 DO_FP3(FADD_zzz, fadd) 4182 DO_FP3(FSUB_zzz, fsub) 4183 DO_FP3(FMUL_zzz, fmul) 4184 DO_FP3_AH(FRECPS, recps) 4185 DO_FP3_AH(FRSQRTS, rsqrts) 4186 4187 #undef DO_FP3 4188 4189 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 4190 NULL, gen_helper_gvec_ftsmul_h, 4191 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 4192 }; 4193 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 4194 ftsmul_fns[a->esz], a, 0) 4195 4196 /* 4197 *** SVE Floating Point Arithmetic - Predicated Group 4198 */ 4199 4200 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 4201 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 4202 NULL, gen_helper_##name##_h, \ 4203 gen_helper_##name##_s, gen_helper_##name##_d \ 4204 }; \ 4205 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 4206 4207 #define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ 4208 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 4209 NULL, gen_helper_##name##_h, \ 4210 gen_helper_##name##_s, gen_helper_##name##_d \ 4211 }; \ 4212 static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ 4213 NULL, gen_helper_##ah_name##_h, \ 4214 gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ 4215 }; \ 4216 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ 4217 s->fpcr_ah ? 
name##_ah_zpzz_fns[a->esz] : \ 4218 name##_zpzz_fns[a->esz], a) 4219 4220 /* Similar, but for insns where sz == 0 encodes bfloat16 */ 4221 #define DO_ZPZZ_FP_B16(NAME, FEAT, name) \ 4222 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 4223 gen_helper_##name##_b16, gen_helper_##name##_h, \ 4224 gen_helper_##name##_s, gen_helper_##name##_d \ 4225 }; \ 4226 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 4227 4228 #define DO_ZPZZ_AH_FP_B16(NAME, FEAT, name, ah_name) \ 4229 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 4230 gen_helper_##name##_b16, gen_helper_##name##_h, \ 4231 gen_helper_##name##_s, gen_helper_##name##_d \ 4232 }; \ 4233 static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ 4234 gen_helper_##ah_name##_b16, gen_helper_##ah_name##_h, \ 4235 gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ 4236 }; \ 4237 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ 4238 s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ 4239 name##_zpzz_fns[a->esz], a) 4240 4241 DO_ZPZZ_FP_B16(FADD_zpzz, aa64_sve, sve_fadd) 4242 DO_ZPZZ_FP_B16(FSUB_zpzz, aa64_sve, sve_fsub) 4243 DO_ZPZZ_FP_B16(FMUL_zpzz, aa64_sve, sve_fmul) 4244 DO_ZPZZ_AH_FP_B16(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) 4245 DO_ZPZZ_AH_FP_B16(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) 4246 DO_ZPZZ_FP_B16(FMINNM_zpzz, aa64_sve, sve_fminnum) 4247 DO_ZPZZ_FP_B16(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 4248 DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) 4249 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 4250 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 4251 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 4252 4253 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 4254 TCGv_i64, TCGv_ptr, TCGv_i32); 4255 4256 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 4257 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 4258 { 4259 unsigned vsz = vec_full_reg_size(s); 4260 TCGv_ptr t_zd, t_zn, t_pg, status; 4261 TCGv_i32 desc; 4262 4263 t_zd = tcg_temp_new_ptr(); 4264 t_zn = tcg_temp_new_ptr(); 4265 t_pg = tcg_temp_new_ptr(); 4266 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); 4267 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); 4268 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4269 4270 status = fpstatus_ptr(is_fp16 ? 
FPST_A64_F16 : FPST_A64); 4271 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 4272 fn(t_zd, t_zn, t_pg, scalar, status, desc); 4273 } 4274 4275 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 4276 gen_helper_sve_fp2scalar *fn) 4277 { 4278 if (fn == NULL) { 4279 return false; 4280 } 4281 if (sve_access_check(s)) { 4282 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 4283 tcg_constant_i64(imm), fn); 4284 } 4285 return true; 4286 } 4287 4288 #define DO_FP_IMM(NAME, name, const0, const1) \ 4289 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 4290 NULL, gen_helper_sve_##name##_h, \ 4291 gen_helper_sve_##name##_s, \ 4292 gen_helper_sve_##name##_d \ 4293 }; \ 4294 static uint64_t const name##_const[4][2] = { \ 4295 { -1, -1 }, \ 4296 { float16_##const0, float16_##const1 }, \ 4297 { float32_##const0, float32_##const1 }, \ 4298 { float64_##const0, float64_##const1 }, \ 4299 }; \ 4300 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 4301 name##_const[a->esz][a->imm], name##_fns[a->esz]) 4302 4303 #define DO_FP_AH_IMM(NAME, name, const0, const1) \ 4304 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 4305 NULL, gen_helper_sve_##name##_h, \ 4306 gen_helper_sve_##name##_s, \ 4307 gen_helper_sve_##name##_d \ 4308 }; \ 4309 static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ 4310 NULL, gen_helper_sve_ah_##name##_h, \ 4311 gen_helper_sve_ah_##name##_s, \ 4312 gen_helper_sve_ah_##name##_d \ 4313 }; \ 4314 static uint64_t const name##_const[4][2] = { \ 4315 { -1, -1 }, \ 4316 { float16_##const0, float16_##const1 }, \ 4317 { float32_##const0, float32_##const1 }, \ 4318 { float64_##const0, float64_##const1 }, \ 4319 }; \ 4320 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 4321 name##_const[a->esz][a->imm], \ 4322 s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) 4323 4324 DO_FP_IMM(FADD, fadds, half, one) 4325 DO_FP_IMM(FSUB, fsubs, half, one) 4326 DO_FP_IMM(FMUL, fmuls, half, two) 4327 DO_FP_IMM(FSUBR, fsubrs, half, one) 4328 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 4329 DO_FP_IMM(FMINNM, fminnms, zero, one) 4330 DO_FP_AH_IMM(FMAX, fmaxs, zero, one) 4331 DO_FP_AH_IMM(FMIN, fmins, zero, one) 4332 4333 #undef DO_FP_IMM 4334 4335 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 4336 gen_helper_gvec_4_ptr *fn) 4337 { 4338 if (fn == NULL) { 4339 return false; 4340 } 4341 if (sve_access_check(s)) { 4342 unsigned vsz = vec_full_reg_size(s); 4343 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 4344 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 4345 vec_full_reg_offset(s, a->rn), 4346 vec_full_reg_offset(s, a->rm), 4347 pred_full_reg_offset(s, a->pg), 4348 status, vsz, vsz, 0, fn); 4349 } 4350 return true; 4351 } 4352 4353 #define DO_FPCMP(NAME, name) \ 4354 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 4355 NULL, gen_helper_sve_##name##_h, \ 4356 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4357 }; \ 4358 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 4359 4360 DO_FPCMP(FCMGE, fcmge) 4361 DO_FPCMP(FCMGT, fcmgt) 4362 DO_FPCMP(FCMEQ, fcmeq) 4363 DO_FPCMP(FCMNE, fcmne) 4364 DO_FPCMP(FCMUO, fcmuo) 4365 DO_FPCMP(FACGE, facge) 4366 DO_FPCMP(FACGT, facgt) 4367 4368 #undef DO_FPCMP 4369 4370 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 4371 NULL, gen_helper_sve_fcadd_h, 4372 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 4373 }; 4374 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 4375 a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), 4376 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4377 4378 static bool do_fmla_zpzzz(DisasContext *s, arg_rprrr_esz *a, 4379 gen_helper_gvec_5_ptr *fn) 4380 { 4381 /* These insns use MO_8 to encode BFloat16 */ 4382 if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { 4383 return false; 4384 } 4385 return gen_gvec_fpst_zzzzp(s, fn, a->rd, a->rn, a->rm, a->ra, a->pg, 0, 4386 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4387 } 4388 4389 #define DO_FMLA(NAME, name, ah_name) \ 4390 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 4391 gen_helper_sve_##name##_b16, gen_helper_sve_##name##_h, \ 4392 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4393 }; \ 4394 static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ 4395 gen_helper_sve_##ah_name##_b16, gen_helper_sve_##ah_name##_h, \ 4396 gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ 4397 }; \ 4398 TRANS_FEAT(NAME, aa64_sve, do_fmla_zpzzz, a, \ 4399 s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) 4400 4401 /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ 4402 DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) 4403 DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) 4404 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) 4405 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) 4406 4407 #undef DO_FMLA 4408 4409 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 4410 NULL, gen_helper_sve_fcmla_zpzzz_h, 4411 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 4412 }; 4413 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 4414 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), 4415 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4416 4417 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 4418 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 4419 }; 4420 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 4421 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 4422 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4423 4424 /* 4425 *** SVE Floating Point Unary Operations Predicated Group 4426 */ 4427 4428 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4429 gen_helper_sve_fcvt_sh, a, 0, FPST_A64) 4430 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4431 gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) 4432 4433 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 4434 gen_helper_sve_bfcvt, a, 0, 4435 s->fpcr_ah ? 
FPST_AH : FPST_A64) 4436 4437 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4438 gen_helper_sve_fcvt_dh, a, 0, FPST_A64) 4439 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4440 gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16) 4441 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4442 gen_helper_sve_fcvt_ds, a, 0, FPST_A64) 4443 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4444 gen_helper_sve_fcvt_sd, a, 0, FPST_A64) 4445 4446 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4447 gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16) 4448 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4449 gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16) 4450 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4451 gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16) 4452 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4453 gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16) 4454 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4455 gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16) 4456 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4457 gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16) 4458 4459 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4460 gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64) 4461 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4462 gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64) 4463 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4464 gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64) 4465 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4466 gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64) 4467 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4468 gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64) 4469 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4470 gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64) 4471 4472 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4473 gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64) 4474 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4475 gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64) 4476 4477 static gen_helper_gvec_3_ptr * const frint_fns[] = { 4478 NULL, 4479 gen_helper_sve_frint_h, 4480 gen_helper_sve_frint_s, 4481 gen_helper_sve_frint_d 4482 }; 4483 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 4484 a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4485 4486 static gen_helper_gvec_3_ptr * const frintx_fns[] = { 4487 NULL, 4488 gen_helper_sve_frintx_h, 4489 gen_helper_sve_frintx_s, 4490 gen_helper_sve_frintx_d 4491 }; 4492 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], 4493 a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 4494 4495 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 4496 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) 4497 { 4498 unsigned vsz; 4499 TCGv_i32 tmode; 4500 TCGv_ptr status; 4501 4502 if (fn == NULL) { 4503 return false; 4504 } 4505 if (!sve_access_check(s)) { 4506 return true; 4507 } 4508 4509 vsz = vec_full_reg_size(s); 4510 status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 4511 tmode = gen_set_rmode(mode, status); 4512 4513 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4514 vec_full_reg_offset(s, a->rn), 4515 pred_full_reg_offset(s, a->pg), 4516 status, vsz, vsz, 0, fn); 4517 4518 gen_restore_rmode(tmode, status); 4519 return true; 4520 } 4521 4522 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, 4523 FPROUNDING_TIEEVEN, frint_fns[a->esz]) 4524 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, 4525 FPROUNDING_POSINF, frint_fns[a->esz]) 4526 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, 4527 FPROUNDING_NEGINF, frint_fns[a->esz]) 4528 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, 4529 FPROUNDING_ZERO, frint_fns[a->esz]) 4530 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, 4531 FPROUNDING_TIEAWAY, frint_fns[a->esz]) 4532 4533 static gen_helper_gvec_3_ptr * const frecpx_fns[] = { 4534 NULL, gen_helper_sve_frecpx_h, 4535 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, 4536 }; 4537 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], 4538 a, 0, select_ah_fpst(s, a->esz)) 4539 4540 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { 4541 NULL, gen_helper_sve_fsqrt_h, 4542 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, 4543 }; 4544 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], 4545 a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) 4546 4547 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4548 gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16) 4549 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4550 gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16) 4551 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4552 gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16) 4553 4554 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4555 gen_helper_sve_scvt_ss, a, 0, FPST_A64) 4556 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4557 gen_helper_sve_scvt_ds, a, 0, FPST_A64) 4558 4559 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4560 gen_helper_sve_scvt_sd, a, 0, FPST_A64) 4561 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4562 gen_helper_sve_scvt_dd, a, 0, FPST_A64) 4563 4564 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4565 gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16) 4566 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4567 gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16) 4568 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4569 gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16) 4570 4571 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4572 gen_helper_sve_ucvt_ss, a, 0, FPST_A64) 4573 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4574 gen_helper_sve_ucvt_ds, a, 0, FPST_A64) 4575 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4576 gen_helper_sve_ucvt_sd, a, 0, FPST_A64) 4577 4578 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4579 gen_helper_sve_ucvt_dd, a, 0, FPST_A64) 4580 4581 /* 4582 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4583 */ 4584 4585 /* Subroutine loading a vector register at VOFS of LEN bytes. 4586 * The load should begin at the address Rn + IMM. 
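 * The transfer is split into 16-byte quantities plus a 0..14 byte
 * remainder; when the total number of parts is small the loads are
 * unrolled inline, otherwise a TCG loop walks the aligned portion,
 * and the remainder is read with 8-, 4- and 2-byte loads.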
4587 */ 4588 4589 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4590 int len, int rn, int imm, MemOp align) 4591 { 4592 int len_align = QEMU_ALIGN_DOWN(len, 16); 4593 int len_remain = len % 16; 4594 int nparts = len / 16 + ctpop8(len_remain); 4595 int midx = get_mem_index(s); 4596 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4597 TCGv_i128 t16; 4598 4599 dirty_addr = tcg_temp_new_i64(); 4600 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4601 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4602 4603 /* 4604 * Note that unpredicated load/store of vector/predicate registers 4605 * are defined as a stream of bytes, which equates to little-endian 4606 * operations on larger quantities. 4607 * Attempt to keep code expansion to a minimum by limiting the 4608 * amount of unrolling done. 4609 */ 4610 if (nparts <= 4) { 4611 int i; 4612 4613 t0 = tcg_temp_new_i64(); 4614 t1 = tcg_temp_new_i64(); 4615 t16 = tcg_temp_new_i128(); 4616 4617 for (i = 0; i < len_align; i += 16) { 4618 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4619 MO_LE | MO_128 | MO_ATOM_NONE | align); 4620 tcg_gen_extr_i128_i64(t0, t1, t16); 4621 tcg_gen_st_i64(t0, base, vofs + i); 4622 tcg_gen_st_i64(t1, base, vofs + i + 8); 4623 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4624 } 4625 if (len_align) { 4626 align = MO_UNALN; 4627 } 4628 } else { 4629 TCGLabel *loop = gen_new_label(); 4630 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4631 4632 tcg_gen_movi_ptr(i, 0); 4633 gen_set_label(loop); 4634 4635 t16 = tcg_temp_new_i128(); 4636 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4637 MO_LE | MO_128 | MO_ATOM_NONE | align); 4638 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4639 4640 tp = tcg_temp_new_ptr(); 4641 tcg_gen_add_ptr(tp, base, i); 4642 tcg_gen_addi_ptr(i, i, 16); 4643 4644 t0 = tcg_temp_new_i64(); 4645 t1 = tcg_temp_new_i64(); 4646 tcg_gen_extr_i128_i64(t0, t1, t16); 4647 4648 tcg_gen_st_i64(t0, tp, vofs); 4649 tcg_gen_st_i64(t1, tp, vofs + 8); 4650 4651 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4652 align = MO_UNALN; 4653 } 4654 4655 /* 4656 * Predicate register loads can be any multiple of 2. 4657 * Note that we still store the entire 64-bit unit into tcg_env. 4658 */ 4659 if (len_remain >= 8) { 4660 t0 = tcg_temp_new_i64(); 4661 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4662 MO_LEUQ | MO_ATOM_NONE | align); 4663 align = MO_UNALN; 4664 tcg_gen_st_i64(t0, base, vofs + len_align); 4665 len_remain -= 8; 4666 len_align += 8; 4667 if (len_remain) { 4668 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4669 } 4670 } 4671 if (len_remain) { 4672 t0 = tcg_temp_new_i64(); 4673 switch (len_remain) { 4674 case 2: 4675 case 4: 4676 case 8: 4677 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4678 MO_LE | ctz32(len_remain) 4679 | MO_ATOM_NONE | align); 4680 break; 4681 4682 case 6: 4683 t1 = tcg_temp_new_i64(); 4684 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4685 MO_LEUL | MO_ATOM_NONE | align); 4686 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4687 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4688 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4689 break; 4690 4691 default: 4692 g_assert_not_reached(); 4693 } 4694 tcg_gen_st_i64(t0, base, vofs + len_align); 4695 } 4696 } 4697 4698 /* Similarly for stores. 
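 * The structure mirrors gen_sve_ldr: 128-bit stores for the 16-byte
 * aligned portion (unrolled or in a loop), then 8/4/2-byte stores
 * for any 0..14 byte remainder.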
*/ 4699 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4700 int len, int rn, int imm, MemOp align) 4701 { 4702 int len_align = QEMU_ALIGN_DOWN(len, 16); 4703 int len_remain = len % 16; 4704 int nparts = len / 16 + ctpop8(len_remain); 4705 int midx = get_mem_index(s); 4706 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4707 TCGv_i128 t16; 4708 4709 dirty_addr = tcg_temp_new_i64(); 4710 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4711 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4712 4713 /* Note that unpredicated load/store of vector/predicate registers 4714 * are defined as a stream of bytes, which equates to little-endian 4715 * operations on larger quantities. There is no nice way to force 4716 * a little-endian store for aarch64_be-linux-user out of line. 4717 * 4718 * Attempt to keep code expansion to a minimum by limiting the 4719 * amount of unrolling done. 4720 */ 4721 if (nparts <= 4) { 4722 int i; 4723 4724 t0 = tcg_temp_new_i64(); 4725 t1 = tcg_temp_new_i64(); 4726 t16 = tcg_temp_new_i128(); 4727 for (i = 0; i < len_align; i += 16) { 4728 tcg_gen_ld_i64(t0, base, vofs + i); 4729 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4730 tcg_gen_concat_i64_i128(t16, t0, t1); 4731 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4732 MO_LE | MO_128 | MO_ATOM_NONE | align); 4733 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4734 } 4735 if (len_align) { 4736 align = MO_UNALN; 4737 } 4738 } else { 4739 TCGLabel *loop = gen_new_label(); 4740 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4741 4742 tcg_gen_movi_ptr(i, 0); 4743 gen_set_label(loop); 4744 4745 t0 = tcg_temp_new_i64(); 4746 t1 = tcg_temp_new_i64(); 4747 tp = tcg_temp_new_ptr(); 4748 tcg_gen_add_ptr(tp, base, i); 4749 tcg_gen_ld_i64(t0, tp, vofs); 4750 tcg_gen_ld_i64(t1, tp, vofs + 8); 4751 tcg_gen_addi_ptr(i, i, 16); 4752 4753 t16 = tcg_temp_new_i128(); 4754 tcg_gen_concat_i64_i128(t16, t0, t1); 4755 4756 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4757 MO_LE | MO_128 | MO_ATOM_NONE); 4758 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4759 4760 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4761 align = MO_UNALN; 4762 } 4763 4764 /* Predicate register stores can be any multiple of 2. */ 4765 if (len_remain >= 8) { 4766 t0 = tcg_temp_new_i64(); 4767 tcg_gen_ld_i64(t0, base, vofs + len_align); 4768 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4769 MO_LEUQ | MO_ATOM_NONE | align); 4770 align = MO_UNALN; 4771 len_remain -= 8; 4772 len_align += 8; 4773 if (len_remain) { 4774 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4775 } 4776 } 4777 if (len_remain) { 4778 t0 = tcg_temp_new_i64(); 4779 tcg_gen_ld_i64(t0, base, vofs + len_align); 4780 4781 switch (len_remain) { 4782 case 2: 4783 case 4: 4784 case 8: 4785 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4786 MO_LE | ctz32(len_remain) 4787 | MO_ATOM_NONE | align); 4788 break; 4789 4790 case 6: 4791 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4792 MO_LEUL | MO_ATOM_NONE | align); 4793 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4794 tcg_gen_shri_i64(t0, t0, 32); 4795 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4796 break; 4797 4798 default: 4799 g_assert_not_reached(); 4800 } 4801 } 4802 } 4803 4804 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4805 { 4806 if (!dc_isar_feature(aa64_sve, s)) { 4807 return false; 4808 } 4809 if (sve_access_check(s)) { 4810 int size = vec_full_reg_size(s); 4811 int off = vec_full_reg_offset(s, a->rd); 4812 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size, 4813 s->align_mem ? 
MO_ALIGN_16 : MO_UNALN); 4814 } 4815 return true; 4816 } 4817 4818 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4819 { 4820 if (!dc_isar_feature(aa64_sve, s)) { 4821 return false; 4822 } 4823 if (sve_access_check(s)) { 4824 int size = pred_full_reg_size(s); 4825 int off = pred_full_reg_offset(s, a->rd); 4826 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size, 4827 s->align_mem ? MO_ALIGN_2 : MO_UNALN); 4828 } 4829 return true; 4830 } 4831 4832 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4833 { 4834 if (!dc_isar_feature(aa64_sve, s)) { 4835 return false; 4836 } 4837 if (sve_access_check(s)) { 4838 int size = vec_full_reg_size(s); 4839 int off = vec_full_reg_offset(s, a->rd); 4840 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size, 4841 s->align_mem ? MO_ALIGN_16 : MO_UNALN); 4842 } 4843 return true; 4844 } 4845 4846 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4847 { 4848 if (!dc_isar_feature(aa64_sve, s)) { 4849 return false; 4850 } 4851 if (sve_access_check(s)) { 4852 int size = pred_full_reg_size(s); 4853 int off = pred_full_reg_offset(s, a->rd); 4854 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size, 4855 s->align_mem ? MO_ALIGN_2 : MO_UNALN); 4856 } 4857 return true; 4858 } 4859 4860 /* 4861 *** SVE Memory - Contiguous Load Group 4862 */ 4863 4864 /* The memory mode of the dtype. */ 4865 static const MemOp dtype_mop[19] = { 4866 MO_UB, MO_UB, MO_UB, MO_UB, 4867 MO_SL, MO_UW, MO_UW, MO_UW, 4868 MO_SW, MO_SW, MO_UL, MO_UL, 4869 MO_SB, MO_SB, MO_SB, MO_UQ, 4870 /* Artificial values used by decode */ 4871 MO_UL, MO_UQ, MO_128, 4872 }; 4873 4874 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4875 4876 /* The vector element size of dtype. */ 4877 static const uint8_t dtype_esz[19] = { 4878 0, 1, 2, 3, 4879 3, 1, 2, 3, 4880 3, 2, 2, 3, 4881 3, 2, 1, 3, 4882 /* Artificial values used by decode */ 4883 4, 4, 4, 4884 }; 4885 4886 uint64_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, 4887 uint32_t msz, bool is_write, uint32_t data) 4888 { 4889 uint32_t sizem1; 4890 uint64_t desc = 0; 4891 4892 /* Assert all of the data fits, with or without MTE enabled. */ 4893 assert(nregs >= 1 && nregs <= 4); 4894 sizem1 = (nregs << msz) - 1; 4895 assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); 4896 4897 if (s->mte_active[0]) { 4898 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4899 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4900 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4901 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4902 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); 4903 desc <<= 32; 4904 } 4905 return simd_desc(vsz, vsz, data) | desc; 4906 } 4907 4908 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4909 int dtype, uint32_t nregs, bool is_write, 4910 gen_helper_gvec_mem *fn) 4911 { 4912 TCGv_ptr t_pg; 4913 uint64_t desc; 4914 4915 if (!s->mte_active[0]) { 4916 addr = clean_data_tbi(s, addr); 4917 } 4918 4919 /* 4920 * For e.g. LD4, there are not enough arguments to pass all 4 4921 * registers as pointers, so encode the regno into the data field. 4922 * For consistency, do this even for LD1. 
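 * The resulting 64-bit descriptor therefore carries
 * simd_desc(vsz, vsz, zt) in its low 32 bits and, when MTE is active,
 * the MTE descriptor built by make_svemte_desc() above in its high
 * 32 bits.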
4923 */ 4924 desc = make_svemte_desc(s, vec_full_reg_size(s), nregs, 4925 dtype_msz(dtype), is_write, zt); 4926 t_pg = tcg_temp_new_ptr(); 4927 4928 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4929 fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); 4930 } 4931 4932 /* Indexed by [mte][be][dtype][nreg] */ 4933 static gen_helper_gvec_mem * const ldr_fns[2][2][19][4] = { 4934 { /* mte inactive, little-endian */ 4935 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4936 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4937 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4938 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4939 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4940 4941 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4942 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4943 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4944 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4945 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4946 4947 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4948 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4949 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4950 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4951 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4952 4953 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4954 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4955 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4956 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4957 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r }, 4958 4959 { gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL }, 4960 { gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL }, 4961 { NULL, gen_helper_sve_ld2qq_le_r, 4962 gen_helper_sve_ld3qq_le_r, gen_helper_sve_ld4qq_le_r }, 4963 }, 4964 4965 /* mte inactive, big-endian */ 4966 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4967 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4968 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4969 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4970 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4971 4972 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4973 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4974 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4975 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4976 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4977 4978 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4979 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4980 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4981 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4982 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4983 4984 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4985 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4986 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4987 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4988 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r }, 4989 4990 { gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL }, 4991 { gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL }, 4992 { NULL, gen_helper_sve_ld2qq_be_r, 4993 gen_helper_sve_ld3qq_be_r, gen_helper_sve_ld4qq_be_r }, 4994 }, 4995 }, 4996 4997 { /* mte active, little-endian */ 4998 { { gen_helper_sve_ld1bb_r_mte, 4999 gen_helper_sve_ld2bb_r_mte, 5000 gen_helper_sve_ld3bb_r_mte, 5001 gen_helper_sve_ld4bb_r_mte }, 5002 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 5003 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 5004 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 5005 5006 
{ gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 5007 { gen_helper_sve_ld1hh_le_r_mte, 5008 gen_helper_sve_ld2hh_le_r_mte, 5009 gen_helper_sve_ld3hh_le_r_mte, 5010 gen_helper_sve_ld4hh_le_r_mte }, 5011 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 5012 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 5013 5014 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 5015 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 5016 { gen_helper_sve_ld1ss_le_r_mte, 5017 gen_helper_sve_ld2ss_le_r_mte, 5018 gen_helper_sve_ld3ss_le_r_mte, 5019 gen_helper_sve_ld4ss_le_r_mte }, 5020 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 5021 5022 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 5023 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 5024 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 5025 { gen_helper_sve_ld1dd_le_r_mte, 5026 gen_helper_sve_ld2dd_le_r_mte, 5027 gen_helper_sve_ld3dd_le_r_mte, 5028 gen_helper_sve_ld4dd_le_r_mte }, 5029 5030 { gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL }, 5031 { gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL }, 5032 { NULL, 5033 gen_helper_sve_ld2qq_le_r_mte, 5034 gen_helper_sve_ld3qq_le_r_mte, 5035 gen_helper_sve_ld4qq_le_r_mte }, 5036 }, 5037 5038 /* mte active, big-endian */ 5039 { { gen_helper_sve_ld1bb_r_mte, 5040 gen_helper_sve_ld2bb_r_mte, 5041 gen_helper_sve_ld3bb_r_mte, 5042 gen_helper_sve_ld4bb_r_mte }, 5043 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 5044 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 5045 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 5046 5047 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 5048 { gen_helper_sve_ld1hh_be_r_mte, 5049 gen_helper_sve_ld2hh_be_r_mte, 5050 gen_helper_sve_ld3hh_be_r_mte, 5051 gen_helper_sve_ld4hh_be_r_mte }, 5052 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 5053 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 5054 5055 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 5056 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 5057 { gen_helper_sve_ld1ss_be_r_mte, 5058 gen_helper_sve_ld2ss_be_r_mte, 5059 gen_helper_sve_ld3ss_be_r_mte, 5060 gen_helper_sve_ld4ss_be_r_mte }, 5061 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 5062 5063 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 5064 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 5065 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 5066 { gen_helper_sve_ld1dd_be_r_mte, 5067 gen_helper_sve_ld2dd_be_r_mte, 5068 gen_helper_sve_ld3dd_be_r_mte, 5069 gen_helper_sve_ld4dd_be_r_mte }, 5070 5071 { gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL }, 5072 { gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL }, 5073 { NULL, 5074 gen_helper_sve_ld2qq_be_r_mte, 5075 gen_helper_sve_ld3qq_be_r_mte, 5076 gen_helper_sve_ld4qq_be_r_mte }, 5077 }, 5078 }, 5079 }; 5080 5081 static void do_ld_zpa(DisasContext *s, int zt, int pg, 5082 TCGv_i64 addr, int dtype, int nreg) 5083 { 5084 gen_helper_gvec_mem *fn 5085 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 5086 5087 /* 5088 * While there are holes in the table, they are not 5089 * accessible via the instruction encoding. 5090 */ 5091 assert(fn != NULL); 5092 do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn); 5093 } 5094 5095 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 5096 { 5097 if (a->rm == 31) { 5098 return false; 5099 } 5100 5101 /* dtypes 16-18 are artificial, representing 128-bit element */ 5102 switch (a->dtype) { 5103 case 0 ... 
15: 5104 if (!dc_isar_feature(aa64_sve, s)) { 5105 return false; 5106 } 5107 break; 5108 case 16: case 17: 5109 if (!dc_isar_feature(aa64_sve2p1, s)) { 5110 return false; 5111 } 5112 s->is_nonstreaming = true; 5113 break; 5114 case 18: 5115 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 5116 return false; 5117 } 5118 break; 5119 default: 5120 g_assert_not_reached(); 5121 } 5122 5123 if (sve_access_check(s)) { 5124 TCGv_i64 addr = tcg_temp_new_i64(); 5125 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5126 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5127 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 5128 } 5129 return true; 5130 } 5131 5132 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 5133 { 5134 /* dtypes 16-18 are artificial, representing 128-bit element */ 5135 switch (a->dtype) { 5136 case 0 ... 15: 5137 if (!dc_isar_feature(aa64_sve, s)) { 5138 return false; 5139 } 5140 break; 5141 case 16: case 17: 5142 if (!dc_isar_feature(aa64_sve2p1, s)) { 5143 return false; 5144 } 5145 s->is_nonstreaming = true; 5146 break; 5147 case 18: 5148 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 5149 return false; 5150 } 5151 break; 5152 default: 5153 g_assert_not_reached(); 5154 } 5155 5156 if (sve_access_check(s)) { 5157 int vsz = vec_full_reg_size(s); 5158 int elements = vsz >> dtype_esz[a->dtype]; 5159 TCGv_i64 addr = tcg_temp_new_i64(); 5160 5161 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5162 (a->imm * elements * (a->nreg + 1)) 5163 << dtype_msz(a->dtype)); 5164 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 5165 } 5166 return true; 5167 } 5168 5169 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 5170 { 5171 static gen_helper_gvec_mem * const fns[2][2][16] = { 5172 { /* mte inactive, little-endian */ 5173 { gen_helper_sve_ldff1bb_r, 5174 gen_helper_sve_ldff1bhu_r, 5175 gen_helper_sve_ldff1bsu_r, 5176 gen_helper_sve_ldff1bdu_r, 5177 5178 gen_helper_sve_ldff1sds_le_r, 5179 gen_helper_sve_ldff1hh_le_r, 5180 gen_helper_sve_ldff1hsu_le_r, 5181 gen_helper_sve_ldff1hdu_le_r, 5182 5183 gen_helper_sve_ldff1hds_le_r, 5184 gen_helper_sve_ldff1hss_le_r, 5185 gen_helper_sve_ldff1ss_le_r, 5186 gen_helper_sve_ldff1sdu_le_r, 5187 5188 gen_helper_sve_ldff1bds_r, 5189 gen_helper_sve_ldff1bss_r, 5190 gen_helper_sve_ldff1bhs_r, 5191 gen_helper_sve_ldff1dd_le_r }, 5192 5193 /* mte inactive, big-endian */ 5194 { gen_helper_sve_ldff1bb_r, 5195 gen_helper_sve_ldff1bhu_r, 5196 gen_helper_sve_ldff1bsu_r, 5197 gen_helper_sve_ldff1bdu_r, 5198 5199 gen_helper_sve_ldff1sds_be_r, 5200 gen_helper_sve_ldff1hh_be_r, 5201 gen_helper_sve_ldff1hsu_be_r, 5202 gen_helper_sve_ldff1hdu_be_r, 5203 5204 gen_helper_sve_ldff1hds_be_r, 5205 gen_helper_sve_ldff1hss_be_r, 5206 gen_helper_sve_ldff1ss_be_r, 5207 gen_helper_sve_ldff1sdu_be_r, 5208 5209 gen_helper_sve_ldff1bds_r, 5210 gen_helper_sve_ldff1bss_r, 5211 gen_helper_sve_ldff1bhs_r, 5212 gen_helper_sve_ldff1dd_be_r } }, 5213 5214 { /* mte active, little-endian */ 5215 { gen_helper_sve_ldff1bb_r_mte, 5216 gen_helper_sve_ldff1bhu_r_mte, 5217 gen_helper_sve_ldff1bsu_r_mte, 5218 gen_helper_sve_ldff1bdu_r_mte, 5219 5220 gen_helper_sve_ldff1sds_le_r_mte, 5221 gen_helper_sve_ldff1hh_le_r_mte, 5222 gen_helper_sve_ldff1hsu_le_r_mte, 5223 gen_helper_sve_ldff1hdu_le_r_mte, 5224 5225 gen_helper_sve_ldff1hds_le_r_mte, 5226 gen_helper_sve_ldff1hss_le_r_mte, 5227 gen_helper_sve_ldff1ss_le_r_mte, 5228 gen_helper_sve_ldff1sdu_le_r_mte, 5229 5230 gen_helper_sve_ldff1bds_r_mte, 5231 gen_helper_sve_ldff1bss_r_mte, 5232 
gen_helper_sve_ldff1bhs_r_mte, 5233 gen_helper_sve_ldff1dd_le_r_mte }, 5234 5235 /* mte active, big-endian */ 5236 { gen_helper_sve_ldff1bb_r_mte, 5237 gen_helper_sve_ldff1bhu_r_mte, 5238 gen_helper_sve_ldff1bsu_r_mte, 5239 gen_helper_sve_ldff1bdu_r_mte, 5240 5241 gen_helper_sve_ldff1sds_be_r_mte, 5242 gen_helper_sve_ldff1hh_be_r_mte, 5243 gen_helper_sve_ldff1hsu_be_r_mte, 5244 gen_helper_sve_ldff1hdu_be_r_mte, 5245 5246 gen_helper_sve_ldff1hds_be_r_mte, 5247 gen_helper_sve_ldff1hss_be_r_mte, 5248 gen_helper_sve_ldff1ss_be_r_mte, 5249 gen_helper_sve_ldff1sdu_be_r_mte, 5250 5251 gen_helper_sve_ldff1bds_r_mte, 5252 gen_helper_sve_ldff1bss_r_mte, 5253 gen_helper_sve_ldff1bhs_r_mte, 5254 gen_helper_sve_ldff1dd_be_r_mte } }, 5255 }; 5256 5257 if (!dc_isar_feature(aa64_sve, s)) { 5258 return false; 5259 } 5260 s->is_nonstreaming = true; 5261 if (sve_access_check(s)) { 5262 TCGv_i64 addr = tcg_temp_new_i64(); 5263 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5264 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5265 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 5266 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 5267 } 5268 return true; 5269 } 5270 5271 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 5272 { 5273 static gen_helper_gvec_mem * const fns[2][2][16] = { 5274 { /* mte inactive, little-endian */ 5275 { gen_helper_sve_ldnf1bb_r, 5276 gen_helper_sve_ldnf1bhu_r, 5277 gen_helper_sve_ldnf1bsu_r, 5278 gen_helper_sve_ldnf1bdu_r, 5279 5280 gen_helper_sve_ldnf1sds_le_r, 5281 gen_helper_sve_ldnf1hh_le_r, 5282 gen_helper_sve_ldnf1hsu_le_r, 5283 gen_helper_sve_ldnf1hdu_le_r, 5284 5285 gen_helper_sve_ldnf1hds_le_r, 5286 gen_helper_sve_ldnf1hss_le_r, 5287 gen_helper_sve_ldnf1ss_le_r, 5288 gen_helper_sve_ldnf1sdu_le_r, 5289 5290 gen_helper_sve_ldnf1bds_r, 5291 gen_helper_sve_ldnf1bss_r, 5292 gen_helper_sve_ldnf1bhs_r, 5293 gen_helper_sve_ldnf1dd_le_r }, 5294 5295 /* mte inactive, big-endian */ 5296 { gen_helper_sve_ldnf1bb_r, 5297 gen_helper_sve_ldnf1bhu_r, 5298 gen_helper_sve_ldnf1bsu_r, 5299 gen_helper_sve_ldnf1bdu_r, 5300 5301 gen_helper_sve_ldnf1sds_be_r, 5302 gen_helper_sve_ldnf1hh_be_r, 5303 gen_helper_sve_ldnf1hsu_be_r, 5304 gen_helper_sve_ldnf1hdu_be_r, 5305 5306 gen_helper_sve_ldnf1hds_be_r, 5307 gen_helper_sve_ldnf1hss_be_r, 5308 gen_helper_sve_ldnf1ss_be_r, 5309 gen_helper_sve_ldnf1sdu_be_r, 5310 5311 gen_helper_sve_ldnf1bds_r, 5312 gen_helper_sve_ldnf1bss_r, 5313 gen_helper_sve_ldnf1bhs_r, 5314 gen_helper_sve_ldnf1dd_be_r } }, 5315 5316 { /* mte active, little-endian */ 5317 { gen_helper_sve_ldnf1bb_r_mte, 5318 gen_helper_sve_ldnf1bhu_r_mte, 5319 gen_helper_sve_ldnf1bsu_r_mte, 5320 gen_helper_sve_ldnf1bdu_r_mte, 5321 5322 gen_helper_sve_ldnf1sds_le_r_mte, 5323 gen_helper_sve_ldnf1hh_le_r_mte, 5324 gen_helper_sve_ldnf1hsu_le_r_mte, 5325 gen_helper_sve_ldnf1hdu_le_r_mte, 5326 5327 gen_helper_sve_ldnf1hds_le_r_mte, 5328 gen_helper_sve_ldnf1hss_le_r_mte, 5329 gen_helper_sve_ldnf1ss_le_r_mte, 5330 gen_helper_sve_ldnf1sdu_le_r_mte, 5331 5332 gen_helper_sve_ldnf1bds_r_mte, 5333 gen_helper_sve_ldnf1bss_r_mte, 5334 gen_helper_sve_ldnf1bhs_r_mte, 5335 gen_helper_sve_ldnf1dd_le_r_mte }, 5336 5337 /* mte active, big-endian */ 5338 { gen_helper_sve_ldnf1bb_r_mte, 5339 gen_helper_sve_ldnf1bhu_r_mte, 5340 gen_helper_sve_ldnf1bsu_r_mte, 5341 gen_helper_sve_ldnf1bdu_r_mte, 5342 5343 gen_helper_sve_ldnf1sds_be_r_mte, 5344 gen_helper_sve_ldnf1hh_be_r_mte, 5345 gen_helper_sve_ldnf1hsu_be_r_mte, 5346 gen_helper_sve_ldnf1hdu_be_r_mte, 5347 5348 
gen_helper_sve_ldnf1hds_be_r_mte, 5349 gen_helper_sve_ldnf1hss_be_r_mte, 5350 gen_helper_sve_ldnf1ss_be_r_mte, 5351 gen_helper_sve_ldnf1sdu_be_r_mte, 5352 5353 gen_helper_sve_ldnf1bds_r_mte, 5354 gen_helper_sve_ldnf1bss_r_mte, 5355 gen_helper_sve_ldnf1bhs_r_mte, 5356 gen_helper_sve_ldnf1dd_be_r_mte } }, 5357 }; 5358 5359 if (!dc_isar_feature(aa64_sve, s)) { 5360 return false; 5361 } 5362 s->is_nonstreaming = true; 5363 if (sve_access_check(s)) { 5364 int vsz = vec_full_reg_size(s); 5365 int elements = vsz >> dtype_esz[a->dtype]; 5366 int off = (a->imm * elements) << dtype_msz(a->dtype); 5367 TCGv_i64 addr = tcg_temp_new_i64(); 5368 5369 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 5370 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 5371 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 5372 } 5373 return true; 5374 } 5375 5376 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 5377 { 5378 unsigned vsz = vec_full_reg_size(s); 5379 TCGv_ptr t_pg; 5380 int poff; 5381 uint64_t desc; 5382 5383 /* Load the first quadword using the normal predicated load helpers. */ 5384 if (!s->mte_active[0]) { 5385 addr = clean_data_tbi(s, addr); 5386 } 5387 5388 poff = pred_full_reg_offset(s, pg); 5389 if (vsz > 16) { 5390 /* 5391 * Zero-extend the first 16 bits of the predicate into a temporary. 5392 * This avoids triggering an assert making sure we don't have bits 5393 * set within a predicate beyond VQ, but we have lowered VQ to 1 5394 * for this load operation. 5395 */ 5396 TCGv_i64 tmp = tcg_temp_new_i64(); 5397 #if HOST_BIG_ENDIAN 5398 poff += 6; 5399 #endif 5400 tcg_gen_ld16u_i64(tmp, tcg_env, poff); 5401 5402 poff = offsetof(CPUARMState, vfp.preg_tmp); 5403 tcg_gen_st_i64(tmp, tcg_env, poff); 5404 } 5405 5406 t_pg = tcg_temp_new_ptr(); 5407 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 5408 5409 gen_helper_gvec_mem *fn 5410 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 5411 desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); 5412 fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); 5413 5414 /* Replicate that first quadword. */ 5415 if (vsz > 16) { 5416 int doff = vec_full_reg_offset(s, zt); 5417 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 5418 } 5419 } 5420 5421 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 5422 { 5423 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 5424 return false; 5425 } 5426 if (sve_access_check(s)) { 5427 int msz = dtype_msz(a->dtype); 5428 TCGv_i64 addr = tcg_temp_new_i64(); 5429 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 5430 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5431 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 5432 } 5433 return true; 5434 } 5435 5436 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 5437 { 5438 if (!dc_isar_feature(aa64_sve, s)) { 5439 return false; 5440 } 5441 if (sve_access_check(s)) { 5442 TCGv_i64 addr = tcg_temp_new_i64(); 5443 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 5444 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 5445 } 5446 return true; 5447 } 5448 5449 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 5450 { 5451 unsigned vsz = vec_full_reg_size(s); 5452 unsigned vsz_r32; 5453 TCGv_ptr t_pg; 5454 int poff, doff; 5455 uint64_t desc; 5456 5457 if (vsz < 32) { 5458 /* 5459 * Note that this UNDEFINED check comes after CheckSVEEnabled() 5460 * in the ARM pseudocode, which is the sve_access_check() done 5461 * in our caller. We should not now return false from the caller. 
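 * Raise the exception directly here instead.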
5462 */ 5463 unallocated_encoding(s); 5464 return; 5465 } 5466 5467 /* Load the first octaword using the normal predicated load helpers. */ 5468 if (!s->mte_active[0]) { 5469 addr = clean_data_tbi(s, addr); 5470 } 5471 5472 poff = pred_full_reg_offset(s, pg); 5473 if (vsz > 32) { 5474 /* 5475 * Zero-extend the first 32 bits of the predicate into a temporary. 5476 * This avoids triggering an assert making sure we don't have bits 5477 * set within a predicate beyond VQ, but we have lowered VQ to 2 5478 * for this load operation. 5479 */ 5480 TCGv_i64 tmp = tcg_temp_new_i64(); 5481 #if HOST_BIG_ENDIAN 5482 poff += 4; 5483 #endif 5484 tcg_gen_ld32u_i64(tmp, tcg_env, poff); 5485 5486 poff = offsetof(CPUARMState, vfp.preg_tmp); 5487 tcg_gen_st_i64(tmp, tcg_env, poff); 5488 } 5489 5490 t_pg = tcg_temp_new_ptr(); 5491 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 5492 5493 gen_helper_gvec_mem *fn 5494 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 5495 desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); 5496 fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); 5497 5498 /* 5499 * Replicate that first octaword. 5500 * The replication happens in units of 32; if the full vector size 5501 * is not a multiple of 32, the final bits are zeroed. 5502 */ 5503 doff = vec_full_reg_offset(s, zt); 5504 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 5505 if (vsz >= 64) { 5506 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 5507 } 5508 vsz -= vsz_r32; 5509 if (vsz) { 5510 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 5511 } 5512 } 5513 5514 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 5515 { 5516 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5517 return false; 5518 } 5519 if (a->rm == 31) { 5520 return false; 5521 } 5522 s->is_nonstreaming = true; 5523 if (sve_access_check(s)) { 5524 TCGv_i64 addr = tcg_temp_new_i64(); 5525 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5526 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5527 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5528 } 5529 return true; 5530 } 5531 5532 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 5533 { 5534 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5535 return false; 5536 } 5537 s->is_nonstreaming = true; 5538 if (sve_access_check(s)) { 5539 TCGv_i64 addr = tcg_temp_new_i64(); 5540 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 5541 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5542 } 5543 return true; 5544 } 5545 5546 /* Load and broadcast element. */ 5547 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 5548 { 5549 unsigned vsz = vec_full_reg_size(s); 5550 unsigned psz = pred_full_reg_size(s); 5551 unsigned esz = dtype_esz[a->dtype]; 5552 unsigned msz = dtype_msz(a->dtype); 5553 TCGLabel *over; 5554 TCGv_i64 temp, clean_addr; 5555 MemOp memop; 5556 5557 if (!dc_isar_feature(aa64_sve, s)) { 5558 return false; 5559 } 5560 if (!sve_access_check(s)) { 5561 return true; 5562 } 5563 5564 over = gen_new_label(); 5565 5566 /* If the guarding predicate has no bits set, no load occurs. */ 5567 if (psz <= 8) { 5568 /* Reduce the pred_esz_masks value simply to reduce the 5569 * size of the code generated here. 
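 * For psz <= 8 the whole predicate fits in a single 64-bit load, so
 * ANDing with pred_esz_masks[esz], restricted to the low psz * 8 bits,
 * tests whether any element of this size is active.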
5570 */ 5571 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 5572 temp = tcg_temp_new_i64(); 5573 tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg)); 5574 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 5575 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 5576 } else { 5577 TCGv_i32 t32 = tcg_temp_new_i32(); 5578 find_last_active(s, t32, esz, a->pg); 5579 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5580 } 5581 5582 /* Load the data. */ 5583 temp = tcg_temp_new_i64(); 5584 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5585 5586 memop = finalize_memop(s, dtype_mop[a->dtype]); 5587 clean_addr = gen_mte_check1(s, temp, false, true, memop); 5588 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); 5589 5590 /* Broadcast to *all* elements. */ 5591 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5592 vsz, vsz, temp); 5593 5594 /* Zero the inactive elements. */ 5595 gen_set_label(over); 5596 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5597 } 5598 5599 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5600 int msz, int esz, int nreg) 5601 { 5602 static gen_helper_gvec_mem * const fn_single[2][2][4][5] = { 5603 { { { gen_helper_sve_st1bb_r, 5604 gen_helper_sve_st1bh_r, 5605 gen_helper_sve_st1bs_r, 5606 gen_helper_sve_st1bd_r }, 5607 { NULL, 5608 gen_helper_sve_st1hh_le_r, 5609 gen_helper_sve_st1hs_le_r, 5610 gen_helper_sve_st1hd_le_r }, 5611 { NULL, NULL, 5612 gen_helper_sve_st1ss_le_r, 5613 gen_helper_sve_st1sd_le_r, 5614 gen_helper_sve_st1sq_le_r, }, 5615 { NULL, NULL, NULL, 5616 gen_helper_sve_st1dd_le_r, 5617 gen_helper_sve_st1dq_le_r, } }, 5618 { { gen_helper_sve_st1bb_r, 5619 gen_helper_sve_st1bh_r, 5620 gen_helper_sve_st1bs_r, 5621 gen_helper_sve_st1bd_r }, 5622 { NULL, 5623 gen_helper_sve_st1hh_be_r, 5624 gen_helper_sve_st1hs_be_r, 5625 gen_helper_sve_st1hd_be_r }, 5626 { NULL, NULL, 5627 gen_helper_sve_st1ss_be_r, 5628 gen_helper_sve_st1sd_be_r, 5629 gen_helper_sve_st1sq_be_r }, 5630 { NULL, NULL, NULL, 5631 gen_helper_sve_st1dd_be_r, 5632 gen_helper_sve_st1dq_be_r } } }, 5633 5634 { { { gen_helper_sve_st1bb_r_mte, 5635 gen_helper_sve_st1bh_r_mte, 5636 gen_helper_sve_st1bs_r_mte, 5637 gen_helper_sve_st1bd_r_mte }, 5638 { NULL, 5639 gen_helper_sve_st1hh_le_r_mte, 5640 gen_helper_sve_st1hs_le_r_mte, 5641 gen_helper_sve_st1hd_le_r_mte }, 5642 { NULL, NULL, 5643 gen_helper_sve_st1ss_le_r_mte, 5644 gen_helper_sve_st1sd_le_r_mte, 5645 gen_helper_sve_st1sq_le_r_mte }, 5646 { NULL, NULL, NULL, 5647 gen_helper_sve_st1dd_le_r_mte, 5648 gen_helper_sve_st1dq_le_r_mte } }, 5649 { { gen_helper_sve_st1bb_r_mte, 5650 gen_helper_sve_st1bh_r_mte, 5651 gen_helper_sve_st1bs_r_mte, 5652 gen_helper_sve_st1bd_r_mte }, 5653 { NULL, 5654 gen_helper_sve_st1hh_be_r_mte, 5655 gen_helper_sve_st1hs_be_r_mte, 5656 gen_helper_sve_st1hd_be_r_mte }, 5657 { NULL, NULL, 5658 gen_helper_sve_st1ss_be_r_mte, 5659 gen_helper_sve_st1sd_be_r_mte, 5660 gen_helper_sve_st1sq_be_r_mte }, 5661 { NULL, NULL, NULL, 5662 gen_helper_sve_st1dd_be_r_mte, 5663 gen_helper_sve_st1dq_be_r_mte } } }, 5664 }; 5665 static gen_helper_gvec_mem * const fn_multiple[2][2][3][5] = { 5666 { { { gen_helper_sve_st2bb_r, 5667 gen_helper_sve_st2hh_le_r, 5668 gen_helper_sve_st2ss_le_r, 5669 gen_helper_sve_st2dd_le_r, 5670 gen_helper_sve_st2qq_le_r }, 5671 { gen_helper_sve_st3bb_r, 5672 gen_helper_sve_st3hh_le_r, 5673 gen_helper_sve_st3ss_le_r, 5674 gen_helper_sve_st3dd_le_r, 5675 gen_helper_sve_st3qq_le_r }, 5676 { gen_helper_sve_st4bb_r, 5677 
gen_helper_sve_st4hh_le_r, 5678 gen_helper_sve_st4ss_le_r, 5679 gen_helper_sve_st4dd_le_r, 5680 gen_helper_sve_st4qq_le_r } }, 5681 { { gen_helper_sve_st2bb_r, 5682 gen_helper_sve_st2hh_be_r, 5683 gen_helper_sve_st2ss_be_r, 5684 gen_helper_sve_st2dd_be_r, 5685 gen_helper_sve_st2qq_be_r }, 5686 { gen_helper_sve_st3bb_r, 5687 gen_helper_sve_st3hh_be_r, 5688 gen_helper_sve_st3ss_be_r, 5689 gen_helper_sve_st3dd_be_r, 5690 gen_helper_sve_st3qq_be_r }, 5691 { gen_helper_sve_st4bb_r, 5692 gen_helper_sve_st4hh_be_r, 5693 gen_helper_sve_st4ss_be_r, 5694 gen_helper_sve_st4dd_be_r, 5695 gen_helper_sve_st4qq_be_r } } }, 5696 { { { gen_helper_sve_st2bb_r_mte, 5697 gen_helper_sve_st2hh_le_r_mte, 5698 gen_helper_sve_st2ss_le_r_mte, 5699 gen_helper_sve_st2dd_le_r_mte, 5700 gen_helper_sve_st2qq_le_r_mte }, 5701 { gen_helper_sve_st3bb_r_mte, 5702 gen_helper_sve_st3hh_le_r_mte, 5703 gen_helper_sve_st3ss_le_r_mte, 5704 gen_helper_sve_st3dd_le_r_mte, 5705 gen_helper_sve_st3qq_le_r_mte }, 5706 { gen_helper_sve_st4bb_r_mte, 5707 gen_helper_sve_st4hh_le_r_mte, 5708 gen_helper_sve_st4ss_le_r_mte, 5709 gen_helper_sve_st4dd_le_r_mte, 5710 gen_helper_sve_st4qq_le_r_mte } }, 5711 { { gen_helper_sve_st2bb_r_mte, 5712 gen_helper_sve_st2hh_be_r_mte, 5713 gen_helper_sve_st2ss_be_r_mte, 5714 gen_helper_sve_st2dd_be_r_mte, 5715 gen_helper_sve_st2qq_be_r_mte }, 5716 { gen_helper_sve_st3bb_r_mte, 5717 gen_helper_sve_st3hh_be_r_mte, 5718 gen_helper_sve_st3ss_be_r_mte, 5719 gen_helper_sve_st3dd_be_r_mte, 5720 gen_helper_sve_st3qq_be_r_mte }, 5721 { gen_helper_sve_st4bb_r_mte, 5722 gen_helper_sve_st4hh_be_r_mte, 5723 gen_helper_sve_st4ss_be_r_mte, 5724 gen_helper_sve_st4dd_be_r_mte, 5725 gen_helper_sve_st4qq_be_r_mte } } }, 5726 }; 5727 gen_helper_gvec_mem *fn; 5728 int be = s->be_data == MO_BE; 5729 5730 if (nreg == 0) { 5731 /* ST1 */ 5732 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5733 } else { 5734 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5735 assert(msz == esz); 5736 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5737 } 5738 assert(fn != NULL); 5739 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn); 5740 } 5741 5742 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5743 { 5744 if (a->rm == 31 || a->msz > a->esz) { 5745 return false; 5746 } 5747 switch (a->esz) { 5748 case MO_8 ... MO_64: 5749 if (!dc_isar_feature(aa64_sve, s)) { 5750 return false; 5751 } 5752 break; 5753 case MO_128: 5754 if (a->nreg == 0) { 5755 assert(a->msz < a->esz); 5756 if (!dc_isar_feature(aa64_sve2p1, s)) { 5757 return false; 5758 } 5759 s->is_nonstreaming = true; 5760 } else { 5761 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 5762 return false; 5763 } 5764 } 5765 break; 5766 default: 5767 g_assert_not_reached(); 5768 } 5769 5770 if (sve_access_check(s)) { 5771 TCGv_i64 addr = tcg_temp_new_i64(); 5772 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5773 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5774 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5775 } 5776 return true; 5777 } 5778 5779 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5780 { 5781 if (a->msz > a->esz) { 5782 return false; 5783 } 5784 switch (a->esz) { 5785 case MO_8 ... 
MO_64: 5786 if (!dc_isar_feature(aa64_sve, s)) { 5787 return false; 5788 } 5789 break; 5790 case MO_128: 5791 if (a->nreg == 0) { 5792 assert(a->msz < a->esz); 5793 if (!dc_isar_feature(aa64_sve2p1, s)) { 5794 return false; 5795 } 5796 s->is_nonstreaming = true; 5797 } else { 5798 if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { 5799 return false; 5800 } 5801 } 5802 break; 5803 default: 5804 g_assert_not_reached(); 5805 } 5806 5807 if (sve_access_check(s)) { 5808 int vsz = vec_full_reg_size(s); 5809 int elements = vsz >> a->esz; 5810 TCGv_i64 addr = tcg_temp_new_i64(); 5811 5812 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5813 (a->imm * elements * (a->nreg + 1)) << a->msz); 5814 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5815 } 5816 return true; 5817 } 5818 5819 /* 5820 *** SVE gather loads / scatter stores 5821 */ 5822 5823 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5824 int scale, TCGv_i64 scalar, int msz, bool is_write, 5825 gen_helper_gvec_mem_scatter *fn) 5826 { 5827 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5828 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5829 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5830 uint64_t desc; 5831 5832 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 5833 tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); 5834 tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); 5835 5836 desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); 5837 fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i64(desc)); 5838 } 5839 5840 /* Indexed by [mte][be][ff][xs][u][msz]. */ 5841 static gen_helper_gvec_mem_scatter * const 5842 gather_load_fn32[2][2][2][2][2][3] = { 5843 { /* MTE Inactive */ 5844 { /* Little-endian */ 5845 { { { gen_helper_sve_ldbss_zsu, 5846 gen_helper_sve_ldhss_le_zsu, 5847 NULL, }, 5848 { gen_helper_sve_ldbsu_zsu, 5849 gen_helper_sve_ldhsu_le_zsu, 5850 gen_helper_sve_ldss_le_zsu, } }, 5851 { { gen_helper_sve_ldbss_zss, 5852 gen_helper_sve_ldhss_le_zss, 5853 NULL, }, 5854 { gen_helper_sve_ldbsu_zss, 5855 gen_helper_sve_ldhsu_le_zss, 5856 gen_helper_sve_ldss_le_zss, } } }, 5857 5858 /* First-fault */ 5859 { { { gen_helper_sve_ldffbss_zsu, 5860 gen_helper_sve_ldffhss_le_zsu, 5861 NULL, }, 5862 { gen_helper_sve_ldffbsu_zsu, 5863 gen_helper_sve_ldffhsu_le_zsu, 5864 gen_helper_sve_ldffss_le_zsu, } }, 5865 { { gen_helper_sve_ldffbss_zss, 5866 gen_helper_sve_ldffhss_le_zss, 5867 NULL, }, 5868 { gen_helper_sve_ldffbsu_zss, 5869 gen_helper_sve_ldffhsu_le_zss, 5870 gen_helper_sve_ldffss_le_zss, } } } }, 5871 5872 { /* Big-endian */ 5873 { { { gen_helper_sve_ldbss_zsu, 5874 gen_helper_sve_ldhss_be_zsu, 5875 NULL, }, 5876 { gen_helper_sve_ldbsu_zsu, 5877 gen_helper_sve_ldhsu_be_zsu, 5878 gen_helper_sve_ldss_be_zsu, } }, 5879 { { gen_helper_sve_ldbss_zss, 5880 gen_helper_sve_ldhss_be_zss, 5881 NULL, }, 5882 { gen_helper_sve_ldbsu_zss, 5883 gen_helper_sve_ldhsu_be_zss, 5884 gen_helper_sve_ldss_be_zss, } } }, 5885 5886 /* First-fault */ 5887 { { { gen_helper_sve_ldffbss_zsu, 5888 gen_helper_sve_ldffhss_be_zsu, 5889 NULL, }, 5890 { gen_helper_sve_ldffbsu_zsu, 5891 gen_helper_sve_ldffhsu_be_zsu, 5892 gen_helper_sve_ldffss_be_zsu, } }, 5893 { { gen_helper_sve_ldffbss_zss, 5894 gen_helper_sve_ldffhss_be_zss, 5895 NULL, }, 5896 { gen_helper_sve_ldffbsu_zss, 5897 gen_helper_sve_ldffhsu_be_zss, 5898 gen_helper_sve_ldffss_be_zss, } } } } }, 5899 { /* MTE Active */ 5900 { /* Little-endian */ 5901 { { { gen_helper_sve_ldbss_zsu_mte, 5902 gen_helper_sve_ldhss_le_zsu_mte, 5903 NULL, }, 5904 { 
gen_helper_sve_ldbsu_zsu_mte, 5905 gen_helper_sve_ldhsu_le_zsu_mte, 5906 gen_helper_sve_ldss_le_zsu_mte, } }, 5907 { { gen_helper_sve_ldbss_zss_mte, 5908 gen_helper_sve_ldhss_le_zss_mte, 5909 NULL, }, 5910 { gen_helper_sve_ldbsu_zss_mte, 5911 gen_helper_sve_ldhsu_le_zss_mte, 5912 gen_helper_sve_ldss_le_zss_mte, } } }, 5913 5914 /* First-fault */ 5915 { { { gen_helper_sve_ldffbss_zsu_mte, 5916 gen_helper_sve_ldffhss_le_zsu_mte, 5917 NULL, }, 5918 { gen_helper_sve_ldffbsu_zsu_mte, 5919 gen_helper_sve_ldffhsu_le_zsu_mte, 5920 gen_helper_sve_ldffss_le_zsu_mte, } }, 5921 { { gen_helper_sve_ldffbss_zss_mte, 5922 gen_helper_sve_ldffhss_le_zss_mte, 5923 NULL, }, 5924 { gen_helper_sve_ldffbsu_zss_mte, 5925 gen_helper_sve_ldffhsu_le_zss_mte, 5926 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5927 5928 { /* Big-endian */ 5929 { { { gen_helper_sve_ldbss_zsu_mte, 5930 gen_helper_sve_ldhss_be_zsu_mte, 5931 NULL, }, 5932 { gen_helper_sve_ldbsu_zsu_mte, 5933 gen_helper_sve_ldhsu_be_zsu_mte, 5934 gen_helper_sve_ldss_be_zsu_mte, } }, 5935 { { gen_helper_sve_ldbss_zss_mte, 5936 gen_helper_sve_ldhss_be_zss_mte, 5937 NULL, }, 5938 { gen_helper_sve_ldbsu_zss_mte, 5939 gen_helper_sve_ldhsu_be_zss_mte, 5940 gen_helper_sve_ldss_be_zss_mte, } } }, 5941 5942 /* First-fault */ 5943 { { { gen_helper_sve_ldffbss_zsu_mte, 5944 gen_helper_sve_ldffhss_be_zsu_mte, 5945 NULL, }, 5946 { gen_helper_sve_ldffbsu_zsu_mte, 5947 gen_helper_sve_ldffhsu_be_zsu_mte, 5948 gen_helper_sve_ldffss_be_zsu_mte, } }, 5949 { { gen_helper_sve_ldffbss_zss_mte, 5950 gen_helper_sve_ldffhss_be_zss_mte, 5951 NULL, }, 5952 { gen_helper_sve_ldffbsu_zss_mte, 5953 gen_helper_sve_ldffhsu_be_zss_mte, 5954 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 5955 }; 5956 5957 /* Note that we overload xs=2 to indicate 64-bit offset. 
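 * e.g. LD1D { z0.d }, p0/z, [x0, z1.d] (64-bit unsigned offsets, msz == MO_64)
 * selects gather_load_fn64[mte][be][0][2][1][3].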
*/ 5958 static gen_helper_gvec_mem_scatter * const 5959 gather_load_fn64[2][2][2][3][2][4] = { 5960 { /* MTE Inactive */ 5961 { /* Little-endian */ 5962 { { { gen_helper_sve_ldbds_zsu, 5963 gen_helper_sve_ldhds_le_zsu, 5964 gen_helper_sve_ldsds_le_zsu, 5965 NULL, }, 5966 { gen_helper_sve_ldbdu_zsu, 5967 gen_helper_sve_ldhdu_le_zsu, 5968 gen_helper_sve_ldsdu_le_zsu, 5969 gen_helper_sve_lddd_le_zsu, } }, 5970 { { gen_helper_sve_ldbds_zss, 5971 gen_helper_sve_ldhds_le_zss, 5972 gen_helper_sve_ldsds_le_zss, 5973 NULL, }, 5974 { gen_helper_sve_ldbdu_zss, 5975 gen_helper_sve_ldhdu_le_zss, 5976 gen_helper_sve_ldsdu_le_zss, 5977 gen_helper_sve_lddd_le_zss, } }, 5978 { { gen_helper_sve_ldbds_zd, 5979 gen_helper_sve_ldhds_le_zd, 5980 gen_helper_sve_ldsds_le_zd, 5981 NULL, }, 5982 { gen_helper_sve_ldbdu_zd, 5983 gen_helper_sve_ldhdu_le_zd, 5984 gen_helper_sve_ldsdu_le_zd, 5985 gen_helper_sve_lddd_le_zd, } } }, 5986 5987 /* First-fault */ 5988 { { { gen_helper_sve_ldffbds_zsu, 5989 gen_helper_sve_ldffhds_le_zsu, 5990 gen_helper_sve_ldffsds_le_zsu, 5991 NULL, }, 5992 { gen_helper_sve_ldffbdu_zsu, 5993 gen_helper_sve_ldffhdu_le_zsu, 5994 gen_helper_sve_ldffsdu_le_zsu, 5995 gen_helper_sve_ldffdd_le_zsu, } }, 5996 { { gen_helper_sve_ldffbds_zss, 5997 gen_helper_sve_ldffhds_le_zss, 5998 gen_helper_sve_ldffsds_le_zss, 5999 NULL, }, 6000 { gen_helper_sve_ldffbdu_zss, 6001 gen_helper_sve_ldffhdu_le_zss, 6002 gen_helper_sve_ldffsdu_le_zss, 6003 gen_helper_sve_ldffdd_le_zss, } }, 6004 { { gen_helper_sve_ldffbds_zd, 6005 gen_helper_sve_ldffhds_le_zd, 6006 gen_helper_sve_ldffsds_le_zd, 6007 NULL, }, 6008 { gen_helper_sve_ldffbdu_zd, 6009 gen_helper_sve_ldffhdu_le_zd, 6010 gen_helper_sve_ldffsdu_le_zd, 6011 gen_helper_sve_ldffdd_le_zd, } } } }, 6012 { /* Big-endian */ 6013 { { { gen_helper_sve_ldbds_zsu, 6014 gen_helper_sve_ldhds_be_zsu, 6015 gen_helper_sve_ldsds_be_zsu, 6016 NULL, }, 6017 { gen_helper_sve_ldbdu_zsu, 6018 gen_helper_sve_ldhdu_be_zsu, 6019 gen_helper_sve_ldsdu_be_zsu, 6020 gen_helper_sve_lddd_be_zsu, } }, 6021 { { gen_helper_sve_ldbds_zss, 6022 gen_helper_sve_ldhds_be_zss, 6023 gen_helper_sve_ldsds_be_zss, 6024 NULL, }, 6025 { gen_helper_sve_ldbdu_zss, 6026 gen_helper_sve_ldhdu_be_zss, 6027 gen_helper_sve_ldsdu_be_zss, 6028 gen_helper_sve_lddd_be_zss, } }, 6029 { { gen_helper_sve_ldbds_zd, 6030 gen_helper_sve_ldhds_be_zd, 6031 gen_helper_sve_ldsds_be_zd, 6032 NULL, }, 6033 { gen_helper_sve_ldbdu_zd, 6034 gen_helper_sve_ldhdu_be_zd, 6035 gen_helper_sve_ldsdu_be_zd, 6036 gen_helper_sve_lddd_be_zd, } } }, 6037 6038 /* First-fault */ 6039 { { { gen_helper_sve_ldffbds_zsu, 6040 gen_helper_sve_ldffhds_be_zsu, 6041 gen_helper_sve_ldffsds_be_zsu, 6042 NULL, }, 6043 { gen_helper_sve_ldffbdu_zsu, 6044 gen_helper_sve_ldffhdu_be_zsu, 6045 gen_helper_sve_ldffsdu_be_zsu, 6046 gen_helper_sve_ldffdd_be_zsu, } }, 6047 { { gen_helper_sve_ldffbds_zss, 6048 gen_helper_sve_ldffhds_be_zss, 6049 gen_helper_sve_ldffsds_be_zss, 6050 NULL, }, 6051 { gen_helper_sve_ldffbdu_zss, 6052 gen_helper_sve_ldffhdu_be_zss, 6053 gen_helper_sve_ldffsdu_be_zss, 6054 gen_helper_sve_ldffdd_be_zss, } }, 6055 { { gen_helper_sve_ldffbds_zd, 6056 gen_helper_sve_ldffhds_be_zd, 6057 gen_helper_sve_ldffsds_be_zd, 6058 NULL, }, 6059 { gen_helper_sve_ldffbdu_zd, 6060 gen_helper_sve_ldffhdu_be_zd, 6061 gen_helper_sve_ldffsdu_be_zd, 6062 gen_helper_sve_ldffdd_be_zd, } } } } }, 6063 { /* MTE Active */ 6064 { /* Little-endian */ 6065 { { { gen_helper_sve_ldbds_zsu_mte, 6066 gen_helper_sve_ldhds_le_zsu_mte, 6067 gen_helper_sve_ldsds_le_zsu_mte, 6068 
NULL, }, 6069 { gen_helper_sve_ldbdu_zsu_mte, 6070 gen_helper_sve_ldhdu_le_zsu_mte, 6071 gen_helper_sve_ldsdu_le_zsu_mte, 6072 gen_helper_sve_lddd_le_zsu_mte, } }, 6073 { { gen_helper_sve_ldbds_zss_mte, 6074 gen_helper_sve_ldhds_le_zss_mte, 6075 gen_helper_sve_ldsds_le_zss_mte, 6076 NULL, }, 6077 { gen_helper_sve_ldbdu_zss_mte, 6078 gen_helper_sve_ldhdu_le_zss_mte, 6079 gen_helper_sve_ldsdu_le_zss_mte, 6080 gen_helper_sve_lddd_le_zss_mte, } }, 6081 { { gen_helper_sve_ldbds_zd_mte, 6082 gen_helper_sve_ldhds_le_zd_mte, 6083 gen_helper_sve_ldsds_le_zd_mte, 6084 NULL, }, 6085 { gen_helper_sve_ldbdu_zd_mte, 6086 gen_helper_sve_ldhdu_le_zd_mte, 6087 gen_helper_sve_ldsdu_le_zd_mte, 6088 gen_helper_sve_lddd_le_zd_mte, } } }, 6089 6090 /* First-fault */ 6091 { { { gen_helper_sve_ldffbds_zsu_mte, 6092 gen_helper_sve_ldffhds_le_zsu_mte, 6093 gen_helper_sve_ldffsds_le_zsu_mte, 6094 NULL, }, 6095 { gen_helper_sve_ldffbdu_zsu_mte, 6096 gen_helper_sve_ldffhdu_le_zsu_mte, 6097 gen_helper_sve_ldffsdu_le_zsu_mte, 6098 gen_helper_sve_ldffdd_le_zsu_mte, } }, 6099 { { gen_helper_sve_ldffbds_zss_mte, 6100 gen_helper_sve_ldffhds_le_zss_mte, 6101 gen_helper_sve_ldffsds_le_zss_mte, 6102 NULL, }, 6103 { gen_helper_sve_ldffbdu_zss_mte, 6104 gen_helper_sve_ldffhdu_le_zss_mte, 6105 gen_helper_sve_ldffsdu_le_zss_mte, 6106 gen_helper_sve_ldffdd_le_zss_mte, } }, 6107 { { gen_helper_sve_ldffbds_zd_mte, 6108 gen_helper_sve_ldffhds_le_zd_mte, 6109 gen_helper_sve_ldffsds_le_zd_mte, 6110 NULL, }, 6111 { gen_helper_sve_ldffbdu_zd_mte, 6112 gen_helper_sve_ldffhdu_le_zd_mte, 6113 gen_helper_sve_ldffsdu_le_zd_mte, 6114 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 6115 { /* Big-endian */ 6116 { { { gen_helper_sve_ldbds_zsu_mte, 6117 gen_helper_sve_ldhds_be_zsu_mte, 6118 gen_helper_sve_ldsds_be_zsu_mte, 6119 NULL, }, 6120 { gen_helper_sve_ldbdu_zsu_mte, 6121 gen_helper_sve_ldhdu_be_zsu_mte, 6122 gen_helper_sve_ldsdu_be_zsu_mte, 6123 gen_helper_sve_lddd_be_zsu_mte, } }, 6124 { { gen_helper_sve_ldbds_zss_mte, 6125 gen_helper_sve_ldhds_be_zss_mte, 6126 gen_helper_sve_ldsds_be_zss_mte, 6127 NULL, }, 6128 { gen_helper_sve_ldbdu_zss_mte, 6129 gen_helper_sve_ldhdu_be_zss_mte, 6130 gen_helper_sve_ldsdu_be_zss_mte, 6131 gen_helper_sve_lddd_be_zss_mte, } }, 6132 { { gen_helper_sve_ldbds_zd_mte, 6133 gen_helper_sve_ldhds_be_zd_mte, 6134 gen_helper_sve_ldsds_be_zd_mte, 6135 NULL, }, 6136 { gen_helper_sve_ldbdu_zd_mte, 6137 gen_helper_sve_ldhdu_be_zd_mte, 6138 gen_helper_sve_ldsdu_be_zd_mte, 6139 gen_helper_sve_lddd_be_zd_mte, } } }, 6140 6141 /* First-fault */ 6142 { { { gen_helper_sve_ldffbds_zsu_mte, 6143 gen_helper_sve_ldffhds_be_zsu_mte, 6144 gen_helper_sve_ldffsds_be_zsu_mte, 6145 NULL, }, 6146 { gen_helper_sve_ldffbdu_zsu_mte, 6147 gen_helper_sve_ldffhdu_be_zsu_mte, 6148 gen_helper_sve_ldffsdu_be_zsu_mte, 6149 gen_helper_sve_ldffdd_be_zsu_mte, } }, 6150 { { gen_helper_sve_ldffbds_zss_mte, 6151 gen_helper_sve_ldffhds_be_zss_mte, 6152 gen_helper_sve_ldffsds_be_zss_mte, 6153 NULL, }, 6154 { gen_helper_sve_ldffbdu_zss_mte, 6155 gen_helper_sve_ldffhdu_be_zss_mte, 6156 gen_helper_sve_ldffsdu_be_zss_mte, 6157 gen_helper_sve_ldffdd_be_zss_mte, } }, 6158 { { gen_helper_sve_ldffbds_zd_mte, 6159 gen_helper_sve_ldffhds_be_zd_mte, 6160 gen_helper_sve_ldffsds_be_zd_mte, 6161 NULL, }, 6162 { gen_helper_sve_ldffbdu_zd_mte, 6163 gen_helper_sve_ldffhdu_be_zd_mte, 6164 gen_helper_sve_ldffsdu_be_zd_mte, 6165 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 6166 }; 6167 6168 static gen_helper_gvec_mem_scatter * const 6169 gather_load_fn128[2][2] = { 6170 { 
gen_helper_sve_ldqq_le_zd, 6171 gen_helper_sve_ldqq_be_zd }, 6172 { gen_helper_sve_ldqq_le_zd_mte, 6173 gen_helper_sve_ldqq_be_zd_mte } 6174 }; 6175 6176 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 6177 { 6178 gen_helper_gvec_mem_scatter *fn = NULL; 6179 bool be = s->be_data == MO_BE; 6180 bool mte = s->mte_active[0]; 6181 6182 if (!dc_isar_feature(aa64_sve, s)) { 6183 return false; 6184 } 6185 s->is_nonstreaming = true; 6186 if (!sve_access_check(s)) { 6187 return true; 6188 } 6189 6190 switch (a->esz) { 6191 case MO_32: 6192 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 6193 break; 6194 case MO_64: 6195 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 6196 break; 6197 default: 6198 g_assert_not_reached(); 6199 } 6200 assert(fn != NULL); 6201 6202 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 6203 cpu_reg_sp(s, a->rn), a->msz, false, fn); 6204 return true; 6205 } 6206 6207 static bool trans_LD1Q(DisasContext *s, arg_LD1Q *a) 6208 { 6209 gen_helper_gvec_mem_scatter *fn = NULL; 6210 bool be = s->be_data == MO_BE; 6211 bool mte = s->mte_active[0]; 6212 6213 if (!dc_isar_feature(aa64_sve2p1, s)) { 6214 return false; 6215 } 6216 s->is_nonstreaming = true; 6217 if (!sve_access_check(s)) { 6218 return true; 6219 } 6220 6221 fn = gather_load_fn128[mte][be]; 6222 assert(fn != NULL); 6223 6224 /* 6225 * Unlike LD1_zprz, a->rm is the scalar register and it can be XZR, not XSP. 6226 * a->rn is the vector register. 6227 */ 6228 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6229 cpu_reg(s, a->rm), MO_128, false, fn); 6230 return true; 6231 } 6232 6233 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 6234 { 6235 gen_helper_gvec_mem_scatter *fn = NULL; 6236 bool be = s->be_data == MO_BE; 6237 bool mte = s->mte_active[0]; 6238 6239 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { 6240 return false; 6241 } 6242 if (!dc_isar_feature(aa64_sve, s)) { 6243 return false; 6244 } 6245 s->is_nonstreaming = true; 6246 if (!sve_access_check(s)) { 6247 return true; 6248 } 6249 6250 switch (a->esz) { 6251 case MO_32: 6252 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 6253 break; 6254 case MO_64: 6255 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 6256 break; 6257 } 6258 assert(fn != NULL); 6259 6260 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 6261 * by loading the immediate into the scalar parameter. 6262 */ 6263 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6264 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 6265 return true; 6266 } 6267 6268 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 6269 { 6270 gen_helper_gvec_mem_scatter *fn = NULL; 6271 bool be = s->be_data == MO_BE; 6272 bool mte = s->mte_active[0]; 6273 6274 if (a->esz < a->msz + !a->u) { 6275 return false; 6276 } 6277 if (!dc_isar_feature(aa64_sve2, s)) { 6278 return false; 6279 } 6280 s->is_nonstreaming = true; 6281 if (!sve_access_check(s)) { 6282 return true; 6283 } 6284 6285 switch (a->esz) { 6286 case MO_32: 6287 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 6288 break; 6289 case MO_64: 6290 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 6291 break; 6292 } 6293 assert(fn != NULL); 6294 6295 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6296 cpu_reg(s, a->rm), a->msz, false, fn); 6297 return true; 6298 } 6299 6300 /* Indexed by [mte][be][xs][msz]. 
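 * e.g. ST1H { z0.s }, p0, [x0, z1.s, uxtw #1] (uxtw offsets, msz == MO_16)
 * selects scatter_store_fn32[mte][be][0][1].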
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
          { gen_helper_sve_stbs_zsu,
            gen_helper_sve_sths_le_zsu,
            gen_helper_sve_stss_le_zsu, },
          { gen_helper_sve_stbs_zss,
            gen_helper_sve_sths_le_zss,
            gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
          { gen_helper_sve_stbs_zsu,
            gen_helper_sve_sths_be_zsu,
            gen_helper_sve_stss_be_zsu, },
          { gen_helper_sve_stbs_zss,
            gen_helper_sve_sths_be_zss,
            gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
          { gen_helper_sve_stbs_zsu_mte,
            gen_helper_sve_sths_le_zsu_mte,
            gen_helper_sve_stss_le_zsu_mte, },
          { gen_helper_sve_stbs_zss_mte,
            gen_helper_sve_sths_le_zss_mte,
            gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
          { gen_helper_sve_stbs_zsu_mte,
            gen_helper_sve_sths_be_zsu_mte,
            gen_helper_sve_stss_be_zsu_mte, },
          { gen_helper_sve_stbs_zss_mte,
            gen_helper_sve_sths_be_zss_mte,
            gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
          { gen_helper_sve_stbd_zsu,
            gen_helper_sve_sthd_le_zsu,
            gen_helper_sve_stsd_le_zsu,
            gen_helper_sve_stdd_le_zsu, },
          { gen_helper_sve_stbd_zss,
            gen_helper_sve_sthd_le_zss,
            gen_helper_sve_stsd_le_zss,
            gen_helper_sve_stdd_le_zss, },
          { gen_helper_sve_stbd_zd,
            gen_helper_sve_sthd_le_zd,
            gen_helper_sve_stsd_le_zd,
            gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
          { gen_helper_sve_stbd_zsu,
            gen_helper_sve_sthd_be_zsu,
            gen_helper_sve_stsd_be_zsu,
            gen_helper_sve_stdd_be_zsu, },
          { gen_helper_sve_stbd_zss,
            gen_helper_sve_sthd_be_zss,
            gen_helper_sve_stsd_be_zss,
            gen_helper_sve_stdd_be_zss, },
          { gen_helper_sve_stbd_zd,
            gen_helper_sve_sthd_be_zd,
            gen_helper_sve_stsd_be_zd,
            gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
          { gen_helper_sve_stbd_zsu_mte,
            gen_helper_sve_sthd_le_zsu_mte,
            gen_helper_sve_stsd_le_zsu_mte,
            gen_helper_sve_stdd_le_zsu_mte, },
          { gen_helper_sve_stbd_zss_mte,
            gen_helper_sve_sthd_le_zss_mte,
            gen_helper_sve_stsd_le_zss_mte,
            gen_helper_sve_stdd_le_zss_mte, },
          { gen_helper_sve_stbd_zd_mte,
            gen_helper_sve_sthd_le_zd_mte,
            gen_helper_sve_stsd_le_zd_mte,
            gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
          { gen_helper_sve_stbd_zsu_mte,
            gen_helper_sve_sthd_be_zsu_mte,
            gen_helper_sve_stsd_be_zsu_mte,
            gen_helper_sve_stdd_be_zsu_mte, },
          { gen_helper_sve_stbd_zss_mte,
            gen_helper_sve_sthd_be_zss_mte,
            gen_helper_sve_stsd_be_zss_mte,
            gen_helper_sve_stdd_be_zss_mte, },
          { gen_helper_sve_stbd_zd_mte,
            gen_helper_sve_sthd_be_zd_mte,
            gen_helper_sve_stsd_be_zd_mte,
            gen_helper_sve_stdd_be_zd_mte, } } },
};

static gen_helper_gvec_mem_scatter * const
scatter_store_fn128[2][2] = {
    { gen_helper_sve_stqq_le_zd,
      gen_helper_sve_stqq_be_zd },
    { gen_helper_sve_stqq_le_zd_mte,
      gen_helper_sve_stqq_be_zd_mte }
};

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];
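    /*
     * The element size must be at least as wide as the memory access
     * size, and a byte access (msz == 0) cannot use a scaled index.
     */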
6405 6406 if (a->esz < a->msz || (a->msz == 0 && a->scale)) { 6407 return false; 6408 } 6409 if (!dc_isar_feature(aa64_sve, s)) { 6410 return false; 6411 } 6412 s->is_nonstreaming = true; 6413 if (!sve_access_check(s)) { 6414 return true; 6415 } 6416 switch (a->esz) { 6417 case MO_32: 6418 fn = scatter_store_fn32[mte][be][a->xs][a->msz]; 6419 break; 6420 case MO_64: 6421 fn = scatter_store_fn64[mte][be][a->xs][a->msz]; 6422 break; 6423 default: 6424 g_assert_not_reached(); 6425 } 6426 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 6427 cpu_reg_sp(s, a->rn), a->msz, true, fn); 6428 return true; 6429 } 6430 6431 static bool trans_ST1Q(DisasContext *s, arg_ST1Q *a) 6432 { 6433 gen_helper_gvec_mem_scatter *fn; 6434 bool be = s->be_data == MO_BE; 6435 bool mte = s->mte_active[0]; 6436 6437 if (!dc_isar_feature(aa64_sve2p1, s)) { 6438 return false; 6439 } 6440 s->is_nonstreaming = true; 6441 if (!sve_access_check(s)) { 6442 return true; 6443 } 6444 fn = scatter_store_fn128[mte][be]; 6445 /* 6446 * Unlike ST1_zprz, a->rm is the scalar register, and it 6447 * can be XZR, not XSP. a->rn is the vector register. 6448 */ 6449 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6450 cpu_reg(s, a->rm), MO_128, true, fn); 6451 return true; 6452 } 6453 6454 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) 6455 { 6456 gen_helper_gvec_mem_scatter *fn = NULL; 6457 bool be = s->be_data == MO_BE; 6458 bool mte = s->mte_active[0]; 6459 6460 if (a->esz < a->msz) { 6461 return false; 6462 } 6463 if (!dc_isar_feature(aa64_sve, s)) { 6464 return false; 6465 } 6466 s->is_nonstreaming = true; 6467 if (!sve_access_check(s)) { 6468 return true; 6469 } 6470 6471 switch (a->esz) { 6472 case MO_32: 6473 fn = scatter_store_fn32[mte][be][0][a->msz]; 6474 break; 6475 case MO_64: 6476 fn = scatter_store_fn64[mte][be][2][a->msz]; 6477 break; 6478 } 6479 assert(fn != NULL); 6480 6481 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) 6482 * by loading the immediate into the scalar parameter. 6483 */ 6484 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6485 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn); 6486 return true; 6487 } 6488 6489 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) 6490 { 6491 gen_helper_gvec_mem_scatter *fn; 6492 bool be = s->be_data == MO_BE; 6493 bool mte = s->mte_active[0]; 6494 6495 if (a->esz < a->msz) { 6496 return false; 6497 } 6498 if (!dc_isar_feature(aa64_sve2, s)) { 6499 return false; 6500 } 6501 s->is_nonstreaming = true; 6502 if (!sve_access_check(s)) { 6503 return true; 6504 } 6505 6506 switch (a->esz) { 6507 case MO_32: 6508 fn = scatter_store_fn32[mte][be][0][a->msz]; 6509 break; 6510 case MO_64: 6511 fn = scatter_store_fn64[mte][be][2][a->msz]; 6512 break; 6513 default: 6514 g_assert_not_reached(); 6515 } 6516 6517 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 6518 cpu_reg(s, a->rm), a->msz, true, fn); 6519 return true; 6520 } 6521 6522 /* 6523 * Prefetches 6524 */ 6525 6526 static bool trans_PRF(DisasContext *s, arg_PRF *a) 6527 { 6528 if (!dc_isar_feature(aa64_sve, s)) { 6529 return false; 6530 } 6531 /* Prefetch is a nop within QEMU. */ 6532 (void)sve_access_check(s); 6533 return true; 6534 } 6535 6536 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) 6537 { 6538 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 6539 return false; 6540 } 6541 /* Prefetch is a nop within QEMU. 
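 * We still call sve_access_check() so that the usual SVE trap is
 * taken if SVE access is disabled.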
*/ 6542 (void)sve_access_check(s); 6543 return true; 6544 } 6545 6546 static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) 6547 { 6548 if (!dc_isar_feature(aa64_sve, s)) { 6549 return false; 6550 } 6551 /* Prefetch is a nop within QEMU. */ 6552 s->is_nonstreaming = true; 6553 (void)sve_access_check(s); 6554 return true; 6555 } 6556 6557 /* 6558 * Move Prefix 6559 * 6560 * TODO: The implementation so far could handle predicated merging movprfx. 6561 * The helper functions as written take an extra source register to 6562 * use in the operation, but the result is only written when predication 6563 * succeeds. For unpredicated movprfx, we need to rearrange the helpers 6564 * to allow the final write back to the destination to be unconditional. 6565 * For predicated zeroing movprfx, we need to rearrange the helpers to 6566 * allow the final write back to zero inactives. 6567 * 6568 * In the meantime, just emit the moves. 6569 */ 6570 6571 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 6572 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 6573 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 6574 6575 /* 6576 * SVE2 Integer Multiply - Unpredicated 6577 */ 6578 6579 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 6580 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_sve2_sqdmulh, a) 6581 6582 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 6583 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 6584 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 6585 }; 6586 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6587 smulh_zzz_fns[a->esz], a, 0) 6588 6589 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 6590 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 6591 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 6592 }; 6593 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6594 umulh_zzz_fns[a->esz], a, 0) 6595 6596 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6597 gen_helper_gvec_pmul_b, a, 0) 6598 6599 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 6600 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 6601 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 6602 }; 6603 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6604 sqrdmulh_zzz_fns[a->esz], a, 0) 6605 6606 /* 6607 * SVE2 Integer - Predicated 6608 */ 6609 6610 static gen_helper_gvec_4 * const sadlp_fns[4] = { 6611 NULL, gen_helper_sve2_sadalp_zpzz_h, 6612 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 6613 }; 6614 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 6615 sadlp_fns[a->esz], a, 0) 6616 6617 static gen_helper_gvec_4 * const uadlp_fns[4] = { 6618 NULL, gen_helper_sve2_uadalp_zpzz_h, 6619 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 6620 }; 6621 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 6622 uadlp_fns[a->esz], a, 0) 6623 6624 /* 6625 * SVE2 integer unary operations (predicated) 6626 */ 6627 6628 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 6629 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 6630 6631 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 6632 a->esz == 2 ? 
gen_helper_sve2_ursqrte_s : NULL, a, 0) 6633 6634 static gen_helper_gvec_3 * const sqabs_fns[4] = { 6635 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 6636 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 6637 }; 6638 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) 6639 6640 static gen_helper_gvec_3 * const sqneg_fns[4] = { 6641 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 6642 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 6643 }; 6644 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) 6645 6646 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) 6647 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) 6648 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) 6649 6650 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) 6651 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) 6652 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) 6653 6654 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) 6655 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) 6656 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) 6657 6658 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) 6659 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) 6660 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) 6661 6662 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) 6663 DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp) 6664 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) 6665 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) 6666 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) 6667 6668 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) 6669 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) 6670 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) 6671 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) 6672 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) 6673 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) 6674 6675 /* 6676 * SVE2 Widening Integer Arithmetic 6677 */ 6678 6679 static gen_helper_gvec_3 * const saddl_fns[4] = { 6680 NULL, gen_helper_sve2_saddl_h, 6681 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d, 6682 }; 6683 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6684 saddl_fns[a->esz], a, 0) 6685 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6686 saddl_fns[a->esz], a, 3) 6687 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6688 saddl_fns[a->esz], a, 2) 6689 6690 static gen_helper_gvec_3 * const ssubl_fns[4] = { 6691 NULL, gen_helper_sve2_ssubl_h, 6692 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d, 6693 }; 6694 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6695 ssubl_fns[a->esz], a, 0) 6696 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6697 ssubl_fns[a->esz], a, 3) 6698 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6699 ssubl_fns[a->esz], a, 2) 6700 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz, 6701 ssubl_fns[a->esz], a, 1) 6702 6703 static gen_helper_gvec_3 * const sabdl_fns[4] = { 6704 NULL, gen_helper_sve2_sabdl_h, 6705 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d, 6706 }; 6707 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6708 sabdl_fns[a->esz], a, 0) 6709 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6710 sabdl_fns[a->esz], a, 3) 6711 6712 static gen_helper_gvec_3 * const uaddl_fns[4] = { 6713 NULL, gen_helper_sve2_uaddl_h, 6714 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d, 6715 }; 6716 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6717 uaddl_fns[a->esz], a, 0) 6718 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6719 uaddl_fns[a->esz], a, 3) 6720 6721 static gen_helper_gvec_3 * const usubl_fns[4] = { 6722 NULL, gen_helper_sve2_usubl_h, 6723 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d, 6724 }; 6725 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6726 usubl_fns[a->esz], a, 0) 6727 TRANS_FEAT(USUBLT, aa64_sve2, 
gen_gvec_ool_arg_zzz, 6728 usubl_fns[a->esz], a, 3) 6729 6730 static gen_helper_gvec_3 * const uabdl_fns[4] = { 6731 NULL, gen_helper_sve2_uabdl_h, 6732 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d, 6733 }; 6734 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6735 uabdl_fns[a->esz], a, 0) 6736 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6737 uabdl_fns[a->esz], a, 3) 6738 6739 static gen_helper_gvec_3 * const sqdmull_fns[4] = { 6740 NULL, gen_helper_sve2_sqdmull_zzz_h, 6741 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d, 6742 }; 6743 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6744 sqdmull_fns[a->esz], a, 0) 6745 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6746 sqdmull_fns[a->esz], a, 3) 6747 6748 static gen_helper_gvec_3 * const smull_fns[4] = { 6749 NULL, gen_helper_sve2_smull_zzz_h, 6750 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d, 6751 }; 6752 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6753 smull_fns[a->esz], a, 0) 6754 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6755 smull_fns[a->esz], a, 3) 6756 6757 static gen_helper_gvec_3 * const umull_fns[4] = { 6758 NULL, gen_helper_sve2_umull_zzz_h, 6759 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d, 6760 }; 6761 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6762 umull_fns[a->esz], a, 0) 6763 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6764 umull_fns[a->esz], a, 3) 6765 6766 static gen_helper_gvec_3 * const eoril_fns[4] = { 6767 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6768 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 6769 }; 6770 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2) 6771 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1) 6772 6773 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6774 { 6775 static gen_helper_gvec_3 * const fns[4] = { 6776 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6777 NULL, gen_helper_sve2_pmull_d, 6778 }; 6779 6780 if (a->esz == 0) { 6781 if (!dc_isar_feature(aa64_sve2_pmull128, s)) { 6782 return false; 6783 } 6784 s->is_nonstreaming = true; 6785 } else if (!dc_isar_feature(aa64_sve, s)) { 6786 return false; 6787 } 6788 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); 6789 } 6790 6791 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false) 6792 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true) 6793 6794 static gen_helper_gvec_3 * const saddw_fns[4] = { 6795 NULL, gen_helper_sve2_saddw_h, 6796 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d, 6797 }; 6798 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0) 6799 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1) 6800 6801 static gen_helper_gvec_3 * const ssubw_fns[4] = { 6802 NULL, gen_helper_sve2_ssubw_h, 6803 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d, 6804 }; 6805 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0) 6806 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1) 6807 6808 static gen_helper_gvec_3 * const uaddw_fns[4] = { 6809 NULL, gen_helper_sve2_uaddw_h, 6810 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d, 6811 }; 6812 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0) 6813 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1) 6814 6815 static gen_helper_gvec_3 * const usubw_fns[4] = { 6816 NULL, gen_helper_sve2_usubw_h, 6817 gen_helper_sve2_usubw_s, 
gen_helper_sve2_usubw_d, 6818 }; 6819 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0) 6820 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1) 6821 6822 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6823 { 6824 int top = imm & 1; 6825 int shl = imm >> 1; 6826 int halfbits = 4 << vece; 6827 6828 if (top) { 6829 if (shl == halfbits) { 6830 tcg_gen_and_vec(vece, d, n, 6831 tcg_constant_vec_matching(d, vece, 6832 MAKE_64BIT_MASK(halfbits, halfbits))); 6833 } else { 6834 tcg_gen_sari_vec(vece, d, n, halfbits); 6835 tcg_gen_shli_vec(vece, d, d, shl); 6836 } 6837 } else { 6838 tcg_gen_shli_vec(vece, d, n, halfbits); 6839 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 6840 } 6841 } 6842 6843 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 6844 { 6845 int halfbits = 4 << vece; 6846 int top = imm & 1; 6847 int shl = (imm >> 1); 6848 int shift; 6849 uint64_t mask; 6850 6851 mask = MAKE_64BIT_MASK(0, halfbits); 6852 mask <<= shl; 6853 mask = dup_const(vece, mask); 6854 6855 shift = shl - top * halfbits; 6856 if (shift < 0) { 6857 tcg_gen_shri_i64(d, n, -shift); 6858 } else { 6859 tcg_gen_shli_i64(d, n, shift); 6860 } 6861 tcg_gen_andi_i64(d, d, mask); 6862 } 6863 6864 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6865 { 6866 gen_ushll_i64(MO_16, d, n, imm); 6867 } 6868 6869 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6870 { 6871 gen_ushll_i64(MO_32, d, n, imm); 6872 } 6873 6874 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6875 { 6876 gen_ushll_i64(MO_64, d, n, imm); 6877 } 6878 6879 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6880 { 6881 int halfbits = 4 << vece; 6882 int top = imm & 1; 6883 int shl = imm >> 1; 6884 6885 if (top) { 6886 if (shl == halfbits) { 6887 tcg_gen_and_vec(vece, d, n, 6888 tcg_constant_vec_matching(d, vece, 6889 MAKE_64BIT_MASK(halfbits, halfbits))); 6890 } else { 6891 tcg_gen_shri_vec(vece, d, n, halfbits); 6892 tcg_gen_shli_vec(vece, d, d, shl); 6893 } 6894 } else { 6895 if (shl == 0) { 6896 tcg_gen_and_vec(vece, d, n, 6897 tcg_constant_vec_matching(d, vece, 6898 MAKE_64BIT_MASK(0, halfbits))); 6899 } else { 6900 tcg_gen_shli_vec(vece, d, n, halfbits); 6901 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 6902 } 6903 } 6904 } 6905 6906 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a, 6907 const GVecGen2i ops[3], bool sel) 6908 { 6909 6910 if (a->esz < 0 || a->esz > 2) { 6911 return false; 6912 } 6913 if (sve_access_check(s)) { 6914 unsigned vsz = vec_full_reg_size(s); 6915 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6916 vec_full_reg_offset(s, a->rn), 6917 vsz, vsz, (a->imm << 1) | sel, 6918 &ops[a->esz]); 6919 } 6920 return true; 6921 } 6922 6923 static const TCGOpcode sshll_list[] = { 6924 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 6925 }; 6926 static const GVecGen2i sshll_ops[3] = { 6927 { .fniv = gen_sshll_vec, 6928 .opt_opc = sshll_list, 6929 .fno = gen_helper_sve2_sshll_h, 6930 .vece = MO_16 }, 6931 { .fniv = gen_sshll_vec, 6932 .opt_opc = sshll_list, 6933 .fno = gen_helper_sve2_sshll_s, 6934 .vece = MO_32 }, 6935 { .fniv = gen_sshll_vec, 6936 .opt_opc = sshll_list, 6937 .fno = gen_helper_sve2_sshll_d, 6938 .vece = MO_64 } 6939 }; 6940 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false) 6941 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true) 6942 6943 static const TCGOpcode ushll_list[] = { 6944 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 6945 }; 
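/*
 * As with sshll_ops, do_shll_tb() packs (a->imm << 1) | sel into the
 * immediate: bit 0 selects the top half of each element pair and the
 * remaining bits give the left shift, decoded by the expanders above.
 * The .fni8 hooks provide the plain 64-bit integer fallback expansion.
 */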
6946 static const GVecGen2i ushll_ops[3] = { 6947 { .fni8 = gen_ushll16_i64, 6948 .fniv = gen_ushll_vec, 6949 .opt_opc = ushll_list, 6950 .fno = gen_helper_sve2_ushll_h, 6951 .vece = MO_16 }, 6952 { .fni8 = gen_ushll32_i64, 6953 .fniv = gen_ushll_vec, 6954 .opt_opc = ushll_list, 6955 .fno = gen_helper_sve2_ushll_s, 6956 .vece = MO_32 }, 6957 { .fni8 = gen_ushll64_i64, 6958 .fniv = gen_ushll_vec, 6959 .opt_opc = ushll_list, 6960 .fno = gen_helper_sve2_ushll_d, 6961 .vece = MO_64 }, 6962 }; 6963 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false) 6964 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true) 6965 6966 static gen_helper_gvec_3 * const bext_fns[4] = { 6967 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 6968 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 6969 }; 6970 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6971 bext_fns[a->esz], a, 0) 6972 6973 static gen_helper_gvec_3 * const bdep_fns[4] = { 6974 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 6975 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 6976 }; 6977 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6978 bdep_fns[a->esz], a, 0) 6979 6980 static gen_helper_gvec_3 * const bgrp_fns[4] = { 6981 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 6982 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 6983 }; 6984 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6985 bgrp_fns[a->esz], a, 0) 6986 6987 static gen_helper_gvec_3 * const cadd_fns[4] = { 6988 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 6989 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d, 6990 }; 6991 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6992 cadd_fns[a->esz], a, 0) 6993 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6994 cadd_fns[a->esz], a, 1) 6995 6996 static gen_helper_gvec_3 * const sqcadd_fns[4] = { 6997 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 6998 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d, 6999 }; 7000 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 7001 sqcadd_fns[a->esz], a, 0) 7002 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 7003 sqcadd_fns[a->esz], a, 1) 7004 7005 static gen_helper_gvec_4 * const sabal_fns[4] = { 7006 NULL, gen_helper_sve2_sabal_h, 7007 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d, 7008 }; 7009 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0) 7010 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1) 7011 7012 static gen_helper_gvec_4 * const uabal_fns[4] = { 7013 NULL, gen_helper_sve2_uabal_h, 7014 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d, 7015 }; 7016 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0) 7017 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1) 7018 7019 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 7020 { 7021 static gen_helper_gvec_4 * const fns[2] = { 7022 gen_helper_sve2_adcl_s, 7023 gen_helper_sve2_adcl_d, 7024 }; 7025 /* 7026 * Note that in this case the ESZ field encodes both size and sign. 7027 * Split out 'subtract' into bit 1 of the data field for the helper. 
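 * (esz bit 0 selects 32-bit vs 64-bit elements; esz bit 1 is the
 * subtract flag, i.e. SBCLB/SBCLT.)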
7028 */ 7029 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 7030 } 7031 7032 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 7033 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 7034 7035 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 7036 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 7037 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 7038 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 7039 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 7040 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 7041 7042 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 7043 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 7044 7045 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 7046 const GVecGen2 ops[3]) 7047 { 7048 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 7049 return false; 7050 } 7051 if (sve_access_check(s)) { 7052 unsigned vsz = vec_full_reg_size(s); 7053 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 7054 vec_full_reg_offset(s, a->rn), 7055 vsz, vsz, &ops[a->esz]); 7056 } 7057 return true; 7058 } 7059 7060 static const TCGOpcode sqxtn_list[] = { 7061 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 7062 }; 7063 7064 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7065 { 7066 int halfbits = 4 << vece; 7067 int64_t mask = (1ull << halfbits) - 1; 7068 int64_t min = -1ull << (halfbits - 1); 7069 int64_t max = -min - 1; 7070 7071 tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min)); 7072 tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max)); 7073 tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask)); 7074 } 7075 7076 static const GVecGen2 sqxtnb_ops[3] = { 7077 { .fniv = gen_sqxtnb_vec, 7078 .opt_opc = sqxtn_list, 7079 .fno = gen_helper_sve2_sqxtnb_h, 7080 .vece = MO_16 }, 7081 { .fniv = gen_sqxtnb_vec, 7082 .opt_opc = sqxtn_list, 7083 .fno = gen_helper_sve2_sqxtnb_s, 7084 .vece = MO_32 }, 7085 { .fniv = gen_sqxtnb_vec, 7086 .opt_opc = sqxtn_list, 7087 .fno = gen_helper_sve2_sqxtnb_d, 7088 .vece = MO_64 }, 7089 }; 7090 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 7091 7092 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7093 { 7094 int halfbits = 4 << vece; 7095 int64_t mask = (1ull << halfbits) - 1; 7096 int64_t min = -1ull << (halfbits - 1); 7097 int64_t max = -min - 1; 7098 7099 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); 7100 tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); 7101 tcg_gen_shli_vec(vece, n, n, halfbits); 7102 tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); 7103 } 7104 7105 static const GVecGen2 sqxtnt_ops[3] = { 7106 { .fniv = gen_sqxtnt_vec, 7107 .opt_opc = sqxtn_list, 7108 .load_dest = true, 7109 .fno = gen_helper_sve2_sqxtnt_h, 7110 .vece = MO_16 }, 7111 { .fniv = gen_sqxtnt_vec, 7112 .opt_opc = sqxtn_list, 7113 .load_dest = true, 7114 .fno = gen_helper_sve2_sqxtnt_s, 7115 .vece = MO_32 }, 7116 { .fniv = gen_sqxtnt_vec, 7117 .opt_opc = sqxtn_list, 7118 .load_dest = true, 7119 .fno = gen_helper_sve2_sqxtnt_d, 7120 .vece = MO_64 }, 7121 }; 7122 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 7123 7124 static const TCGOpcode uqxtn_list[] = { 7125 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 7126 }; 7127 7128 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7129 { 7130 int 
halfbits = 4 << vece; 7131 int64_t max = (1ull << halfbits) - 1; 7132 7133 tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); 7134 } 7135 7136 static const GVecGen2 uqxtnb_ops[3] = { 7137 { .fniv = gen_uqxtnb_vec, 7138 .opt_opc = uqxtn_list, 7139 .fno = gen_helper_sve2_uqxtnb_h, 7140 .vece = MO_16 }, 7141 { .fniv = gen_uqxtnb_vec, 7142 .opt_opc = uqxtn_list, 7143 .fno = gen_helper_sve2_uqxtnb_s, 7144 .vece = MO_32 }, 7145 { .fniv = gen_uqxtnb_vec, 7146 .opt_opc = uqxtn_list, 7147 .fno = gen_helper_sve2_uqxtnb_d, 7148 .vece = MO_64 }, 7149 }; 7150 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) 7151 7152 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7153 { 7154 int halfbits = 4 << vece; 7155 int64_t max = (1ull << halfbits) - 1; 7156 TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); 7157 7158 tcg_gen_umin_vec(vece, n, n, maxv); 7159 tcg_gen_shli_vec(vece, n, n, halfbits); 7160 tcg_gen_bitsel_vec(vece, d, maxv, d, n); 7161 } 7162 7163 static const GVecGen2 uqxtnt_ops[3] = { 7164 { .fniv = gen_uqxtnt_vec, 7165 .opt_opc = uqxtn_list, 7166 .load_dest = true, 7167 .fno = gen_helper_sve2_uqxtnt_h, 7168 .vece = MO_16 }, 7169 { .fniv = gen_uqxtnt_vec, 7170 .opt_opc = uqxtn_list, 7171 .load_dest = true, 7172 .fno = gen_helper_sve2_uqxtnt_s, 7173 .vece = MO_32 }, 7174 { .fniv = gen_uqxtnt_vec, 7175 .opt_opc = uqxtn_list, 7176 .load_dest = true, 7177 .fno = gen_helper_sve2_uqxtnt_d, 7178 .vece = MO_64 }, 7179 }; 7180 TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops) 7181 7182 static const TCGOpcode sqxtun_list[] = { 7183 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 7184 }; 7185 7186 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7187 { 7188 int halfbits = 4 << vece; 7189 int64_t max = (1ull << halfbits) - 1; 7190 7191 tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0)); 7192 tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max)); 7193 } 7194 7195 static const GVecGen2 sqxtunb_ops[3] = { 7196 { .fniv = gen_sqxtunb_vec, 7197 .opt_opc = sqxtun_list, 7198 .fno = gen_helper_sve2_sqxtunb_h, 7199 .vece = MO_16 }, 7200 { .fniv = gen_sqxtunb_vec, 7201 .opt_opc = sqxtun_list, 7202 .fno = gen_helper_sve2_sqxtunb_s, 7203 .vece = MO_32 }, 7204 { .fniv = gen_sqxtunb_vec, 7205 .opt_opc = sqxtun_list, 7206 .fno = gen_helper_sve2_sqxtunb_d, 7207 .vece = MO_64 }, 7208 }; 7209 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) 7210 7211 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7212 { 7213 int halfbits = 4 << vece; 7214 int64_t max = (1ull << halfbits) - 1; 7215 TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); 7216 7217 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); 7218 tcg_gen_umin_vec(vece, n, n, maxv); 7219 tcg_gen_shli_vec(vece, n, n, halfbits); 7220 tcg_gen_bitsel_vec(vece, d, maxv, d, n); 7221 } 7222 7223 static const GVecGen2 sqxtunt_ops[3] = { 7224 { .fniv = gen_sqxtunt_vec, 7225 .opt_opc = sqxtun_list, 7226 .load_dest = true, 7227 .fno = gen_helper_sve2_sqxtunt_h, 7228 .vece = MO_16 }, 7229 { .fniv = gen_sqxtunt_vec, 7230 .opt_opc = sqxtun_list, 7231 .load_dest = true, 7232 .fno = gen_helper_sve2_sqxtunt_s, 7233 .vece = MO_32 }, 7234 { .fniv = gen_sqxtunt_vec, 7235 .opt_opc = sqxtun_list, 7236 .load_dest = true, 7237 .fno = gen_helper_sve2_sqxtunt_d, 7238 .vece = MO_64 }, 7239 }; 7240 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops) 7241 7242 static bool do_shr_narrow(DisasContext *s, 
arg_rri_esz *a, 7243 const GVecGen2i ops[3]) 7244 { 7245 if (a->esz < 0 || a->esz > MO_32) { 7246 return false; 7247 } 7248 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 7249 if (sve_access_check(s)) { 7250 unsigned vsz = vec_full_reg_size(s); 7251 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 7252 vec_full_reg_offset(s, a->rn), 7253 vsz, vsz, a->imm, &ops[a->esz]); 7254 } 7255 return true; 7256 } 7257 7258 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 7259 { 7260 int halfbits = 4 << vece; 7261 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 7262 7263 tcg_gen_shri_i64(d, n, shr); 7264 tcg_gen_andi_i64(d, d, mask); 7265 } 7266 7267 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7268 { 7269 gen_shrnb_i64(MO_16, d, n, shr); 7270 } 7271 7272 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7273 { 7274 gen_shrnb_i64(MO_32, d, n, shr); 7275 } 7276 7277 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7278 { 7279 gen_shrnb_i64(MO_64, d, n, shr); 7280 } 7281 7282 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 7283 { 7284 int halfbits = 4 << vece; 7285 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 7286 7287 tcg_gen_shri_vec(vece, n, n, shr); 7288 tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask)); 7289 } 7290 7291 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; 7292 static const GVecGen2i shrnb_ops[3] = { 7293 { .fni8 = gen_shrnb16_i64, 7294 .fniv = gen_shrnb_vec, 7295 .opt_opc = shrnb_vec_list, 7296 .fno = gen_helper_sve2_shrnb_h, 7297 .vece = MO_16 }, 7298 { .fni8 = gen_shrnb32_i64, 7299 .fniv = gen_shrnb_vec, 7300 .opt_opc = shrnb_vec_list, 7301 .fno = gen_helper_sve2_shrnb_s, 7302 .vece = MO_32 }, 7303 { .fni8 = gen_shrnb64_i64, 7304 .fniv = gen_shrnb_vec, 7305 .opt_opc = shrnb_vec_list, 7306 .fno = gen_helper_sve2_shrnb_d, 7307 .vece = MO_64 }, 7308 }; 7309 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops) 7310 7311 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 7312 { 7313 int halfbits = 4 << vece; 7314 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 7315 7316 tcg_gen_shli_i64(n, n, halfbits - shr); 7317 tcg_gen_andi_i64(n, n, ~mask); 7318 tcg_gen_andi_i64(d, d, mask); 7319 tcg_gen_or_i64(d, d, n); 7320 } 7321 7322 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7323 { 7324 gen_shrnt_i64(MO_16, d, n, shr); 7325 } 7326 7327 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7328 { 7329 gen_shrnt_i64(MO_32, d, n, shr); 7330 } 7331 7332 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7333 { 7334 tcg_gen_shri_i64(n, n, shr); 7335 tcg_gen_deposit_i64(d, d, n, 32, 32); 7336 } 7337 7338 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 7339 { 7340 int halfbits = 4 << vece; 7341 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 7342 7343 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 7344 tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); 7345 } 7346 7347 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; 7348 static const GVecGen2i shrnt_ops[3] = { 7349 { .fni8 = gen_shrnt16_i64, 7350 .fniv = gen_shrnt_vec, 7351 .opt_opc = shrnt_vec_list, 7352 .load_dest = true, 7353 .fno = gen_helper_sve2_shrnt_h, 7354 .vece = MO_16 }, 7355 { .fni8 = gen_shrnt32_i64, 7356 .fniv = gen_shrnt_vec, 7357 .opt_opc = shrnt_vec_list, 7358 .load_dest = true, 7359 .fno = gen_helper_sve2_shrnt_s, 7360 
.vece = MO_32 }, 7361 { .fni8 = gen_shrnt64_i64, 7362 .fniv = gen_shrnt_vec, 7363 .opt_opc = shrnt_vec_list, 7364 .load_dest = true, 7365 .fno = gen_helper_sve2_shrnt_d, 7366 .vece = MO_64 }, 7367 }; 7368 TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops) 7369 7370 static const GVecGen2i rshrnb_ops[3] = { 7371 { .fno = gen_helper_sve2_rshrnb_h }, 7372 { .fno = gen_helper_sve2_rshrnb_s }, 7373 { .fno = gen_helper_sve2_rshrnb_d }, 7374 }; 7375 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops) 7376 7377 static const GVecGen2i rshrnt_ops[3] = { 7378 { .fno = gen_helper_sve2_rshrnt_h }, 7379 { .fno = gen_helper_sve2_rshrnt_s }, 7380 { .fno = gen_helper_sve2_rshrnt_d }, 7381 }; 7382 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) 7383 7384 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 7385 TCGv_vec n, int64_t shr) 7386 { 7387 int halfbits = 4 << vece; 7388 uint64_t max = MAKE_64BIT_MASK(0, halfbits); 7389 7390 tcg_gen_sari_vec(vece, n, n, shr); 7391 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); 7392 tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); 7393 } 7394 7395 static const TCGOpcode sqshrunb_vec_list[] = { 7396 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 7397 }; 7398 static const GVecGen2i sqshrunb_ops[3] = { 7399 { .fniv = gen_sqshrunb_vec, 7400 .opt_opc = sqshrunb_vec_list, 7401 .fno = gen_helper_sve2_sqshrunb_h, 7402 .vece = MO_16 }, 7403 { .fniv = gen_sqshrunb_vec, 7404 .opt_opc = sqshrunb_vec_list, 7405 .fno = gen_helper_sve2_sqshrunb_s, 7406 .vece = MO_32 }, 7407 { .fniv = gen_sqshrunb_vec, 7408 .opt_opc = sqshrunb_vec_list, 7409 .fno = gen_helper_sve2_sqshrunb_d, 7410 .vece = MO_64 }, 7411 }; 7412 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) 7413 7414 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 7415 TCGv_vec n, int64_t shr) 7416 { 7417 int halfbits = 4 << vece; 7418 uint64_t max = MAKE_64BIT_MASK(0, halfbits); 7419 TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); 7420 7421 tcg_gen_sari_vec(vece, n, n, shr); 7422 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); 7423 tcg_gen_umin_vec(vece, n, n, maxv); 7424 tcg_gen_shli_vec(vece, n, n, halfbits); 7425 tcg_gen_bitsel_vec(vece, d, maxv, d, n); 7426 } 7427 7428 static const TCGOpcode sqshrunt_vec_list[] = { 7429 INDEX_op_shli_vec, INDEX_op_sari_vec, 7430 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 7431 }; 7432 static const GVecGen2i sqshrunt_ops[3] = { 7433 { .fniv = gen_sqshrunt_vec, 7434 .opt_opc = sqshrunt_vec_list, 7435 .load_dest = true, 7436 .fno = gen_helper_sve2_sqshrunt_h, 7437 .vece = MO_16 }, 7438 { .fniv = gen_sqshrunt_vec, 7439 .opt_opc = sqshrunt_vec_list, 7440 .load_dest = true, 7441 .fno = gen_helper_sve2_sqshrunt_s, 7442 .vece = MO_32 }, 7443 { .fniv = gen_sqshrunt_vec, 7444 .opt_opc = sqshrunt_vec_list, 7445 .load_dest = true, 7446 .fno = gen_helper_sve2_sqshrunt_d, 7447 .vece = MO_64 }, 7448 }; 7449 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops) 7450 7451 static const GVecGen2i sqrshrunb_ops[3] = { 7452 { .fno = gen_helper_sve2_sqrshrunb_h }, 7453 { .fno = gen_helper_sve2_sqrshrunb_s }, 7454 { .fno = gen_helper_sve2_sqrshrunb_d }, 7455 }; 7456 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops) 7457 7458 static const GVecGen2i sqrshrunt_ops[3] = { 7459 { .fno = gen_helper_sve2_sqrshrunt_h }, 7460 { .fno = gen_helper_sve2_sqrshrunt_s }, 7461 { .fno = gen_helper_sve2_sqrshrunt_d }, 7462 }; 7463 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, 
a, sqrshrunt_ops) 7464 7465 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 7466 TCGv_vec n, int64_t shr) 7467 { 7468 int halfbits = 4 << vece; 7469 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 7470 int64_t min = -max - 1; 7471 int64_t mask = MAKE_64BIT_MASK(0, halfbits); 7472 7473 tcg_gen_sari_vec(vece, n, n, shr); 7474 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); 7475 tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); 7476 tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask)); 7477 } 7478 7479 static const TCGOpcode sqshrnb_vec_list[] = { 7480 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 7481 }; 7482 static const GVecGen2i sqshrnb_ops[3] = { 7483 { .fniv = gen_sqshrnb_vec, 7484 .opt_opc = sqshrnb_vec_list, 7485 .fno = gen_helper_sve2_sqshrnb_h, 7486 .vece = MO_16 }, 7487 { .fniv = gen_sqshrnb_vec, 7488 .opt_opc = sqshrnb_vec_list, 7489 .fno = gen_helper_sve2_sqshrnb_s, 7490 .vece = MO_32 }, 7491 { .fniv = gen_sqshrnb_vec, 7492 .opt_opc = sqshrnb_vec_list, 7493 .fno = gen_helper_sve2_sqshrnb_d, 7494 .vece = MO_64 }, 7495 }; 7496 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) 7497 7498 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 7499 TCGv_vec n, int64_t shr) 7500 { 7501 int halfbits = 4 << vece; 7502 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 7503 int64_t min = -max - 1; 7504 int64_t mask = MAKE_64BIT_MASK(0, halfbits); 7505 7506 tcg_gen_sari_vec(vece, n, n, shr); 7507 tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); 7508 tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); 7509 tcg_gen_shli_vec(vece, n, n, halfbits); 7510 tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); 7511 } 7512 7513 static const TCGOpcode sqshrnt_vec_list[] = { 7514 INDEX_op_shli_vec, INDEX_op_sari_vec, 7515 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 7516 }; 7517 static const GVecGen2i sqshrnt_ops[3] = { 7518 { .fniv = gen_sqshrnt_vec, 7519 .opt_opc = sqshrnt_vec_list, 7520 .load_dest = true, 7521 .fno = gen_helper_sve2_sqshrnt_h, 7522 .vece = MO_16 }, 7523 { .fniv = gen_sqshrnt_vec, 7524 .opt_opc = sqshrnt_vec_list, 7525 .load_dest = true, 7526 .fno = gen_helper_sve2_sqshrnt_s, 7527 .vece = MO_32 }, 7528 { .fniv = gen_sqshrnt_vec, 7529 .opt_opc = sqshrnt_vec_list, 7530 .load_dest = true, 7531 .fno = gen_helper_sve2_sqshrnt_d, 7532 .vece = MO_64 }, 7533 }; 7534 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops) 7535 7536 static const GVecGen2i sqrshrnb_ops[3] = { 7537 { .fno = gen_helper_sve2_sqrshrnb_h }, 7538 { .fno = gen_helper_sve2_sqrshrnb_s }, 7539 { .fno = gen_helper_sve2_sqrshrnb_d }, 7540 }; 7541 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops) 7542 7543 static const GVecGen2i sqrshrnt_ops[3] = { 7544 { .fno = gen_helper_sve2_sqrshrnt_h }, 7545 { .fno = gen_helper_sve2_sqrshrnt_s }, 7546 { .fno = gen_helper_sve2_sqrshrnt_d }, 7547 }; 7548 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) 7549 7550 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 7551 TCGv_vec n, int64_t shr) 7552 { 7553 int halfbits = 4 << vece; 7554 int64_t max = MAKE_64BIT_MASK(0, halfbits); 7555 7556 tcg_gen_shri_vec(vece, n, n, shr); 7557 tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); 7558 } 7559 7560 static const TCGOpcode uqshrnb_vec_list[] = { 7561 INDEX_op_shri_vec, INDEX_op_umin_vec, 0 7562 }; 7563 static const GVecGen2i uqshrnb_ops[3] = { 7564 { .fniv = gen_uqshrnb_vec, 7565 .opt_opc = 
uqshrnb_vec_list, 7566 .fno = gen_helper_sve2_uqshrnb_h, 7567 .vece = MO_16 }, 7568 { .fniv = gen_uqshrnb_vec, 7569 .opt_opc = uqshrnb_vec_list, 7570 .fno = gen_helper_sve2_uqshrnb_s, 7571 .vece = MO_32 }, 7572 { .fniv = gen_uqshrnb_vec, 7573 .opt_opc = uqshrnb_vec_list, 7574 .fno = gen_helper_sve2_uqshrnb_d, 7575 .vece = MO_64 }, 7576 }; 7577 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) 7578 7579 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, 7580 TCGv_vec n, int64_t shr) 7581 { 7582 int halfbits = 4 << vece; 7583 int64_t max = MAKE_64BIT_MASK(0, halfbits); 7584 TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); 7585 7586 tcg_gen_shri_vec(vece, n, n, shr); 7587 tcg_gen_umin_vec(vece, n, n, maxv); 7588 tcg_gen_shli_vec(vece, n, n, halfbits); 7589 tcg_gen_bitsel_vec(vece, d, maxv, d, n); 7590 } 7591 7592 static const TCGOpcode uqshrnt_vec_list[] = { 7593 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0 7594 }; 7595 static const GVecGen2i uqshrnt_ops[3] = { 7596 { .fniv = gen_uqshrnt_vec, 7597 .opt_opc = uqshrnt_vec_list, 7598 .load_dest = true, 7599 .fno = gen_helper_sve2_uqshrnt_h, 7600 .vece = MO_16 }, 7601 { .fniv = gen_uqshrnt_vec, 7602 .opt_opc = uqshrnt_vec_list, 7603 .load_dest = true, 7604 .fno = gen_helper_sve2_uqshrnt_s, 7605 .vece = MO_32 }, 7606 { .fniv = gen_uqshrnt_vec, 7607 .opt_opc = uqshrnt_vec_list, 7608 .load_dest = true, 7609 .fno = gen_helper_sve2_uqshrnt_d, 7610 .vece = MO_64 }, 7611 }; 7612 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops) 7613 7614 static const GVecGen2i uqrshrnb_ops[3] = { 7615 { .fno = gen_helper_sve2_uqrshrnb_h }, 7616 { .fno = gen_helper_sve2_uqrshrnb_s }, 7617 { .fno = gen_helper_sve2_uqrshrnb_d }, 7618 }; 7619 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops) 7620 7621 static const GVecGen2i uqrshrnt_ops[3] = { 7622 { .fno = gen_helper_sve2_uqrshrnt_h }, 7623 { .fno = gen_helper_sve2_uqrshrnt_s }, 7624 { .fno = gen_helper_sve2_uqrshrnt_d }, 7625 }; 7626 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops) 7627 7628 #define DO_SVE2_ZZZ_NARROW(NAME, name) \ 7629 static gen_helper_gvec_3 * const name##_fns[4] = { \ 7630 NULL, gen_helper_sve2_##name##_h, \ 7631 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 7632 }; \ 7633 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \ 7634 name##_fns[a->esz], a, 0) 7635 7636 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) 7637 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt) 7638 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb) 7639 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt) 7640 7641 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb) 7642 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt) 7643 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb) 7644 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) 7645 7646 static gen_helper_gvec_flags_4 * const match_fns[4] = { 7647 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL 7648 }; 7649 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) 7650 7651 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = { 7652 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL 7653 }; 7654 TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) 7655 7656 static gen_helper_gvec_4 * const histcnt_fns[4] = { 7657 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d 7658 }; 7659 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, 7660 histcnt_fns[a->esz], a, 0) 7661 7662 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, 7663 a->esz == 0 ? 
gen_helper_sve2_histseg : NULL, a, 0) 7664 7665 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) 7666 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) 7667 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) 7668 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) 7669 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) 7670 7671 static bool do_fmmla(DisasContext *s, arg_rrrr_esz *a, 7672 gen_helper_gvec_4_ptr *fn) 7673 { 7674 if (sve_access_check(s)) { 7675 if (vec_full_reg_size(s) < 4 * memop_size(a->esz)) { 7676 unallocated_encoding(s); 7677 } else { 7678 gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, 0, FPST_A64); 7679 } 7680 } 7681 return true; 7682 } 7683 7684 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, do_fmmla, a, gen_helper_fmmla_s) 7685 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, do_fmmla, a, gen_helper_fmmla_d) 7686 7687 /* 7688 * SVE Integer Multiply-Add (unpredicated) 7689 */ 7690 7691 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { 7692 NULL, gen_helper_sve2_sqdmlal_zzzw_h, 7693 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, 7694 }; 7695 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7696 sqdmlal_zzzw_fns[a->esz], a, 0) 7697 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7698 sqdmlal_zzzw_fns[a->esz], a, 3) 7699 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7700 sqdmlal_zzzw_fns[a->esz], a, 2) 7701 7702 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = { 7703 NULL, gen_helper_sve2_sqdmlsl_zzzw_h, 7704 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d, 7705 }; 7706 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7707 sqdmlsl_zzzw_fns[a->esz], a, 0) 7708 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7709 sqdmlsl_zzzw_fns[a->esz], a, 3) 7710 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7711 sqdmlsl_zzzw_fns[a->esz], a, 2) 7712 7713 static gen_helper_gvec_4 * const sqrdmlah_fns[] = { 7714 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h, 7715 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d, 7716 }; 7717 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7718 sqrdmlah_fns[a->esz], a, 0) 7719 7720 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = { 7721 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h, 7722 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d, 7723 }; 7724 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7725 sqrdmlsh_fns[a->esz], a, 0) 7726 7727 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = { 7728 NULL, gen_helper_sve2_smlal_zzzw_h, 7729 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d, 7730 }; 7731 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7732 smlal_zzzw_fns[a->esz], a, 0) 7733 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7734 smlal_zzzw_fns[a->esz], a, 1) 7735 7736 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = { 7737 NULL, gen_helper_sve2_umlal_zzzw_h, 7738 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d, 7739 }; 7740 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7741 umlal_zzzw_fns[a->esz], a, 0) 7742 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7743 umlal_zzzw_fns[a->esz], a, 1) 7744 7745 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = { 7746 NULL, gen_helper_sve2_smlsl_zzzw_h, 7747 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d, 7748 }; 7749 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7750 smlsl_zzzw_fns[a->esz], a, 0) 7751 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7752 
smlsl_zzzw_fns[a->esz], a, 1) 7753 7754 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = { 7755 NULL, gen_helper_sve2_umlsl_zzzw_h, 7756 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, 7757 }; 7758 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7759 umlsl_zzzw_fns[a->esz], a, 0) 7760 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7761 umlsl_zzzw_fns[a->esz], a, 1) 7762 7763 static gen_helper_gvec_4 * const cmla_fns[] = { 7764 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, 7765 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, 7766 }; 7767 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7768 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7769 7770 static gen_helper_gvec_4 * const cdot_fns[] = { 7771 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d 7772 }; 7773 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7774 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7775 7776 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { 7777 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, 7778 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, 7779 }; 7780 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7781 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7782 7783 TRANS_FEAT(USDOT_zzzz_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7784 gen_helper_gvec_usdot_4b, a, 0) 7785 7786 TRANS_FEAT(SDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz, 7787 gen_helper_gvec_sdot_2h, a, 0) 7788 TRANS_FEAT(UDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz, 7789 gen_helper_gvec_udot_2h, a, 0) 7790 7791 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, 7792 gen_helper_crypto_aesmc, a->rd, a->rd, 0) 7793 TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz, 7794 gen_helper_crypto_aesimc, a->rd, a->rd, 0) 7795 7796 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7797 gen_helper_crypto_aese, a, 0) 7798 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7799 gen_helper_crypto_aesd, a, 0) 7800 7801 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7802 gen_helper_crypto_sm4e, a, 0) 7803 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7804 gen_helper_crypto_sm4ekey, a, 0) 7805 7806 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, 7807 gen_gvec_rax1, a) 7808 7809 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, 7810 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64) 7811 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, 7812 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) 7813 7814 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 7815 gen_helper_sve_bfcvtnt, a, 0, 7816 s->fpcr_ah ? FPST_AH : FPST_A64) 7817 7818 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, 7819 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) 7820 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, 7821 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64) 7822 7823 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, 7824 FPROUNDING_ODD, gen_helper_sve_fcvt_ds) 7825 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a, 7826 FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds) 7827 7828 static gen_helper_gvec_3_ptr * const flogb_fns[] = { 7829 NULL, gen_helper_flogb_h, 7830 gen_helper_flogb_s, gen_helper_flogb_d 7831 }; 7832 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], 7833 a, 0, a->esz == MO_16 ? 
           FPST_A64_F16 : FPST_A64)

/*
 * Pack the helper's 'data' argument: bit 0 selects subtraction
 * (FMLSL) vs addition (FMLAL), bit 1 selects top vs bottom source
 * elements, and the indexed form places the element index at bit 3.
 */
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, tcg_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 3) | (sel << 1) | sub, tcg_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(FDOT_zzzz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzzz,
           gen_helper_sme2_fdot_h, a, 0)
TRANS_FEAT(FDOT_zzxz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzxz,
           gen_helper_sme2_fdot_idx_h, a)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel,
                              s->fpcr_ah ? FPST_AH : FPST_A64);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel,
                              s->fpcr_ah ?
                              FPST_AH : FPST_A64);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)

static bool do_BFMLSL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    if (s->fpcr_ah) {
        return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl,
                                  a->rd, a->rn, a->rm, a->ra, sel, FPST_AH);
    } else {
        return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl,
                                  a->rd, a->rn, a->rm, a->ra, sel, FPST_A64);
    }
}

TRANS_FEAT(BFMLSLB_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, false)
TRANS_FEAT(BFMLSLT_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, true)

static bool do_BFMLSL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    if (s->fpcr_ah) {
        return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl_idx,
                                  a->rd, a->rn, a->rm, a->ra,
                                  (a->index << 1) | sel, FPST_AH);
    } else {
        return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl_idx,
                                  a->rd, a->rn, a->rm, a->ra,
                                  (a->index << 1) | sel, FPST_A64);
    }
}

TRANS_FEAT(BFMLSLB_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, false)
TRANS_FEAT(BFMLSLT_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, true)

static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme_or_sve2p1, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, tcg_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);

    /* Apply to either copy the source, or write zeros.
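     * tmp is 0 if the selected element of Pm was false, or -1 (all ones)
     * if it was true, so the AND below either copies Pn into Pd or
     * zeroes Pd.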
*/ 7977 pl = size_for_gvec(pl); 7978 tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), 7979 pred_full_reg_offset(s, a->pn), tmp, pl, pl); 7980 return true; 7981 } 7982 7983 static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7984 { 7985 tcg_gen_smax_i32(d, a, n); 7986 tcg_gen_smin_i32(d, d, m); 7987 } 7988 7989 static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7990 { 7991 tcg_gen_smax_i64(d, a, n); 7992 tcg_gen_smin_i64(d, d, m); 7993 } 7994 7995 static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7996 TCGv_vec m, TCGv_vec a) 7997 { 7998 tcg_gen_smax_vec(vece, d, a, n); 7999 tcg_gen_smin_vec(vece, d, d, m); 8000 } 8001 8002 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 8003 uint32_t a, uint32_t oprsz, uint32_t maxsz) 8004 { 8005 static const TCGOpcode vecop[] = { 8006 INDEX_op_smin_vec, INDEX_op_smax_vec, 0 8007 }; 8008 static const GVecGen4 ops[4] = { 8009 { .fniv = gen_sclamp_vec, 8010 .fno = gen_helper_gvec_sclamp_b, 8011 .opt_opc = vecop, 8012 .vece = MO_8 }, 8013 { .fniv = gen_sclamp_vec, 8014 .fno = gen_helper_gvec_sclamp_h, 8015 .opt_opc = vecop, 8016 .vece = MO_16 }, 8017 { .fni4 = gen_sclamp_i32, 8018 .fniv = gen_sclamp_vec, 8019 .fno = gen_helper_gvec_sclamp_s, 8020 .opt_opc = vecop, 8021 .vece = MO_32 }, 8022 { .fni8 = gen_sclamp_i64, 8023 .fniv = gen_sclamp_vec, 8024 .fno = gen_helper_gvec_sclamp_d, 8025 .opt_opc = vecop, 8026 .vece = MO_64, 8027 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 8028 }; 8029 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 8030 } 8031 8032 TRANS_FEAT(SCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_sclamp, a) 8033 8034 static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 8035 { 8036 tcg_gen_umax_i32(d, a, n); 8037 tcg_gen_umin_i32(d, d, m); 8038 } 8039 8040 static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 8041 { 8042 tcg_gen_umax_i64(d, a, n); 8043 tcg_gen_umin_i64(d, d, m); 8044 } 8045 8046 static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 8047 TCGv_vec m, TCGv_vec a) 8048 { 8049 tcg_gen_umax_vec(vece, d, a, n); 8050 tcg_gen_umin_vec(vece, d, d, m); 8051 } 8052 8053 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 8054 uint32_t a, uint32_t oprsz, uint32_t maxsz) 8055 { 8056 static const TCGOpcode vecop[] = { 8057 INDEX_op_umin_vec, INDEX_op_umax_vec, 0 8058 }; 8059 static const GVecGen4 ops[4] = { 8060 { .fniv = gen_uclamp_vec, 8061 .fno = gen_helper_gvec_uclamp_b, 8062 .opt_opc = vecop, 8063 .vece = MO_8 }, 8064 { .fniv = gen_uclamp_vec, 8065 .fno = gen_helper_gvec_uclamp_h, 8066 .opt_opc = vecop, 8067 .vece = MO_16 }, 8068 { .fni4 = gen_uclamp_i32, 8069 .fniv = gen_uclamp_vec, 8070 .fno = gen_helper_gvec_uclamp_s, 8071 .opt_opc = vecop, 8072 .vece = MO_32 }, 8073 { .fni8 = gen_uclamp_i64, 8074 .fniv = gen_uclamp_vec, 8075 .fno = gen_helper_gvec_uclamp_d, 8076 .opt_opc = vecop, 8077 .vece = MO_64, 8078 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 8079 }; 8080 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 8081 } 8082 8083 TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a) 8084 8085 static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a) 8086 { 8087 static gen_helper_gvec_3_ptr * const fn[] = { 8088 gen_helper_sme2_bfclamp, 8089 gen_helper_sme2_fclamp_h, 8090 gen_helper_sme2_fclamp_s, 8091 gen_helper_sme2_fclamp_d, 8092 }; 8093 8094 /* This insn uses MO_8 to encode BFloat16. */ 8095 if (a->esz == MO_8 8096 ? 
!dc_isar_feature(aa64_sve_b16b16, s) 8097 : !dc_isar_feature(aa64_sme2_or_sve2p1, s)) { 8098 return false; 8099 } 8100 8101 /* So far we never optimize rda with MOVPRFX */ 8102 assert(a->rd == a->ra); 8103 return gen_gvec_fpst_zzz(s, fn[a->esz], a->rd, a->rn, a->rm, 1, 8104 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 8105 } 8106 8107 TRANS_FEAT(SQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, 8108 gen_helper_sme2_sqcvtn_sh, a->rd, a->rn, 0) 8109 TRANS_FEAT(UQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, 8110 gen_helper_sme2_uqcvtn_sh, a->rd, a->rn, 0) 8111 TRANS_FEAT(SQCVTUN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, 8112 gen_helper_sme2_sqcvtun_sh, a->rd, a->rn, 0) 8113 8114 static bool gen_ldst_c(DisasContext *s, TCGv_i64 addr, int zd, int png, 8115 MemOp esz, bool is_write, int n, bool strided) 8116 { 8117 typedef void ldst_c_fn(TCGv_env, TCGv_ptr, TCGv_i64, 8118 TCGv_i32, TCGv_i64); 8119 static ldst_c_fn * const f_ldst[2][2][4] = { 8120 { { gen_helper_sve2p1_ld1bb_c, 8121 gen_helper_sve2p1_ld1hh_le_c, 8122 gen_helper_sve2p1_ld1ss_le_c, 8123 gen_helper_sve2p1_ld1dd_le_c, }, 8124 { gen_helper_sve2p1_ld1bb_c, 8125 gen_helper_sve2p1_ld1hh_be_c, 8126 gen_helper_sve2p1_ld1ss_be_c, 8127 gen_helper_sve2p1_ld1dd_be_c, } }, 8128 8129 { { gen_helper_sve2p1_st1bb_c, 8130 gen_helper_sve2p1_st1hh_le_c, 8131 gen_helper_sve2p1_st1ss_le_c, 8132 gen_helper_sve2p1_st1dd_le_c, }, 8133 { gen_helper_sve2p1_st1bb_c, 8134 gen_helper_sve2p1_st1hh_be_c, 8135 gen_helper_sve2p1_st1ss_be_c, 8136 gen_helper_sve2p1_st1dd_be_c, } } 8137 }; 8138 8139 TCGv_i32 t_png; 8140 TCGv_i64 t_desc; 8141 TCGv_ptr t_zd; 8142 uint64_t desc, lg2_rstride = 0; 8143 bool be = s->be_data == MO_BE; 8144 8145 assert(n == 2 || n == 4); 8146 if (strided) { 8147 lg2_rstride = 3; 8148 if (n == 4) { 8149 /* Validate ZD alignment. */ 8150 if (zd & 4) { 8151 return false; 8152 } 8153 lg2_rstride = 2; 8154 } 8155 /* Ignore non-temporal bit */ 8156 zd &= ~8; 8157 } 8158 8159 if (strided || !dc_isar_feature(aa64_sve2p1, s) 8160 ? !sme_sm_enabled_check(s) 8161 : !sve_access_check(s)) { 8162 return true; 8163 } 8164 8165 if (!s->mte_active[0]) { 8166 addr = clean_data_tbi(s, addr); 8167 } 8168 8169 desc = n == 2 ? 0 : 1; 8170 desc = desc | (lg2_rstride << 1); 8171 desc = make_svemte_desc(s, vec_full_reg_size(s), 1, esz, is_write, desc); 8172 t_desc = tcg_constant_i64(desc); 8173 8174 t_png = tcg_temp_new_i32(); 8175 tcg_gen_ld16u_i32(t_png, tcg_env, 8176 pred_full_reg_offset(s, png) ^ 8177 (HOST_BIG_ENDIAN ? 
                      6 : 0));

    t_zd = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd));

    f_ldst[is_write][be][esz](tcg_env, t_zd, addr, t_png, t_desc);
    return true;
}

/* Scalar-plus-scalar form: address = Xn|SP + (Xm << esz). */
static bool gen_ldst_zcrr_c(DisasContext *s, arg_zcrr_ldst *a,
                            bool is_write, bool strided)
{
    TCGv_i64 addr = tcg_temp_new_i64();

    tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
    tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
    return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write,
                      a->nreg, strided);
}

/* Scalar-plus-immediate form: address = Xn|SP + imm * nreg * VL bytes. */
static bool gen_ldst_zcri_c(DisasContext *s, arg_zcri_ldst *a,
                            bool is_write, bool strided)
{
    TCGv_i64 addr = tcg_temp_new_i64();

    tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                     a->imm * a->nreg * vec_full_reg_size(s));
    return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write,
                      a->nreg, strided);
}

TRANS_FEAT(LD1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, false, false)
TRANS_FEAT(LD1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, false, false)
TRANS_FEAT(ST1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, true, false)
TRANS_FEAT(ST1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, true, false)

TRANS_FEAT(LD1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, false, true)
TRANS_FEAT(LD1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, false, true)
TRANS_FEAT(ST1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, true, true)
TRANS_FEAT(ST1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, true, true)