1 /* 2 * AArch64 SVE translation 3 * 4 * Copyright (c) 2018 Linaro, Ltd 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "fpu/softfloat.h" 24 25 26 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, 27 TCGv_i64, uint32_t, uint32_t); 28 29 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, 30 TCGv_ptr, TCGv_i32); 31 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr, 32 TCGv_ptr, TCGv_ptr, TCGv_i32); 33 34 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32); 35 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, 36 TCGv_ptr, TCGv_i64, TCGv_i32); 37 38 /* 39 * Helpers for extracting complex instruction fields. 40 */ 41 42 /* See e.g. ASR (immediate, predicated). 43 * Returns -1 for unallocated encoding; diagnose later. 44 */ 45 static int tszimm_esz(DisasContext *s, int x) 46 { 47 x >>= 3; /* discard imm3 */ 48 return 31 - clz32(x); 49 } 50 51 static int tszimm_shr(DisasContext *s, int x) 52 { 53 return (16 << tszimm_esz(s, x)) - x; 54 } 55 56 /* See e.g. LSL (immediate, predicated). */ 57 static int tszimm_shl(DisasContext *s, int x) 58 { 59 return x - (8 << tszimm_esz(s, x)); 60 } 61 62 /* The SH bit is in bit 8. Extract the low 8 and shift. */ 63 static inline int expand_imm_sh8s(DisasContext *s, int x) 64 { 65 return (int8_t)x << (x & 0x100 ? 8 : 0); 66 } 67 68 static inline int expand_imm_sh8u(DisasContext *s, int x) 69 { 70 return (uint8_t)x << (x & 0x100 ? 8 : 0); 71 } 72 73 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype) 74 * with unsigned data. C.f. SVE Memory Contiguous Load Group. 75 */ 76 static inline int msz_dtype(DisasContext *s, int msz) 77 { 78 static const uint8_t dtype[4] = { 0, 5, 10, 15 }; 79 return dtype[msz]; 80 } 81 82 /* 83 * Include the generated decoder. 84 */ 85 86 #include "decode-sve.c.inc" 87 88 /* 89 * Implement all of the translator functions referenced by the decoder. 90 */ 91 92 /* Invoke an out-of-line helper on 2 Zregs. */ 93 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, 94 int rd, int rn, int data) 95 { 96 if (fn == NULL) { 97 return false; 98 } 99 if (sve_access_check(s)) { 100 unsigned vsz = vec_full_reg_size(s); 101 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 102 vec_full_reg_offset(s, rn), 103 vsz, vsz, data, fn); 104 } 105 return true; 106 } 107 108 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 109 int rd, int rn, int data, 110 ARMFPStatusFlavour flavour) 111 { 112 if (fn == NULL) { 113 return false; 114 } 115 if (sve_access_check(s)) { 116 unsigned vsz = vec_full_reg_size(s); 117 TCGv_ptr status = fpstatus_ptr(flavour); 118 119 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 120 vec_full_reg_offset(s, rn), 121 status, vsz, vsz, data, fn); 122 } 123 return true; 124 } 125 126 static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 127 arg_rr_esz *a, int data) 128 { 129 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, 130 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 131 } 132 133 /* Invoke an out-of-line helper on 3 Zregs. */ 134 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 135 int rd, int rn, int rm, int data) 136 { 137 if (fn == NULL) { 138 return false; 139 } 140 if (sve_access_check(s)) { 141 unsigned vsz = vec_full_reg_size(s); 142 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 143 vec_full_reg_offset(s, rn), 144 vec_full_reg_offset(s, rm), 145 vsz, vsz, data, fn); 146 } 147 return true; 148 } 149 150 static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 151 arg_rrr_esz *a, int data) 152 { 153 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); 154 } 155 156 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */ 157 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 158 int rd, int rn, int rm, 159 int data, ARMFPStatusFlavour flavour) 160 { 161 if (fn == NULL) { 162 return false; 163 } 164 if (sve_access_check(s)) { 165 unsigned vsz = vec_full_reg_size(s); 166 TCGv_ptr status = fpstatus_ptr(flavour); 167 168 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 169 vec_full_reg_offset(s, rn), 170 vec_full_reg_offset(s, rm), 171 status, vsz, vsz, data, fn); 172 } 173 return true; 174 } 175 176 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 177 arg_rrr_esz *a, int data) 178 { 179 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 180 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 181 } 182 183 /* Invoke an out-of-line helper on 4 Zregs. */ 184 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 185 int rd, int rn, int rm, int ra, int data) 186 { 187 if (fn == NULL) { 188 return false; 189 } 190 if (sve_access_check(s)) { 191 unsigned vsz = vec_full_reg_size(s); 192 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 193 vec_full_reg_offset(s, rn), 194 vec_full_reg_offset(s, rm), 195 vec_full_reg_offset(s, ra), 196 vsz, vsz, data, fn); 197 } 198 return true; 199 } 200 201 static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 202 arg_rrrr_esz *a, int data) 203 { 204 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 205 } 206 207 static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn, 208 arg_rrxr_esz *a) 209 { 210 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 211 } 212 213 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */ 214 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 215 int rd, int rn, int rm, int ra, 216 int data, TCGv_ptr ptr) 217 { 218 if (fn == NULL) { 219 return false; 220 } 221 if (sve_access_check(s)) { 222 unsigned vsz = vec_full_reg_size(s); 223 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 224 vec_full_reg_offset(s, rn), 225 vec_full_reg_offset(s, rm), 226 vec_full_reg_offset(s, ra), 227 ptr, vsz, vsz, data, fn); 228 } 229 return true; 230 } 231 232 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 233 int rd, int rn, int rm, int ra, 234 int data, ARMFPStatusFlavour flavour) 235 { 236 TCGv_ptr status = fpstatus_ptr(flavour); 237 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status); 238 return ret; 239 } 240 241 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */ 242 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn, 243 int rd, int rn, int rm, int ra, int pg, 244 int data, ARMFPStatusFlavour flavour) 245 { 246 if (fn == NULL) { 247 return false; 248 } 249 if (sve_access_check(s)) { 250 unsigned vsz = vec_full_reg_size(s); 251 TCGv_ptr status = fpstatus_ptr(flavour); 252 253 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd), 254 vec_full_reg_offset(s, rn), 255 vec_full_reg_offset(s, rm), 256 vec_full_reg_offset(s, ra), 257 pred_full_reg_offset(s, pg), 258 status, vsz, vsz, data, fn); 259 } 260 return true; 261 } 262 263 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */ 264 static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, 265 int rd, int rn, int pg, int data) 266 { 267 if (fn == NULL) { 268 return false; 269 } 270 if (sve_access_check(s)) { 271 unsigned vsz = vec_full_reg_size(s); 272 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 273 vec_full_reg_offset(s, rn), 274 pred_full_reg_offset(s, pg), 275 vsz, vsz, data, fn); 276 } 277 return true; 278 } 279 280 static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn, 281 arg_rpr_esz *a, int data) 282 { 283 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data); 284 } 285 286 static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn, 287 arg_rpri_esz *a) 288 { 289 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); 290 } 291 292 static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn, 293 int rd, int rn, int pg, int data, 294 ARMFPStatusFlavour flavour) 295 { 296 if (fn == NULL) { 297 return false; 298 } 299 if (sve_access_check(s)) { 300 unsigned vsz = vec_full_reg_size(s); 301 TCGv_ptr status = fpstatus_ptr(flavour); 302 303 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 304 vec_full_reg_offset(s, rn), 305 pred_full_reg_offset(s, pg), 306 status, vsz, vsz, data, fn); 307 } 308 return true; 309 } 310 311 static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 312 arg_rpr_esz *a, int data, 313 ARMFPStatusFlavour flavour) 314 { 315 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour); 316 } 317 318 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 319 static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, 320 int rd, int rn, int rm, int pg, int data) 321 { 322 if (fn == NULL) { 323 return false; 324 } 325 if (sve_access_check(s)) { 326 unsigned vsz = vec_full_reg_size(s); 327 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 328 vec_full_reg_offset(s, rn), 329 vec_full_reg_offset(s, rm), 330 pred_full_reg_offset(s, pg), 331 vsz, vsz, data, fn); 332 } 333 return true; 334 } 335 336 static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn, 337 arg_rprr_esz *a, int data) 338 { 339 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data); 340 } 341 342 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 343 static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, 344 int rd, int rn, int rm, int pg, int data, 345 ARMFPStatusFlavour flavour) 346 { 347 if (fn == NULL) { 348 return false; 349 } 350 if (sve_access_check(s)) { 351 unsigned vsz = vec_full_reg_size(s); 352 TCGv_ptr status = fpstatus_ptr(flavour); 353 354 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 355 vec_full_reg_offset(s, rn), 356 vec_full_reg_offset(s, rm), 357 pred_full_reg_offset(s, pg), 358 status, vsz, vsz, data, fn); 359 } 360 return true; 361 } 362 363 static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 364 arg_rprr_esz *a) 365 { 366 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, 367 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 368 } 369 370 /* Invoke a vector expander on two Zregs and an immediate. */ 371 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 372 int esz, int rd, int rn, uint64_t imm) 373 { 374 if (gvec_fn == NULL) { 375 return false; 376 } 377 if (sve_access_check(s)) { 378 unsigned vsz = vec_full_reg_size(s); 379 gvec_fn(esz, vec_full_reg_offset(s, rd), 380 vec_full_reg_offset(s, rn), imm, vsz, vsz); 381 } 382 return true; 383 } 384 385 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 386 arg_rri_esz *a) 387 { 388 if (a->esz < 0) { 389 /* Invalid tsz encoding -- see tszimm_esz. */ 390 return false; 391 } 392 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm); 393 } 394 395 /* Invoke a vector expander on three Zregs. */ 396 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, 397 int esz, int rd, int rn, int rm) 398 { 399 if (gvec_fn == NULL) { 400 return false; 401 } 402 if (sve_access_check(s)) { 403 unsigned vsz = vec_full_reg_size(s); 404 gvec_fn(esz, vec_full_reg_offset(s, rd), 405 vec_full_reg_offset(s, rn), 406 vec_full_reg_offset(s, rm), vsz, vsz); 407 } 408 return true; 409 } 410 411 static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn, 412 arg_rrr_esz *a) 413 { 414 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); 415 } 416 417 /* Invoke a vector expander on four Zregs. */ 418 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, 419 arg_rrrr_esz *a) 420 { 421 if (gvec_fn == NULL) { 422 return false; 423 } 424 if (sve_access_check(s)) { 425 unsigned vsz = vec_full_reg_size(s); 426 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 427 vec_full_reg_offset(s, a->rn), 428 vec_full_reg_offset(s, a->rm), 429 vec_full_reg_offset(s, a->ra), vsz, vsz); 430 } 431 return true; 432 } 433 434 /* Invoke a vector move on two Zregs. */ 435 static bool do_mov_z(DisasContext *s, int rd, int rn) 436 { 437 if (sve_access_check(s)) { 438 unsigned vsz = vec_full_reg_size(s); 439 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd), 440 vec_full_reg_offset(s, rn), vsz, vsz); 441 } 442 return true; 443 } 444 445 /* Initialize a Zreg with replications of a 64-bit immediate. */ 446 static void do_dupi_z(DisasContext *s, int rd, uint64_t word) 447 { 448 unsigned vsz = vec_full_reg_size(s); 449 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); 450 } 451 452 /* Invoke a vector expander on three Pregs. */ 453 static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, 454 int rd, int rn, int rm) 455 { 456 if (sve_access_check(s)) { 457 unsigned psz = pred_gvec_reg_size(s); 458 gvec_fn(MO_64, pred_full_reg_offset(s, rd), 459 pred_full_reg_offset(s, rn), 460 pred_full_reg_offset(s, rm), psz, psz); 461 } 462 return true; 463 } 464 465 /* Invoke a vector move on two Pregs. */ 466 static bool do_mov_p(DisasContext *s, int rd, int rn) 467 { 468 if (sve_access_check(s)) { 469 unsigned psz = pred_gvec_reg_size(s); 470 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), 471 pred_full_reg_offset(s, rn), psz, psz); 472 } 473 return true; 474 } 475 476 /* Set the cpu flags as per a return from an SVE helper. */ 477 static void do_pred_flags(TCGv_i32 t) 478 { 479 tcg_gen_mov_i32(cpu_NF, t); 480 tcg_gen_andi_i32(cpu_ZF, t, 2); 481 tcg_gen_andi_i32(cpu_CF, t, 1); 482 tcg_gen_movi_i32(cpu_VF, 0); 483 } 484 485 /* Subroutines computing the ARM PredTest psuedofunction. */ 486 static void do_predtest1(TCGv_i64 d, TCGv_i64 g) 487 { 488 TCGv_i32 t = tcg_temp_new_i32(); 489 490 gen_helper_sve_predtest1(t, d, g); 491 do_pred_flags(t); 492 } 493 494 static void do_predtest(DisasContext *s, int dofs, int gofs, int words) 495 { 496 TCGv_ptr dptr = tcg_temp_new_ptr(); 497 TCGv_ptr gptr = tcg_temp_new_ptr(); 498 TCGv_i32 t = tcg_temp_new_i32(); 499 500 tcg_gen_addi_ptr(dptr, tcg_env, dofs); 501 tcg_gen_addi_ptr(gptr, tcg_env, gofs); 502 503 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words)); 504 505 do_pred_flags(t); 506 } 507 508 /* For each element size, the bits within a predicate word that are active. */ 509 const uint64_t pred_esz_masks[5] = { 510 0xffffffffffffffffull, 0x5555555555555555ull, 511 0x1111111111111111ull, 0x0101010101010101ull, 512 0x0001000100010001ull, 513 }; 514 515 static bool trans_INVALID(DisasContext *s, arg_INVALID *a) 516 { 517 unallocated_encoding(s); 518 return true; 519 } 520 521 /* 522 *** SVE Logical - Unpredicated Group 523 */ 524 525 TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a) 526 TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) 527 TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) 528 TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) 529 530 static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 531 { 532 TCGv_i64 t = tcg_temp_new_i64(); 533 uint64_t mask = dup_const(MO_8, 0xff >> sh); 534 535 tcg_gen_xor_i64(t, n, m); 536 tcg_gen_shri_i64(d, t, sh); 537 tcg_gen_shli_i64(t, t, 8 - sh); 538 tcg_gen_andi_i64(d, d, mask); 539 tcg_gen_andi_i64(t, t, ~mask); 540 tcg_gen_or_i64(d, d, t); 541 } 542 543 static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 544 { 545 TCGv_i64 t = tcg_temp_new_i64(); 546 uint64_t mask = dup_const(MO_16, 0xffff >> sh); 547 548 tcg_gen_xor_i64(t, n, m); 549 tcg_gen_shri_i64(d, t, sh); 550 tcg_gen_shli_i64(t, t, 16 - sh); 551 tcg_gen_andi_i64(d, d, mask); 552 tcg_gen_andi_i64(t, t, ~mask); 553 tcg_gen_or_i64(d, d, t); 554 } 555 556 static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) 557 { 558 tcg_gen_xor_i32(d, n, m); 559 tcg_gen_rotri_i32(d, d, sh); 560 } 561 562 static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 563 { 564 tcg_gen_xor_i64(d, n, m); 565 tcg_gen_rotri_i64(d, d, sh); 566 } 567 568 static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 569 TCGv_vec m, int64_t sh) 570 { 571 tcg_gen_xor_vec(vece, d, n, m); 572 tcg_gen_rotri_vec(vece, d, d, sh); 573 } 574 575 void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 576 uint32_t rm_ofs, int64_t shift, 577 uint32_t opr_sz, uint32_t max_sz) 578 { 579 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; 580 static const GVecGen3i ops[4] = { 581 { .fni8 = gen_xar8_i64, 582 .fniv = gen_xar_vec, 583 .fno = gen_helper_sve2_xar_b, 584 .opt_opc = vecop, 585 .vece = MO_8 }, 586 { .fni8 = gen_xar16_i64, 587 .fniv = gen_xar_vec, 588 .fno = gen_helper_sve2_xar_h, 589 .opt_opc = vecop, 590 .vece = MO_16 }, 591 { .fni4 = gen_xar_i32, 592 .fniv = gen_xar_vec, 593 .fno = gen_helper_sve2_xar_s, 594 .opt_opc = vecop, 595 .vece = MO_32 }, 596 { .fni8 = gen_xar_i64, 597 .fniv = gen_xar_vec, 598 .fno = gen_helper_gvec_xar_d, 599 .opt_opc = vecop, 600 .vece = MO_64 } 601 }; 602 int esize = 8 << vece; 603 604 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ 605 tcg_debug_assert(shift >= 0); 606 tcg_debug_assert(shift <= esize); 607 shift &= esize - 1; 608 609 if (shift == 0) { 610 /* xar with no rotate devolves to xor. */ 611 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); 612 } else { 613 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 614 shift, &ops[vece]); 615 } 616 } 617 618 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) 619 { 620 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { 621 return false; 622 } 623 if (sve_access_check(s)) { 624 unsigned vsz = vec_full_reg_size(s); 625 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd), 626 vec_full_reg_offset(s, a->rn), 627 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz); 628 } 629 return true; 630 } 631 632 static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 633 { 634 tcg_gen_xor_i64(d, n, m); 635 tcg_gen_xor_i64(d, d, k); 636 } 637 638 static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 639 TCGv_vec m, TCGv_vec k) 640 { 641 tcg_gen_xor_vec(vece, d, n, m); 642 tcg_gen_xor_vec(vece, d, d, k); 643 } 644 645 static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 646 uint32_t a, uint32_t oprsz, uint32_t maxsz) 647 { 648 static const GVecGen4 op = { 649 .fni8 = gen_eor3_i64, 650 .fniv = gen_eor3_vec, 651 .fno = gen_helper_sve2_eor3, 652 .vece = MO_64, 653 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 654 }; 655 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 656 } 657 658 TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a) 659 660 static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 661 { 662 tcg_gen_andc_i64(d, m, k); 663 tcg_gen_xor_i64(d, d, n); 664 } 665 666 static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 667 TCGv_vec m, TCGv_vec k) 668 { 669 tcg_gen_andc_vec(vece, d, m, k); 670 tcg_gen_xor_vec(vece, d, d, n); 671 } 672 673 static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 674 uint32_t a, uint32_t oprsz, uint32_t maxsz) 675 { 676 static const GVecGen4 op = { 677 .fni8 = gen_bcax_i64, 678 .fniv = gen_bcax_vec, 679 .fno = gen_helper_sve2_bcax, 680 .vece = MO_64, 681 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 682 }; 683 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 684 } 685 686 TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a) 687 688 static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 689 uint32_t a, uint32_t oprsz, uint32_t maxsz) 690 { 691 /* BSL differs from the generic bitsel in argument ordering. */ 692 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz); 693 } 694 695 TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a) 696 697 static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 698 { 699 tcg_gen_andc_i64(n, k, n); 700 tcg_gen_andc_i64(m, m, k); 701 tcg_gen_or_i64(d, n, m); 702 } 703 704 static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 705 TCGv_vec m, TCGv_vec k) 706 { 707 if (TCG_TARGET_HAS_bitsel_vec) { 708 tcg_gen_not_vec(vece, n, n); 709 tcg_gen_bitsel_vec(vece, d, k, n, m); 710 } else { 711 tcg_gen_andc_vec(vece, n, k, n); 712 tcg_gen_andc_vec(vece, m, m, k); 713 tcg_gen_or_vec(vece, d, n, m); 714 } 715 } 716 717 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 718 uint32_t a, uint32_t oprsz, uint32_t maxsz) 719 { 720 static const GVecGen4 op = { 721 .fni8 = gen_bsl1n_i64, 722 .fniv = gen_bsl1n_vec, 723 .fno = gen_helper_sve2_bsl1n, 724 .vece = MO_64, 725 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 726 }; 727 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 728 } 729 730 TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a) 731 732 static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 733 { 734 /* 735 * Z[dn] = (n & k) | (~m & ~k) 736 * = | ~(m | k) 737 */ 738 tcg_gen_and_i64(n, n, k); 739 if (TCG_TARGET_HAS_orc_i64) { 740 tcg_gen_or_i64(m, m, k); 741 tcg_gen_orc_i64(d, n, m); 742 } else { 743 tcg_gen_nor_i64(m, m, k); 744 tcg_gen_or_i64(d, n, m); 745 } 746 } 747 748 static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 749 TCGv_vec m, TCGv_vec k) 750 { 751 if (TCG_TARGET_HAS_bitsel_vec) { 752 tcg_gen_not_vec(vece, m, m); 753 tcg_gen_bitsel_vec(vece, d, k, n, m); 754 } else { 755 tcg_gen_and_vec(vece, n, n, k); 756 tcg_gen_or_vec(vece, m, m, k); 757 tcg_gen_orc_vec(vece, d, n, m); 758 } 759 } 760 761 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 762 uint32_t a, uint32_t oprsz, uint32_t maxsz) 763 { 764 static const GVecGen4 op = { 765 .fni8 = gen_bsl2n_i64, 766 .fniv = gen_bsl2n_vec, 767 .fno = gen_helper_sve2_bsl2n, 768 .vece = MO_64, 769 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 770 }; 771 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 772 } 773 774 TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a) 775 776 static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 777 { 778 tcg_gen_and_i64(n, n, k); 779 tcg_gen_andc_i64(m, m, k); 780 tcg_gen_nor_i64(d, n, m); 781 } 782 783 static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 784 TCGv_vec m, TCGv_vec k) 785 { 786 tcg_gen_bitsel_vec(vece, d, k, n, m); 787 tcg_gen_not_vec(vece, d, d); 788 } 789 790 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 791 uint32_t a, uint32_t oprsz, uint32_t maxsz) 792 { 793 static const GVecGen4 op = { 794 .fni8 = gen_nbsl_i64, 795 .fniv = gen_nbsl_vec, 796 .fno = gen_helper_sve2_nbsl, 797 .vece = MO_64, 798 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 799 }; 800 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 801 } 802 803 TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a) 804 805 /* 806 *** SVE Integer Arithmetic - Unpredicated Group 807 */ 808 809 TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a) 810 TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a) 811 TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a) 812 TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a) 813 TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a) 814 TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a) 815 816 /* 817 *** SVE Integer Arithmetic - Binary Predicated Group 818 */ 819 820 /* Select active elememnts from Zn and inactive elements from Zm, 821 * storing the result in Zd. 822 */ 823 static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) 824 { 825 static gen_helper_gvec_4 * const fns[4] = { 826 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, 827 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d 828 }; 829 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); 830 } 831 832 #define DO_ZPZZ(NAME, FEAT, name) \ 833 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \ 834 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \ 835 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \ 836 }; \ 837 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \ 838 name##_zpzz_fns[a->esz], a, 0) 839 840 DO_ZPZZ(AND_zpzz, aa64_sve, sve_and) 841 DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor) 842 DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr) 843 DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic) 844 845 DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add) 846 DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub) 847 848 DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax) 849 DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax) 850 DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin) 851 DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin) 852 DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd) 853 DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd) 854 855 DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul) 856 DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh) 857 DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh) 858 859 DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr) 860 DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr) 861 DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl) 862 863 static gen_helper_gvec_4 * const sdiv_fns[4] = { 864 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d 865 }; 866 TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0) 867 868 static gen_helper_gvec_4 * const udiv_fns[4] = { 869 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d 870 }; 871 TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0) 872 873 TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz) 874 875 /* 876 *** SVE Integer Arithmetic - Unary Predicated Group 877 */ 878 879 #define DO_ZPZ(NAME, FEAT, name) \ 880 static gen_helper_gvec_3 * const name##_fns[4] = { \ 881 gen_helper_##name##_b, gen_helper_##name##_h, \ 882 gen_helper_##name##_s, gen_helper_##name##_d, \ 883 }; \ 884 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0) 885 886 DO_ZPZ(CLS, aa64_sve, sve_cls) 887 DO_ZPZ(CLZ, aa64_sve, sve_clz) 888 DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz) 889 DO_ZPZ(CNOT, aa64_sve, sve_cnot) 890 DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz) 891 DO_ZPZ(ABS, aa64_sve, sve_abs) 892 DO_ZPZ(NEG, aa64_sve, sve_neg) 893 DO_ZPZ(RBIT, aa64_sve, sve_rbit) 894 895 static gen_helper_gvec_3 * const fabs_fns[4] = { 896 NULL, gen_helper_sve_fabs_h, 897 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, 898 }; 899 TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) 900 901 static gen_helper_gvec_3 * const fneg_fns[4] = { 902 NULL, gen_helper_sve_fneg_h, 903 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, 904 }; 905 TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) 906 907 static gen_helper_gvec_3 * const sxtb_fns[4] = { 908 NULL, gen_helper_sve_sxtb_h, 909 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d, 910 }; 911 TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0) 912 913 static gen_helper_gvec_3 * const uxtb_fns[4] = { 914 NULL, gen_helper_sve_uxtb_h, 915 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d, 916 }; 917 TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0) 918 919 static gen_helper_gvec_3 * const sxth_fns[4] = { 920 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d 921 }; 922 TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0) 923 924 static gen_helper_gvec_3 * const uxth_fns[4] = { 925 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d 926 }; 927 TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0) 928 929 TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, 930 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0) 931 TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, 932 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0) 933 934 /* 935 *** SVE Integer Reduction Group 936 */ 937 938 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32); 939 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a, 940 gen_helper_gvec_reduc *fn) 941 { 942 unsigned vsz = vec_full_reg_size(s); 943 TCGv_ptr t_zn, t_pg; 944 TCGv_i32 desc; 945 TCGv_i64 temp; 946 947 if (fn == NULL) { 948 return false; 949 } 950 if (!sve_access_check(s)) { 951 return true; 952 } 953 954 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 955 temp = tcg_temp_new_i64(); 956 t_zn = tcg_temp_new_ptr(); 957 t_pg = tcg_temp_new_ptr(); 958 959 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 960 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 961 fn(temp, t_zn, t_pg, desc); 962 963 write_fp_dreg(s, a->rd, temp); 964 return true; 965 } 966 967 #define DO_VPZ(NAME, name) \ 968 static gen_helper_gvec_reduc * const name##_fns[4] = { \ 969 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ 970 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 971 }; \ 972 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz]) 973 974 DO_VPZ(ORV, orv) 975 DO_VPZ(ANDV, andv) 976 DO_VPZ(EORV, eorv) 977 978 DO_VPZ(UADDV, uaddv) 979 DO_VPZ(SMAXV, smaxv) 980 DO_VPZ(UMAXV, umaxv) 981 DO_VPZ(SMINV, sminv) 982 DO_VPZ(UMINV, uminv) 983 984 static gen_helper_gvec_reduc * const saddv_fns[4] = { 985 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h, 986 gen_helper_sve_saddv_s, NULL 987 }; 988 TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz]) 989 990 #undef DO_VPZ 991 992 /* 993 *** SVE Shift by Immediate - Predicated Group 994 */ 995 996 /* 997 * Copy Zn into Zd, storing zeros into inactive elements. 998 * If invert, store zeros into the active elements. 999 */ 1000 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, 1001 int esz, bool invert) 1002 { 1003 static gen_helper_gvec_3 * const fns[4] = { 1004 gen_helper_sve_movz_b, gen_helper_sve_movz_h, 1005 gen_helper_sve_movz_s, gen_helper_sve_movz_d, 1006 }; 1007 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); 1008 } 1009 1010 static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr, 1011 gen_helper_gvec_3 * const fns[4]) 1012 { 1013 int max; 1014 1015 if (a->esz < 0) { 1016 /* Invalid tsz encoding -- see tszimm_esz. */ 1017 return false; 1018 } 1019 1020 /* 1021 * Shift by element size is architecturally valid. 1022 * For arithmetic right-shift, it's the same as by one less. 1023 * For logical shifts and ASRD, it is a zeroing operation. 1024 */ 1025 max = 8 << a->esz; 1026 if (a->imm >= max) { 1027 if (asr) { 1028 a->imm = max - 1; 1029 } else { 1030 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); 1031 } 1032 } 1033 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a); 1034 } 1035 1036 static gen_helper_gvec_3 * const asr_zpzi_fns[4] = { 1037 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h, 1038 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d, 1039 }; 1040 TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns) 1041 1042 static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = { 1043 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h, 1044 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d, 1045 }; 1046 TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns) 1047 1048 static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = { 1049 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h, 1050 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d, 1051 }; 1052 TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns) 1053 1054 static gen_helper_gvec_3 * const asrd_fns[4] = { 1055 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h, 1056 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d, 1057 }; 1058 TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns) 1059 1060 static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = { 1061 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h, 1062 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d, 1063 }; 1064 TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1065 a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a) 1066 1067 static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = { 1068 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h, 1069 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d, 1070 }; 1071 TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1072 a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a) 1073 1074 static gen_helper_gvec_3 * const srshr_fns[4] = { 1075 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h, 1076 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d, 1077 }; 1078 TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1079 a->esz < 0 ? NULL : srshr_fns[a->esz], a) 1080 1081 static gen_helper_gvec_3 * const urshr_fns[4] = { 1082 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h, 1083 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d, 1084 }; 1085 TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1086 a->esz < 0 ? NULL : urshr_fns[a->esz], a) 1087 1088 static gen_helper_gvec_3 * const sqshlu_fns[4] = { 1089 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h, 1090 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d, 1091 }; 1092 TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi, 1093 a->esz < 0 ? NULL : sqshlu_fns[a->esz], a) 1094 1095 /* 1096 *** SVE Bitwise Shift - Predicated Group 1097 */ 1098 1099 #define DO_ZPZW(NAME, name) \ 1100 static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \ 1101 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \ 1102 gen_helper_sve_##name##_zpzw_s, NULL \ 1103 }; \ 1104 TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \ 1105 a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0) 1106 1107 DO_ZPZW(ASR, asr) 1108 DO_ZPZW(LSR, lsr) 1109 DO_ZPZW(LSL, lsl) 1110 1111 #undef DO_ZPZW 1112 1113 /* 1114 *** SVE Bitwise Shift - Unpredicated Group 1115 */ 1116 1117 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr, 1118 void (*gvec_fn)(unsigned, uint32_t, uint32_t, 1119 int64_t, uint32_t, uint32_t)) 1120 { 1121 if (a->esz < 0) { 1122 /* Invalid tsz encoding -- see tszimm_esz. */ 1123 return false; 1124 } 1125 if (sve_access_check(s)) { 1126 unsigned vsz = vec_full_reg_size(s); 1127 /* Shift by element size is architecturally valid. For 1128 arithmetic right-shift, it's the same as by one less. 1129 Otherwise it is a zeroing operation. */ 1130 if (a->imm >= 8 << a->esz) { 1131 if (asr) { 1132 a->imm = (8 << a->esz) - 1; 1133 } else { 1134 do_dupi_z(s, a->rd, 0); 1135 return true; 1136 } 1137 } 1138 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 1139 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 1140 } 1141 return true; 1142 } 1143 1144 TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari) 1145 TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri) 1146 TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli) 1147 1148 #define DO_ZZW(NAME, name) \ 1149 static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \ 1150 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \ 1151 gen_helper_sve_##name##_zzw_s, NULL \ 1152 }; \ 1153 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \ 1154 name##_zzw_fns[a->esz], a, 0) 1155 1156 DO_ZZW(ASR_zzw, asr) 1157 DO_ZZW(LSR_zzw, lsr) 1158 DO_ZZW(LSL_zzw, lsl) 1159 1160 #undef DO_ZZW 1161 1162 /* 1163 *** SVE Integer Multiply-Add Group 1164 */ 1165 1166 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a, 1167 gen_helper_gvec_5 *fn) 1168 { 1169 if (sve_access_check(s)) { 1170 unsigned vsz = vec_full_reg_size(s); 1171 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd), 1172 vec_full_reg_offset(s, a->ra), 1173 vec_full_reg_offset(s, a->rn), 1174 vec_full_reg_offset(s, a->rm), 1175 pred_full_reg_offset(s, a->pg), 1176 vsz, vsz, 0, fn); 1177 } 1178 return true; 1179 } 1180 1181 static gen_helper_gvec_5 * const mla_fns[4] = { 1182 gen_helper_sve_mla_b, gen_helper_sve_mla_h, 1183 gen_helper_sve_mla_s, gen_helper_sve_mla_d, 1184 }; 1185 TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz]) 1186 1187 static gen_helper_gvec_5 * const mls_fns[4] = { 1188 gen_helper_sve_mls_b, gen_helper_sve_mls_h, 1189 gen_helper_sve_mls_s, gen_helper_sve_mls_d, 1190 }; 1191 TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz]) 1192 1193 /* 1194 *** SVE Index Generation Group 1195 */ 1196 1197 static bool do_index(DisasContext *s, int esz, int rd, 1198 TCGv_i64 start, TCGv_i64 incr) 1199 { 1200 unsigned vsz; 1201 TCGv_i32 desc; 1202 TCGv_ptr t_zd; 1203 1204 if (!sve_access_check(s)) { 1205 return true; 1206 } 1207 1208 vsz = vec_full_reg_size(s); 1209 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1210 t_zd = tcg_temp_new_ptr(); 1211 1212 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 1213 if (esz == 3) { 1214 gen_helper_sve_index_d(t_zd, start, incr, desc); 1215 } else { 1216 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 1217 static index_fn * const fns[3] = { 1218 gen_helper_sve_index_b, 1219 gen_helper_sve_index_h, 1220 gen_helper_sve_index_s, 1221 }; 1222 TCGv_i32 s32 = tcg_temp_new_i32(); 1223 TCGv_i32 i32 = tcg_temp_new_i32(); 1224 1225 tcg_gen_extrl_i64_i32(s32, start); 1226 tcg_gen_extrl_i64_i32(i32, incr); 1227 fns[esz](t_zd, s32, i32, desc); 1228 } 1229 return true; 1230 } 1231 1232 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, 1233 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) 1234 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, 1235 tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) 1236 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, 1237 cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) 1238 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, 1239 cpu_reg(s, a->rn), cpu_reg(s, a->rm)) 1240 1241 /* 1242 *** SVE Stack Allocation Group 1243 */ 1244 1245 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) 1246 { 1247 if (!dc_isar_feature(aa64_sve, s)) { 1248 return false; 1249 } 1250 if (sve_access_check(s)) { 1251 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1252 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1253 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); 1254 } 1255 return true; 1256 } 1257 1258 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) 1259 { 1260 if (!dc_isar_feature(aa64_sme, s)) { 1261 return false; 1262 } 1263 if (sme_enabled_check(s)) { 1264 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1265 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1266 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); 1267 } 1268 return true; 1269 } 1270 1271 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) 1272 { 1273 if (!dc_isar_feature(aa64_sve, s)) { 1274 return false; 1275 } 1276 if (sve_access_check(s)) { 1277 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1278 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1279 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); 1280 } 1281 return true; 1282 } 1283 1284 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) 1285 { 1286 if (!dc_isar_feature(aa64_sme, s)) { 1287 return false; 1288 } 1289 if (sme_enabled_check(s)) { 1290 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1291 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1292 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); 1293 } 1294 return true; 1295 } 1296 1297 static bool trans_RDVL(DisasContext *s, arg_RDVL *a) 1298 { 1299 if (!dc_isar_feature(aa64_sve, s)) { 1300 return false; 1301 } 1302 if (sve_access_check(s)) { 1303 TCGv_i64 reg = cpu_reg(s, a->rd); 1304 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); 1305 } 1306 return true; 1307 } 1308 1309 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) 1310 { 1311 if (!dc_isar_feature(aa64_sme, s)) { 1312 return false; 1313 } 1314 if (sme_enabled_check(s)) { 1315 TCGv_i64 reg = cpu_reg(s, a->rd); 1316 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); 1317 } 1318 return true; 1319 } 1320 1321 /* 1322 *** SVE Compute Vector Address Group 1323 */ 1324 1325 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) 1326 { 1327 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); 1328 } 1329 1330 TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) 1331 TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) 1332 TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) 1333 TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32) 1334 1335 /* 1336 *** SVE Integer Misc - Unpredicated Group 1337 */ 1338 1339 static gen_helper_gvec_2 * const fexpa_fns[4] = { 1340 NULL, gen_helper_sve_fexpa_h, 1341 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, 1342 }; 1343 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, 1344 fexpa_fns[a->esz], a->rd, a->rn, 0) 1345 1346 static gen_helper_gvec_3 * const ftssel_fns[4] = { 1347 NULL, gen_helper_sve_ftssel_h, 1348 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, 1349 }; 1350 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, 1351 ftssel_fns[a->esz], a, 0) 1352 1353 /* 1354 *** SVE Predicate Logical Operations Group 1355 */ 1356 1357 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, 1358 const GVecGen4 *gvec_op) 1359 { 1360 if (!sve_access_check(s)) { 1361 return true; 1362 } 1363 1364 unsigned psz = pred_gvec_reg_size(s); 1365 int dofs = pred_full_reg_offset(s, a->rd); 1366 int nofs = pred_full_reg_offset(s, a->rn); 1367 int mofs = pred_full_reg_offset(s, a->rm); 1368 int gofs = pred_full_reg_offset(s, a->pg); 1369 1370 if (!a->s) { 1371 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1372 return true; 1373 } 1374 1375 if (psz == 8) { 1376 /* Do the operation and the flags generation in temps. */ 1377 TCGv_i64 pd = tcg_temp_new_i64(); 1378 TCGv_i64 pn = tcg_temp_new_i64(); 1379 TCGv_i64 pm = tcg_temp_new_i64(); 1380 TCGv_i64 pg = tcg_temp_new_i64(); 1381 1382 tcg_gen_ld_i64(pn, tcg_env, nofs); 1383 tcg_gen_ld_i64(pm, tcg_env, mofs); 1384 tcg_gen_ld_i64(pg, tcg_env, gofs); 1385 1386 gvec_op->fni8(pd, pn, pm, pg); 1387 tcg_gen_st_i64(pd, tcg_env, dofs); 1388 1389 do_predtest1(pd, pg); 1390 } else { 1391 /* The operation and flags generation is large. The computation 1392 * of the flags depends on the original contents of the guarding 1393 * predicate. If the destination overwrites the guarding predicate, 1394 * then the easiest way to get this right is to save a copy. 1395 */ 1396 int tofs = gofs; 1397 if (a->rd == a->pg) { 1398 tofs = offsetof(CPUARMState, vfp.preg_tmp); 1399 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz); 1400 } 1401 1402 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1403 do_predtest(s, dofs, tofs, psz / 8); 1404 } 1405 return true; 1406 } 1407 1408 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1409 { 1410 tcg_gen_and_i64(pd, pn, pm); 1411 tcg_gen_and_i64(pd, pd, pg); 1412 } 1413 1414 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1415 TCGv_vec pm, TCGv_vec pg) 1416 { 1417 tcg_gen_and_vec(vece, pd, pn, pm); 1418 tcg_gen_and_vec(vece, pd, pd, pg); 1419 } 1420 1421 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) 1422 { 1423 static const GVecGen4 op = { 1424 .fni8 = gen_and_pg_i64, 1425 .fniv = gen_and_pg_vec, 1426 .fno = gen_helper_sve_and_pppp, 1427 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1428 }; 1429 1430 if (!dc_isar_feature(aa64_sve, s)) { 1431 return false; 1432 } 1433 if (!a->s) { 1434 if (a->rn == a->rm) { 1435 if (a->pg == a->rn) { 1436 return do_mov_p(s, a->rd, a->rn); 1437 } 1438 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); 1439 } else if (a->pg == a->rn || a->pg == a->rm) { 1440 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); 1441 } 1442 } 1443 return do_pppp_flags(s, a, &op); 1444 } 1445 1446 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1447 { 1448 tcg_gen_andc_i64(pd, pn, pm); 1449 tcg_gen_and_i64(pd, pd, pg); 1450 } 1451 1452 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1453 TCGv_vec pm, TCGv_vec pg) 1454 { 1455 tcg_gen_andc_vec(vece, pd, pn, pm); 1456 tcg_gen_and_vec(vece, pd, pd, pg); 1457 } 1458 1459 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) 1460 { 1461 static const GVecGen4 op = { 1462 .fni8 = gen_bic_pg_i64, 1463 .fniv = gen_bic_pg_vec, 1464 .fno = gen_helper_sve_bic_pppp, 1465 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1466 }; 1467 1468 if (!dc_isar_feature(aa64_sve, s)) { 1469 return false; 1470 } 1471 if (!a->s && a->pg == a->rn) { 1472 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); 1473 } 1474 return do_pppp_flags(s, a, &op); 1475 } 1476 1477 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1478 { 1479 tcg_gen_xor_i64(pd, pn, pm); 1480 tcg_gen_and_i64(pd, pd, pg); 1481 } 1482 1483 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1484 TCGv_vec pm, TCGv_vec pg) 1485 { 1486 tcg_gen_xor_vec(vece, pd, pn, pm); 1487 tcg_gen_and_vec(vece, pd, pd, pg); 1488 } 1489 1490 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) 1491 { 1492 static const GVecGen4 op = { 1493 .fni8 = gen_eor_pg_i64, 1494 .fniv = gen_eor_pg_vec, 1495 .fno = gen_helper_sve_eor_pppp, 1496 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1497 }; 1498 1499 if (!dc_isar_feature(aa64_sve, s)) { 1500 return false; 1501 } 1502 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ 1503 if (!a->s && a->pg == a->rm) { 1504 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); 1505 } 1506 return do_pppp_flags(s, a, &op); 1507 } 1508 1509 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) 1510 { 1511 if (a->s || !dc_isar_feature(aa64_sve, s)) { 1512 return false; 1513 } 1514 if (sve_access_check(s)) { 1515 unsigned psz = pred_gvec_reg_size(s); 1516 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), 1517 pred_full_reg_offset(s, a->pg), 1518 pred_full_reg_offset(s, a->rn), 1519 pred_full_reg_offset(s, a->rm), psz, psz); 1520 } 1521 return true; 1522 } 1523 1524 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1525 { 1526 tcg_gen_or_i64(pd, pn, pm); 1527 tcg_gen_and_i64(pd, pd, pg); 1528 } 1529 1530 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1531 TCGv_vec pm, TCGv_vec pg) 1532 { 1533 tcg_gen_or_vec(vece, pd, pn, pm); 1534 tcg_gen_and_vec(vece, pd, pd, pg); 1535 } 1536 1537 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) 1538 { 1539 static const GVecGen4 op = { 1540 .fni8 = gen_orr_pg_i64, 1541 .fniv = gen_orr_pg_vec, 1542 .fno = gen_helper_sve_orr_pppp, 1543 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1544 }; 1545 1546 if (!dc_isar_feature(aa64_sve, s)) { 1547 return false; 1548 } 1549 if (!a->s && a->pg == a->rn && a->rn == a->rm) { 1550 return do_mov_p(s, a->rd, a->rn); 1551 } 1552 return do_pppp_flags(s, a, &op); 1553 } 1554 1555 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1556 { 1557 tcg_gen_orc_i64(pd, pn, pm); 1558 tcg_gen_and_i64(pd, pd, pg); 1559 } 1560 1561 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1562 TCGv_vec pm, TCGv_vec pg) 1563 { 1564 tcg_gen_orc_vec(vece, pd, pn, pm); 1565 tcg_gen_and_vec(vece, pd, pd, pg); 1566 } 1567 1568 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) 1569 { 1570 static const GVecGen4 op = { 1571 .fni8 = gen_orn_pg_i64, 1572 .fniv = gen_orn_pg_vec, 1573 .fno = gen_helper_sve_orn_pppp, 1574 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1575 }; 1576 1577 if (!dc_isar_feature(aa64_sve, s)) { 1578 return false; 1579 } 1580 return do_pppp_flags(s, a, &op); 1581 } 1582 1583 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1584 { 1585 tcg_gen_or_i64(pd, pn, pm); 1586 tcg_gen_andc_i64(pd, pg, pd); 1587 } 1588 1589 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1590 TCGv_vec pm, TCGv_vec pg) 1591 { 1592 tcg_gen_or_vec(vece, pd, pn, pm); 1593 tcg_gen_andc_vec(vece, pd, pg, pd); 1594 } 1595 1596 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) 1597 { 1598 static const GVecGen4 op = { 1599 .fni8 = gen_nor_pg_i64, 1600 .fniv = gen_nor_pg_vec, 1601 .fno = gen_helper_sve_nor_pppp, 1602 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1603 }; 1604 1605 if (!dc_isar_feature(aa64_sve, s)) { 1606 return false; 1607 } 1608 return do_pppp_flags(s, a, &op); 1609 } 1610 1611 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1612 { 1613 tcg_gen_and_i64(pd, pn, pm); 1614 tcg_gen_andc_i64(pd, pg, pd); 1615 } 1616 1617 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1618 TCGv_vec pm, TCGv_vec pg) 1619 { 1620 tcg_gen_and_vec(vece, pd, pn, pm); 1621 tcg_gen_andc_vec(vece, pd, pg, pd); 1622 } 1623 1624 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) 1625 { 1626 static const GVecGen4 op = { 1627 .fni8 = gen_nand_pg_i64, 1628 .fniv = gen_nand_pg_vec, 1629 .fno = gen_helper_sve_nand_pppp, 1630 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1631 }; 1632 1633 if (!dc_isar_feature(aa64_sve, s)) { 1634 return false; 1635 } 1636 return do_pppp_flags(s, a, &op); 1637 } 1638 1639 /* 1640 *** SVE Predicate Misc Group 1641 */ 1642 1643 static bool trans_PTEST(DisasContext *s, arg_PTEST *a) 1644 { 1645 if (!dc_isar_feature(aa64_sve, s)) { 1646 return false; 1647 } 1648 if (sve_access_check(s)) { 1649 int nofs = pred_full_reg_offset(s, a->rn); 1650 int gofs = pred_full_reg_offset(s, a->pg); 1651 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8); 1652 1653 if (words == 1) { 1654 TCGv_i64 pn = tcg_temp_new_i64(); 1655 TCGv_i64 pg = tcg_temp_new_i64(); 1656 1657 tcg_gen_ld_i64(pn, tcg_env, nofs); 1658 tcg_gen_ld_i64(pg, tcg_env, gofs); 1659 do_predtest1(pn, pg); 1660 } else { 1661 do_predtest(s, nofs, gofs, words); 1662 } 1663 } 1664 return true; 1665 } 1666 1667 /* See the ARM pseudocode DecodePredCount. */ 1668 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz) 1669 { 1670 unsigned elements = fullsz >> esz; 1671 unsigned bound; 1672 1673 switch (pattern) { 1674 case 0x0: /* POW2 */ 1675 return pow2floor(elements); 1676 case 0x1: /* VL1 */ 1677 case 0x2: /* VL2 */ 1678 case 0x3: /* VL3 */ 1679 case 0x4: /* VL4 */ 1680 case 0x5: /* VL5 */ 1681 case 0x6: /* VL6 */ 1682 case 0x7: /* VL7 */ 1683 case 0x8: /* VL8 */ 1684 bound = pattern; 1685 break; 1686 case 0x9: /* VL16 */ 1687 case 0xa: /* VL32 */ 1688 case 0xb: /* VL64 */ 1689 case 0xc: /* VL128 */ 1690 case 0xd: /* VL256 */ 1691 bound = 16 << (pattern - 9); 1692 break; 1693 case 0x1d: /* MUL4 */ 1694 return elements - elements % 4; 1695 case 0x1e: /* MUL3 */ 1696 return elements - elements % 3; 1697 case 0x1f: /* ALL */ 1698 return elements; 1699 default: /* #uimm5 */ 1700 return 0; 1701 } 1702 return elements >= bound ? bound : 0; 1703 } 1704 1705 /* This handles all of the predicate initialization instructions, 1706 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32 1707 * so that decode_pred_count returns 0. For SETFFR, we will have 1708 * set RD == 16 == FFR. 1709 */ 1710 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) 1711 { 1712 if (!sve_access_check(s)) { 1713 return true; 1714 } 1715 1716 unsigned fullsz = vec_full_reg_size(s); 1717 unsigned ofs = pred_full_reg_offset(s, rd); 1718 unsigned numelem, setsz, i; 1719 uint64_t word, lastword; 1720 TCGv_i64 t; 1721 1722 numelem = decode_pred_count(fullsz, pat, esz); 1723 1724 /* Determine what we must store into each bit, and how many. */ 1725 if (numelem == 0) { 1726 lastword = word = 0; 1727 setsz = fullsz; 1728 } else { 1729 setsz = numelem << esz; 1730 lastword = word = pred_esz_masks[esz]; 1731 if (setsz % 64) { 1732 lastword &= MAKE_64BIT_MASK(0, setsz % 64); 1733 } 1734 } 1735 1736 t = tcg_temp_new_i64(); 1737 if (fullsz <= 64) { 1738 tcg_gen_movi_i64(t, lastword); 1739 tcg_gen_st_i64(t, tcg_env, ofs); 1740 goto done; 1741 } 1742 1743 if (word == lastword) { 1744 unsigned maxsz = size_for_gvec(fullsz / 8); 1745 unsigned oprsz = size_for_gvec(setsz / 8); 1746 1747 if (oprsz * 8 == setsz) { 1748 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word); 1749 goto done; 1750 } 1751 } 1752 1753 setsz /= 8; 1754 fullsz /= 8; 1755 1756 tcg_gen_movi_i64(t, word); 1757 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) { 1758 tcg_gen_st_i64(t, tcg_env, ofs + i); 1759 } 1760 if (lastword != word) { 1761 tcg_gen_movi_i64(t, lastword); 1762 tcg_gen_st_i64(t, tcg_env, ofs + i); 1763 i += 8; 1764 } 1765 if (i < fullsz) { 1766 tcg_gen_movi_i64(t, 0); 1767 for (; i < fullsz; i += 8) { 1768 tcg_gen_st_i64(t, tcg_env, ofs + i); 1769 } 1770 } 1771 1772 done: 1773 /* PTRUES */ 1774 if (setflag) { 1775 tcg_gen_movi_i32(cpu_NF, -(word != 0)); 1776 tcg_gen_movi_i32(cpu_CF, word == 0); 1777 tcg_gen_movi_i32(cpu_VF, 0); 1778 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1779 } 1780 return true; 1781 } 1782 1783 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) 1784 1785 /* Note pat == 31 is #all, to set all elements. */ 1786 TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, 1787 do_predset, 0, FFR_PRED_NUM, 31, false) 1788 1789 /* Note pat == 32 is #unimp, to set no elements. */ 1790 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) 1791 1792 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) 1793 { 1794 /* The path through do_pppp_flags is complicated enough to want to avoid 1795 * duplication. Frob the arguments into the form of a predicated AND. 1796 */ 1797 arg_rprr_s alt_a = { 1798 .rd = a->rd, .pg = a->pg, .s = a->s, 1799 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM, 1800 }; 1801 1802 s->is_nonstreaming = true; 1803 return trans_AND_pppp(s, &alt_a); 1804 } 1805 1806 TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) 1807 TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) 1808 1809 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, 1810 void (*gen_fn)(TCGv_i32, TCGv_ptr, 1811 TCGv_ptr, TCGv_i32)) 1812 { 1813 if (!sve_access_check(s)) { 1814 return true; 1815 } 1816 1817 TCGv_ptr t_pd = tcg_temp_new_ptr(); 1818 TCGv_ptr t_pg = tcg_temp_new_ptr(); 1819 TCGv_i32 t; 1820 unsigned desc = 0; 1821 1822 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 1823 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 1824 1825 tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd)); 1826 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn)); 1827 t = tcg_temp_new_i32(); 1828 1829 gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc)); 1830 1831 do_pred_flags(t); 1832 return true; 1833 } 1834 1835 TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst) 1836 TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext) 1837 1838 /* 1839 *** SVE Element Count Group 1840 */ 1841 1842 /* Perform an inline saturating addition of a 32-bit value within 1843 * a 64-bit register. The second operand is known to be positive, 1844 * which halves the comparisons we must perform to bound the result. 1845 */ 1846 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1847 { 1848 int64_t ibound; 1849 1850 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 1851 if (u) { 1852 tcg_gen_ext32u_i64(reg, reg); 1853 } else { 1854 tcg_gen_ext32s_i64(reg, reg); 1855 } 1856 if (d) { 1857 tcg_gen_sub_i64(reg, reg, val); 1858 ibound = (u ? 0 : INT32_MIN); 1859 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound)); 1860 } else { 1861 tcg_gen_add_i64(reg, reg, val); 1862 ibound = (u ? UINT32_MAX : INT32_MAX); 1863 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound)); 1864 } 1865 } 1866 1867 /* Similarly with 64-bit values. */ 1868 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1869 { 1870 TCGv_i64 t0 = tcg_temp_new_i64(); 1871 TCGv_i64 t2; 1872 1873 if (u) { 1874 if (d) { 1875 tcg_gen_sub_i64(t0, reg, val); 1876 t2 = tcg_constant_i64(0); 1877 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0); 1878 } else { 1879 tcg_gen_add_i64(t0, reg, val); 1880 t2 = tcg_constant_i64(-1); 1881 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0); 1882 } 1883 } else { 1884 TCGv_i64 t1 = tcg_temp_new_i64(); 1885 if (d) { 1886 /* Detect signed overflow for subtraction. */ 1887 tcg_gen_xor_i64(t0, reg, val); 1888 tcg_gen_sub_i64(t1, reg, val); 1889 tcg_gen_xor_i64(reg, reg, t1); 1890 tcg_gen_and_i64(t0, t0, reg); 1891 1892 /* Bound the result. */ 1893 tcg_gen_movi_i64(reg, INT64_MIN); 1894 t2 = tcg_constant_i64(0); 1895 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1); 1896 } else { 1897 /* Detect signed overflow for addition. */ 1898 tcg_gen_xor_i64(t0, reg, val); 1899 tcg_gen_add_i64(reg, reg, val); 1900 tcg_gen_xor_i64(t1, reg, val); 1901 tcg_gen_andc_i64(t0, t1, t0); 1902 1903 /* Bound the result. */ 1904 tcg_gen_movi_i64(t1, INT64_MAX); 1905 t2 = tcg_constant_i64(0); 1906 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); 1907 } 1908 } 1909 } 1910 1911 /* Similarly with a vector and a scalar operand. */ 1912 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1913 TCGv_i64 val, bool u, bool d) 1914 { 1915 unsigned vsz = vec_full_reg_size(s); 1916 TCGv_ptr dptr, nptr; 1917 TCGv_i32 t32, desc; 1918 TCGv_i64 t64; 1919 1920 dptr = tcg_temp_new_ptr(); 1921 nptr = tcg_temp_new_ptr(); 1922 tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd)); 1923 tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn)); 1924 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1925 1926 switch (esz) { 1927 case MO_8: 1928 t32 = tcg_temp_new_i32(); 1929 tcg_gen_extrl_i64_i32(t32, val); 1930 if (d) { 1931 tcg_gen_neg_i32(t32, t32); 1932 } 1933 if (u) { 1934 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1935 } else { 1936 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1937 } 1938 break; 1939 1940 case MO_16: 1941 t32 = tcg_temp_new_i32(); 1942 tcg_gen_extrl_i64_i32(t32, val); 1943 if (d) { 1944 tcg_gen_neg_i32(t32, t32); 1945 } 1946 if (u) { 1947 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1948 } else { 1949 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1950 } 1951 break; 1952 1953 case MO_32: 1954 t64 = tcg_temp_new_i64(); 1955 if (d) { 1956 tcg_gen_neg_i64(t64, val); 1957 } else { 1958 tcg_gen_mov_i64(t64, val); 1959 } 1960 if (u) { 1961 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1962 } else { 1963 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1964 } 1965 break; 1966 1967 case MO_64: 1968 if (u) { 1969 if (d) { 1970 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1971 } else { 1972 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1973 } 1974 } else if (d) { 1975 t64 = tcg_temp_new_i64(); 1976 tcg_gen_neg_i64(t64, val); 1977 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1978 } else { 1979 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1980 } 1981 break; 1982 1983 default: 1984 g_assert_not_reached(); 1985 } 1986 } 1987 1988 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1989 { 1990 if (!dc_isar_feature(aa64_sve, s)) { 1991 return false; 1992 } 1993 if (sve_access_check(s)) { 1994 unsigned fullsz = vec_full_reg_size(s); 1995 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1996 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 1997 } 1998 return true; 1999 } 2000 2001 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 2002 { 2003 if (!dc_isar_feature(aa64_sve, s)) { 2004 return false; 2005 } 2006 if (sve_access_check(s)) { 2007 unsigned fullsz = vec_full_reg_size(s); 2008 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2009 int inc = numelem * a->imm * (a->d ? -1 : 1); 2010 TCGv_i64 reg = cpu_reg(s, a->rd); 2011 2012 tcg_gen_addi_i64(reg, reg, inc); 2013 } 2014 return true; 2015 } 2016 2017 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 2018 { 2019 if (!dc_isar_feature(aa64_sve, s)) { 2020 return false; 2021 } 2022 if (!sve_access_check(s)) { 2023 return true; 2024 } 2025 2026 unsigned fullsz = vec_full_reg_size(s); 2027 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2028 int inc = numelem * a->imm; 2029 TCGv_i64 reg = cpu_reg(s, a->rd); 2030 2031 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 2032 if (inc == 0) { 2033 if (a->u) { 2034 tcg_gen_ext32u_i64(reg, reg); 2035 } else { 2036 tcg_gen_ext32s_i64(reg, reg); 2037 } 2038 } else { 2039 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 2040 } 2041 return true; 2042 } 2043 2044 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2045 { 2046 if (!dc_isar_feature(aa64_sve, s)) { 2047 return false; 2048 } 2049 if (!sve_access_check(s)) { 2050 return true; 2051 } 2052 2053 unsigned fullsz = vec_full_reg_size(s); 2054 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2055 int inc = numelem * a->imm; 2056 TCGv_i64 reg = cpu_reg(s, a->rd); 2057 2058 if (inc != 0) { 2059 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2060 } 2061 return true; 2062 } 2063 2064 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2065 { 2066 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2067 return false; 2068 } 2069 2070 unsigned fullsz = vec_full_reg_size(s); 2071 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2072 int inc = numelem * a->imm; 2073 2074 if (inc != 0) { 2075 if (sve_access_check(s)) { 2076 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2077 vec_full_reg_offset(s, a->rn), 2078 tcg_constant_i64(a->d ? -inc : inc), 2079 fullsz, fullsz); 2080 } 2081 } else { 2082 do_mov_z(s, a->rd, a->rn); 2083 } 2084 return true; 2085 } 2086 2087 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2088 { 2089 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2090 return false; 2091 } 2092 2093 unsigned fullsz = vec_full_reg_size(s); 2094 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2095 int inc = numelem * a->imm; 2096 2097 if (inc != 0) { 2098 if (sve_access_check(s)) { 2099 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2100 tcg_constant_i64(inc), a->u, a->d); 2101 } 2102 } else { 2103 do_mov_z(s, a->rd, a->rn); 2104 } 2105 return true; 2106 } 2107 2108 /* 2109 *** SVE Bitwise Immediate Group 2110 */ 2111 2112 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2113 { 2114 uint64_t imm; 2115 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2116 extract32(a->dbm, 0, 6), 2117 extract32(a->dbm, 6, 6))) { 2118 return false; 2119 } 2120 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2121 } 2122 2123 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2124 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2125 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2126 2127 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2128 { 2129 uint64_t imm; 2130 2131 if (!dc_isar_feature(aa64_sve, s)) { 2132 return false; 2133 } 2134 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2135 extract32(a->dbm, 0, 6), 2136 extract32(a->dbm, 6, 6))) { 2137 return false; 2138 } 2139 if (sve_access_check(s)) { 2140 do_dupi_z(s, a->rd, imm); 2141 } 2142 return true; 2143 } 2144 2145 /* 2146 *** SVE Integer Wide Immediate - Predicated Group 2147 */ 2148 2149 /* Implement all merging copies. This is used for CPY (immediate), 2150 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 2151 */ 2152 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2153 TCGv_i64 val) 2154 { 2155 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2156 static gen_cpy * const fns[4] = { 2157 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2158 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2159 }; 2160 unsigned vsz = vec_full_reg_size(s); 2161 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2162 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2163 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2164 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2165 2166 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 2167 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn)); 2168 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2169 2170 fns[esz](t_zd, t_zn, t_pg, val, desc); 2171 } 2172 2173 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2174 { 2175 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2176 return false; 2177 } 2178 if (sve_access_check(s)) { 2179 /* Decode the VFP immediate. */ 2180 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2181 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2182 } 2183 return true; 2184 } 2185 2186 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2187 { 2188 if (!dc_isar_feature(aa64_sve, s)) { 2189 return false; 2190 } 2191 if (sve_access_check(s)) { 2192 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2193 } 2194 return true; 2195 } 2196 2197 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2198 { 2199 static gen_helper_gvec_2i * const fns[4] = { 2200 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2201 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2202 }; 2203 2204 if (!dc_isar_feature(aa64_sve, s)) { 2205 return false; 2206 } 2207 if (sve_access_check(s)) { 2208 unsigned vsz = vec_full_reg_size(s); 2209 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2210 pred_full_reg_offset(s, a->pg), 2211 tcg_constant_i64(a->imm), 2212 vsz, vsz, 0, fns[a->esz]); 2213 } 2214 return true; 2215 } 2216 2217 /* 2218 *** SVE Permute Extract Group 2219 */ 2220 2221 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2222 { 2223 if (!sve_access_check(s)) { 2224 return true; 2225 } 2226 2227 unsigned vsz = vec_full_reg_size(s); 2228 unsigned n_ofs = imm >= vsz ? 0 : imm; 2229 unsigned n_siz = vsz - n_ofs; 2230 unsigned d = vec_full_reg_offset(s, rd); 2231 unsigned n = vec_full_reg_offset(s, rn); 2232 unsigned m = vec_full_reg_offset(s, rm); 2233 2234 /* Use host vector move insns if we have appropriate sizes 2235 * and no unfortunate overlap. 2236 */ 2237 if (m != d 2238 && n_ofs == size_for_gvec(n_ofs) 2239 && n_siz == size_for_gvec(n_siz) 2240 && (d != n || n_siz <= n_ofs)) { 2241 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2242 if (n_ofs != 0) { 2243 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2244 } 2245 } else { 2246 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2247 } 2248 return true; 2249 } 2250 2251 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2252 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2253 2254 /* 2255 *** SVE Permute - Unpredicated Group 2256 */ 2257 2258 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2259 { 2260 if (!dc_isar_feature(aa64_sve, s)) { 2261 return false; 2262 } 2263 if (sve_access_check(s)) { 2264 unsigned vsz = vec_full_reg_size(s); 2265 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2266 vsz, vsz, cpu_reg_sp(s, a->rn)); 2267 } 2268 return true; 2269 } 2270 2271 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2272 { 2273 if (!dc_isar_feature(aa64_sve, s)) { 2274 return false; 2275 } 2276 if ((a->imm & 0x1f) == 0) { 2277 return false; 2278 } 2279 if (sve_access_check(s)) { 2280 unsigned vsz = vec_full_reg_size(s); 2281 unsigned dofs = vec_full_reg_offset(s, a->rd); 2282 unsigned esz, index; 2283 2284 esz = ctz32(a->imm); 2285 index = a->imm >> (esz + 1); 2286 2287 if ((index << esz) < vsz) { 2288 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2289 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2290 } else { 2291 /* 2292 * While dup_mem handles 128-bit elements, dup_imm does not. 2293 * Thankfully element size doesn't matter for splatting zero. 2294 */ 2295 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2296 } 2297 } 2298 return true; 2299 } 2300 2301 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2302 { 2303 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2304 static gen_insr * const fns[4] = { 2305 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2306 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2307 }; 2308 unsigned vsz = vec_full_reg_size(s); 2309 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2310 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2311 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2312 2313 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd)); 2314 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2315 2316 fns[a->esz](t_zd, t_zn, val, desc); 2317 } 2318 2319 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2320 { 2321 if (!dc_isar_feature(aa64_sve, s)) { 2322 return false; 2323 } 2324 if (sve_access_check(s)) { 2325 TCGv_i64 t = tcg_temp_new_i64(); 2326 tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2327 do_insr_i64(s, a, t); 2328 } 2329 return true; 2330 } 2331 2332 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2333 { 2334 if (!dc_isar_feature(aa64_sve, s)) { 2335 return false; 2336 } 2337 if (sve_access_check(s)) { 2338 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2339 } 2340 return true; 2341 } 2342 2343 static gen_helper_gvec_2 * const rev_fns[4] = { 2344 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2345 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2346 }; 2347 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2348 2349 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2350 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2351 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2352 }; 2353 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0) 2354 2355 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2356 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2357 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2358 }; 2359 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2360 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2361 2362 static gen_helper_gvec_3 * const tbx_fns[4] = { 2363 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2364 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2365 }; 2366 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2367 2368 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2369 { 2370 static gen_helper_gvec_2 * const fns[4][2] = { 2371 { NULL, NULL }, 2372 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2373 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2374 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2375 }; 2376 2377 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2378 return false; 2379 } 2380 if (sve_access_check(s)) { 2381 unsigned vsz = vec_full_reg_size(s); 2382 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2383 vec_full_reg_offset(s, a->rn) 2384 + (a->h ? vsz / 2 : 0), 2385 vsz, vsz, 0, fns[a->esz][a->u]); 2386 } 2387 return true; 2388 } 2389 2390 /* 2391 *** SVE Permute - Predicates Group 2392 */ 2393 2394 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2395 gen_helper_gvec_3 *fn) 2396 { 2397 if (!sve_access_check(s)) { 2398 return true; 2399 } 2400 2401 unsigned vsz = pred_full_reg_size(s); 2402 2403 TCGv_ptr t_d = tcg_temp_new_ptr(); 2404 TCGv_ptr t_n = tcg_temp_new_ptr(); 2405 TCGv_ptr t_m = tcg_temp_new_ptr(); 2406 uint32_t desc = 0; 2407 2408 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2409 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2410 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2411 2412 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2413 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2414 tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm)); 2415 2416 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2417 return true; 2418 } 2419 2420 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2421 gen_helper_gvec_2 *fn) 2422 { 2423 if (!sve_access_check(s)) { 2424 return true; 2425 } 2426 2427 unsigned vsz = pred_full_reg_size(s); 2428 TCGv_ptr t_d = tcg_temp_new_ptr(); 2429 TCGv_ptr t_n = tcg_temp_new_ptr(); 2430 uint32_t desc = 0; 2431 2432 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2433 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2434 2435 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2436 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2437 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2438 2439 fn(t_d, t_n, tcg_constant_i32(desc)); 2440 return true; 2441 } 2442 2443 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2444 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2445 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2446 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2447 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2448 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2449 2450 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2451 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2452 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2453 2454 /* 2455 *** SVE Permute - Interleaving Group 2456 */ 2457 2458 static gen_helper_gvec_3 * const zip_fns[4] = { 2459 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2460 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2461 }; 2462 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2463 zip_fns[a->esz], a, 0) 2464 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2465 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2466 2467 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2468 gen_helper_sve2_zip_q, a, 0) 2469 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2470 gen_helper_sve2_zip_q, a, 2471 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2472 2473 static gen_helper_gvec_3 * const uzp_fns[4] = { 2474 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2475 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2476 }; 2477 2478 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2479 uzp_fns[a->esz], a, 0) 2480 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2481 uzp_fns[a->esz], a, 1 << a->esz) 2482 2483 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2484 gen_helper_sve2_uzp_q, a, 0) 2485 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2486 gen_helper_sve2_uzp_q, a, 16) 2487 2488 static gen_helper_gvec_3 * const trn_fns[4] = { 2489 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2490 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2491 }; 2492 2493 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2494 trn_fns[a->esz], a, 0) 2495 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2496 trn_fns[a->esz], a, 1 << a->esz) 2497 2498 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2499 gen_helper_sve2_trn_q, a, 0) 2500 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2501 gen_helper_sve2_trn_q, a, 16) 2502 2503 /* 2504 *** SVE Permute Vector - Predicated Group 2505 */ 2506 2507 static gen_helper_gvec_3 * const compact_fns[4] = { 2508 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2509 }; 2510 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2511 compact_fns[a->esz], a, 0) 2512 2513 /* Call the helper that computes the ARM LastActiveElement pseudocode 2514 * function, scaled by the element size. This includes the not found 2515 * indication; e.g. not found for esz=3 is -8. 2516 */ 2517 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2518 { 2519 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2520 * round up, as we do elsewhere, because we need the exact size. 2521 */ 2522 TCGv_ptr t_p = tcg_temp_new_ptr(); 2523 unsigned desc = 0; 2524 2525 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2526 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2527 2528 tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg)); 2529 2530 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2531 } 2532 2533 /* Increment LAST to the offset of the next element in the vector, 2534 * wrapping around to 0. 2535 */ 2536 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2537 { 2538 unsigned vsz = vec_full_reg_size(s); 2539 2540 tcg_gen_addi_i32(last, last, 1 << esz); 2541 if (is_power_of_2(vsz)) { 2542 tcg_gen_andi_i32(last, last, vsz - 1); 2543 } else { 2544 TCGv_i32 max = tcg_constant_i32(vsz); 2545 TCGv_i32 zero = tcg_constant_i32(0); 2546 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2547 } 2548 } 2549 2550 /* If LAST < 0, set LAST to the offset of the last element in the vector. */ 2551 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2552 { 2553 unsigned vsz = vec_full_reg_size(s); 2554 2555 if (is_power_of_2(vsz)) { 2556 tcg_gen_andi_i32(last, last, vsz - 1); 2557 } else { 2558 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2559 TCGv_i32 zero = tcg_constant_i32(0); 2560 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2561 } 2562 } 2563 2564 /* Load an unsigned element of ESZ from BASE+OFS. */ 2565 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2566 { 2567 TCGv_i64 r = tcg_temp_new_i64(); 2568 2569 switch (esz) { 2570 case 0: 2571 tcg_gen_ld8u_i64(r, base, ofs); 2572 break; 2573 case 1: 2574 tcg_gen_ld16u_i64(r, base, ofs); 2575 break; 2576 case 2: 2577 tcg_gen_ld32u_i64(r, base, ofs); 2578 break; 2579 case 3: 2580 tcg_gen_ld_i64(r, base, ofs); 2581 break; 2582 default: 2583 g_assert_not_reached(); 2584 } 2585 return r; 2586 } 2587 2588 /* Load an unsigned element of ESZ from RM[LAST]. */ 2589 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2590 int rm, int esz) 2591 { 2592 TCGv_ptr p = tcg_temp_new_ptr(); 2593 2594 /* Convert offset into vector into offset into ENV. 2595 * The final adjustment for the vector register base 2596 * is added via constant offset to the load. 2597 */ 2598 #if HOST_BIG_ENDIAN 2599 /* Adjust for element ordering. See vec_reg_offset. */ 2600 if (esz < 3) { 2601 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2602 } 2603 #endif 2604 tcg_gen_ext_i32_ptr(p, last); 2605 tcg_gen_add_ptr(p, p, tcg_env); 2606 2607 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2608 } 2609 2610 /* Compute CLAST for a Zreg. */ 2611 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2612 { 2613 TCGv_i32 last; 2614 TCGLabel *over; 2615 TCGv_i64 ele; 2616 unsigned vsz, esz = a->esz; 2617 2618 if (!sve_access_check(s)) { 2619 return true; 2620 } 2621 2622 last = tcg_temp_new_i32(); 2623 over = gen_new_label(); 2624 2625 find_last_active(s, last, esz, a->pg); 2626 2627 /* There is of course no movcond for a 2048-bit vector, 2628 * so we must branch over the actual store. 2629 */ 2630 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2631 2632 if (!before) { 2633 incr_last_active(s, last, esz); 2634 } 2635 2636 ele = load_last_active(s, last, a->rm, esz); 2637 2638 vsz = vec_full_reg_size(s); 2639 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2640 2641 /* If this insn used MOVPRFX, we may need a second move. */ 2642 if (a->rd != a->rn) { 2643 TCGLabel *done = gen_new_label(); 2644 tcg_gen_br(done); 2645 2646 gen_set_label(over); 2647 do_mov_z(s, a->rd, a->rn); 2648 2649 gen_set_label(done); 2650 } else { 2651 gen_set_label(over); 2652 } 2653 return true; 2654 } 2655 2656 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2657 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2658 2659 /* Compute CLAST for a scalar. */ 2660 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2661 bool before, TCGv_i64 reg_val) 2662 { 2663 TCGv_i32 last = tcg_temp_new_i32(); 2664 TCGv_i64 ele, cmp; 2665 2666 find_last_active(s, last, esz, pg); 2667 2668 /* Extend the original value of last prior to incrementing. */ 2669 cmp = tcg_temp_new_i64(); 2670 tcg_gen_ext_i32_i64(cmp, last); 2671 2672 if (!before) { 2673 incr_last_active(s, last, esz); 2674 } 2675 2676 /* The conceit here is that while last < 0 indicates not found, after 2677 * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address 2678 * from which we can load garbage. We then discard the garbage with 2679 * a conditional move. 2680 */ 2681 ele = load_last_active(s, last, rm, esz); 2682 2683 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2684 ele, reg_val); 2685 } 2686 2687 /* Compute CLAST for a Vreg. */ 2688 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2689 { 2690 if (sve_access_check(s)) { 2691 int esz = a->esz; 2692 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2693 TCGv_i64 reg = load_esz(tcg_env, ofs, esz); 2694 2695 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2696 write_fp_dreg(s, a->rd, reg); 2697 } 2698 return true; 2699 } 2700 2701 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2702 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2703 2704 /* Compute CLAST for a Xreg. */ 2705 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2706 { 2707 TCGv_i64 reg; 2708 2709 if (!sve_access_check(s)) { 2710 return true; 2711 } 2712 2713 reg = cpu_reg(s, a->rd); 2714 switch (a->esz) { 2715 case 0: 2716 tcg_gen_ext8u_i64(reg, reg); 2717 break; 2718 case 1: 2719 tcg_gen_ext16u_i64(reg, reg); 2720 break; 2721 case 2: 2722 tcg_gen_ext32u_i64(reg, reg); 2723 break; 2724 case 3: 2725 break; 2726 default: 2727 g_assert_not_reached(); 2728 } 2729 2730 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2731 return true; 2732 } 2733 2734 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2735 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2736 2737 /* Compute LAST for a scalar. */ 2738 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2739 int pg, int rm, bool before) 2740 { 2741 TCGv_i32 last = tcg_temp_new_i32(); 2742 2743 find_last_active(s, last, esz, pg); 2744 if (before) { 2745 wrap_last_active(s, last, esz); 2746 } else { 2747 incr_last_active(s, last, esz); 2748 } 2749 2750 return load_last_active(s, last, rm, esz); 2751 } 2752 2753 /* Compute LAST for a Vreg. */ 2754 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2755 { 2756 if (sve_access_check(s)) { 2757 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2758 write_fp_dreg(s, a->rd, val); 2759 } 2760 return true; 2761 } 2762 2763 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2764 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2765 2766 /* Compute LAST for a Xreg. */ 2767 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2768 { 2769 if (sve_access_check(s)) { 2770 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2771 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2772 } 2773 return true; 2774 } 2775 2776 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2777 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2778 2779 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2780 { 2781 if (!dc_isar_feature(aa64_sve, s)) { 2782 return false; 2783 } 2784 if (sve_access_check(s)) { 2785 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2786 } 2787 return true; 2788 } 2789 2790 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2791 { 2792 if (!dc_isar_feature(aa64_sve, s)) { 2793 return false; 2794 } 2795 if (sve_access_check(s)) { 2796 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2797 TCGv_i64 t = load_esz(tcg_env, ofs, a->esz); 2798 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2799 } 2800 return true; 2801 } 2802 2803 static gen_helper_gvec_3 * const revb_fns[4] = { 2804 NULL, gen_helper_sve_revb_h, 2805 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2806 }; 2807 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2808 2809 static gen_helper_gvec_3 * const revh_fns[4] = { 2810 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2811 }; 2812 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2813 2814 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2815 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2816 2817 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2818 2819 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2820 gen_helper_sve_splice, a, a->esz) 2821 2822 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2823 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2824 2825 /* 2826 *** SVE Integer Compare - Vectors Group 2827 */ 2828 2829 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2830 gen_helper_gvec_flags_4 *gen_fn) 2831 { 2832 TCGv_ptr pd, zn, zm, pg; 2833 unsigned vsz; 2834 TCGv_i32 t; 2835 2836 if (gen_fn == NULL) { 2837 return false; 2838 } 2839 if (!sve_access_check(s)) { 2840 return true; 2841 } 2842 2843 vsz = vec_full_reg_size(s); 2844 t = tcg_temp_new_i32(); 2845 pd = tcg_temp_new_ptr(); 2846 zn = tcg_temp_new_ptr(); 2847 zm = tcg_temp_new_ptr(); 2848 pg = tcg_temp_new_ptr(); 2849 2850 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2851 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2852 tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm)); 2853 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2854 2855 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2856 2857 do_pred_flags(t); 2858 return true; 2859 } 2860 2861 #define DO_PPZZ(NAME, name) \ 2862 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2863 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2864 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2865 }; \ 2866 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2867 a, name##_ppzz_fns[a->esz]) 2868 2869 DO_PPZZ(CMPEQ, cmpeq) 2870 DO_PPZZ(CMPNE, cmpne) 2871 DO_PPZZ(CMPGT, cmpgt) 2872 DO_PPZZ(CMPGE, cmpge) 2873 DO_PPZZ(CMPHI, cmphi) 2874 DO_PPZZ(CMPHS, cmphs) 2875 2876 #undef DO_PPZZ 2877 2878 #define DO_PPZW(NAME, name) \ 2879 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2880 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2881 gen_helper_sve_##name##_ppzw_s, NULL \ 2882 }; \ 2883 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2884 a, name##_ppzw_fns[a->esz]) 2885 2886 DO_PPZW(CMPEQ, cmpeq) 2887 DO_PPZW(CMPNE, cmpne) 2888 DO_PPZW(CMPGT, cmpgt) 2889 DO_PPZW(CMPGE, cmpge) 2890 DO_PPZW(CMPHI, cmphi) 2891 DO_PPZW(CMPHS, cmphs) 2892 DO_PPZW(CMPLT, cmplt) 2893 DO_PPZW(CMPLE, cmple) 2894 DO_PPZW(CMPLO, cmplo) 2895 DO_PPZW(CMPLS, cmpls) 2896 2897 #undef DO_PPZW 2898 2899 /* 2900 *** SVE Integer Compare - Immediate Groups 2901 */ 2902 2903 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2904 gen_helper_gvec_flags_3 *gen_fn) 2905 { 2906 TCGv_ptr pd, zn, pg; 2907 unsigned vsz; 2908 TCGv_i32 t; 2909 2910 if (gen_fn == NULL) { 2911 return false; 2912 } 2913 if (!sve_access_check(s)) { 2914 return true; 2915 } 2916 2917 vsz = vec_full_reg_size(s); 2918 t = tcg_temp_new_i32(); 2919 pd = tcg_temp_new_ptr(); 2920 zn = tcg_temp_new_ptr(); 2921 pg = tcg_temp_new_ptr(); 2922 2923 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2924 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2925 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2926 2927 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 2928 2929 do_pred_flags(t); 2930 return true; 2931 } 2932 2933 #define DO_PPZI(NAME, name) \ 2934 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 2935 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 2936 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 2937 }; \ 2938 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 2939 name##_ppzi_fns[a->esz]) 2940 2941 DO_PPZI(CMPEQ, cmpeq) 2942 DO_PPZI(CMPNE, cmpne) 2943 DO_PPZI(CMPGT, cmpgt) 2944 DO_PPZI(CMPGE, cmpge) 2945 DO_PPZI(CMPHI, cmphi) 2946 DO_PPZI(CMPHS, cmphs) 2947 DO_PPZI(CMPLT, cmplt) 2948 DO_PPZI(CMPLE, cmple) 2949 DO_PPZI(CMPLO, cmplo) 2950 DO_PPZI(CMPLS, cmpls) 2951 2952 #undef DO_PPZI 2953 2954 /* 2955 *** SVE Partition Break Group 2956 */ 2957 2958 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2959 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2960 { 2961 if (!sve_access_check(s)) { 2962 return true; 2963 } 2964 2965 unsigned vsz = pred_full_reg_size(s); 2966 2967 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2968 TCGv_ptr d = tcg_temp_new_ptr(); 2969 TCGv_ptr n = tcg_temp_new_ptr(); 2970 TCGv_ptr m = tcg_temp_new_ptr(); 2971 TCGv_ptr g = tcg_temp_new_ptr(); 2972 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2973 2974 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 2975 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 2976 tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm)); 2977 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 2978 2979 if (a->s) { 2980 TCGv_i32 t = tcg_temp_new_i32(); 2981 fn_s(t, d, n, m, g, desc); 2982 do_pred_flags(t); 2983 } else { 2984 fn(d, n, m, g, desc); 2985 } 2986 return true; 2987 } 2988 2989 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2990 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 2991 { 2992 if (!sve_access_check(s)) { 2993 return true; 2994 } 2995 2996 unsigned vsz = pred_full_reg_size(s); 2997 2998 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2999 TCGv_ptr d = tcg_temp_new_ptr(); 3000 TCGv_ptr n = tcg_temp_new_ptr(); 3001 TCGv_ptr g = tcg_temp_new_ptr(); 3002 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3003 3004 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 3005 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 3006 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 3007 3008 if (a->s) { 3009 TCGv_i32 t = tcg_temp_new_i32(); 3010 fn_s(t, d, n, g, desc); 3011 do_pred_flags(t); 3012 } else { 3013 fn(d, n, g, desc); 3014 } 3015 return true; 3016 } 3017 3018 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3019 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3020 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3021 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3022 3023 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3024 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3025 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3026 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3027 3028 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3029 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3030 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3031 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3032 3033 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3034 gen_helper_sve_brkn, gen_helper_sve_brkns) 3035 3036 /* 3037 *** SVE Predicate Count Group 3038 */ 3039 3040 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3041 { 3042 unsigned psz = pred_full_reg_size(s); 3043 3044 if (psz <= 8) { 3045 uint64_t psz_mask; 3046 3047 tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn)); 3048 if (pn != pg) { 3049 TCGv_i64 g = tcg_temp_new_i64(); 3050 tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg)); 3051 tcg_gen_and_i64(val, val, g); 3052 } 3053 3054 /* Reduce the pred_esz_masks value simply to reduce the 3055 * size of the code generated here. 3056 */ 3057 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3058 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3059 3060 tcg_gen_ctpop_i64(val, val); 3061 } else { 3062 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3063 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3064 unsigned desc = 0; 3065 3066 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3067 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3068 3069 tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn)); 3070 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3071 3072 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3073 } 3074 } 3075 3076 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3077 { 3078 if (!dc_isar_feature(aa64_sve, s)) { 3079 return false; 3080 } 3081 if (sve_access_check(s)) { 3082 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3083 } 3084 return true; 3085 } 3086 3087 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3088 { 3089 if (!dc_isar_feature(aa64_sve, s)) { 3090 return false; 3091 } 3092 if (sve_access_check(s)) { 3093 TCGv_i64 reg = cpu_reg(s, a->rd); 3094 TCGv_i64 val = tcg_temp_new_i64(); 3095 3096 do_cntp(s, val, a->esz, a->pg, a->pg); 3097 if (a->d) { 3098 tcg_gen_sub_i64(reg, reg, val); 3099 } else { 3100 tcg_gen_add_i64(reg, reg, val); 3101 } 3102 } 3103 return true; 3104 } 3105 3106 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3107 { 3108 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3109 return false; 3110 } 3111 if (sve_access_check(s)) { 3112 unsigned vsz = vec_full_reg_size(s); 3113 TCGv_i64 val = tcg_temp_new_i64(); 3114 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3115 3116 do_cntp(s, val, a->esz, a->pg, a->pg); 3117 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3118 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3119 } 3120 return true; 3121 } 3122 3123 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3124 { 3125 if (!dc_isar_feature(aa64_sve, s)) { 3126 return false; 3127 } 3128 if (sve_access_check(s)) { 3129 TCGv_i64 reg = cpu_reg(s, a->rd); 3130 TCGv_i64 val = tcg_temp_new_i64(); 3131 3132 do_cntp(s, val, a->esz, a->pg, a->pg); 3133 do_sat_addsub_32(reg, val, a->u, a->d); 3134 } 3135 return true; 3136 } 3137 3138 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3139 { 3140 if (!dc_isar_feature(aa64_sve, s)) { 3141 return false; 3142 } 3143 if (sve_access_check(s)) { 3144 TCGv_i64 reg = cpu_reg(s, a->rd); 3145 TCGv_i64 val = tcg_temp_new_i64(); 3146 3147 do_cntp(s, val, a->esz, a->pg, a->pg); 3148 do_sat_addsub_64(reg, val, a->u, a->d); 3149 } 3150 return true; 3151 } 3152 3153 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3154 { 3155 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3156 return false; 3157 } 3158 if (sve_access_check(s)) { 3159 TCGv_i64 val = tcg_temp_new_i64(); 3160 do_cntp(s, val, a->esz, a->pg, a->pg); 3161 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3162 } 3163 return true; 3164 } 3165 3166 /* 3167 *** SVE Integer Compare Scalars Group 3168 */ 3169 3170 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3171 { 3172 if (!dc_isar_feature(aa64_sve, s)) { 3173 return false; 3174 } 3175 if (!sve_access_check(s)) { 3176 return true; 3177 } 3178 3179 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3180 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3181 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3182 TCGv_i64 cmp = tcg_temp_new_i64(); 3183 3184 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3185 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3186 3187 /* VF = !NF & !CF. */ 3188 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3189 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3190 3191 /* Both NF and VF actually look at bit 31. */ 3192 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3193 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3194 return true; 3195 } 3196 3197 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3198 { 3199 TCGv_i64 op0, op1, t0, t1, tmax; 3200 TCGv_i32 t2; 3201 TCGv_ptr ptr; 3202 unsigned vsz = vec_full_reg_size(s); 3203 unsigned desc = 0; 3204 TCGCond cond; 3205 uint64_t maxval; 3206 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3207 bool eq = a->eq == a->lt; 3208 3209 /* The greater-than conditions are all SVE2. */ 3210 if (a->lt 3211 ? !dc_isar_feature(aa64_sve, s) 3212 : !dc_isar_feature(aa64_sve2, s)) { 3213 return false; 3214 } 3215 if (!sve_access_check(s)) { 3216 return true; 3217 } 3218 3219 op0 = read_cpu_reg(s, a->rn, 1); 3220 op1 = read_cpu_reg(s, a->rm, 1); 3221 3222 if (!a->sf) { 3223 if (a->u) { 3224 tcg_gen_ext32u_i64(op0, op0); 3225 tcg_gen_ext32u_i64(op1, op1); 3226 } else { 3227 tcg_gen_ext32s_i64(op0, op0); 3228 tcg_gen_ext32s_i64(op1, op1); 3229 } 3230 } 3231 3232 /* For the helper, compress the different conditions into a computation 3233 * of how many iterations for which the condition is true. 3234 */ 3235 t0 = tcg_temp_new_i64(); 3236 t1 = tcg_temp_new_i64(); 3237 3238 if (a->lt) { 3239 tcg_gen_sub_i64(t0, op1, op0); 3240 if (a->u) { 3241 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3242 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3243 } else { 3244 maxval = a->sf ? INT64_MAX : INT32_MAX; 3245 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3246 } 3247 } else { 3248 tcg_gen_sub_i64(t0, op0, op1); 3249 if (a->u) { 3250 maxval = 0; 3251 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3252 } else { 3253 maxval = a->sf ? INT64_MIN : INT32_MIN; 3254 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3255 } 3256 } 3257 3258 tmax = tcg_constant_i64(vsz >> a->esz); 3259 if (eq) { 3260 /* Equality means one more iteration. */ 3261 tcg_gen_addi_i64(t0, t0, 1); 3262 3263 /* 3264 * For the less-than while, if op1 is maxval (and the only time 3265 * the addition above could overflow), then we produce an all-true 3266 * predicate by setting the count to the vector length. This is 3267 * because the pseudocode is described as an increment + compare 3268 * loop, and the maximum integer would always compare true. 3269 * Similarly, the greater-than while has the same issue with the 3270 * minimum integer due to the decrement + compare loop. 3271 */ 3272 tcg_gen_movi_i64(t1, maxval); 3273 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3274 } 3275 3276 /* Bound to the maximum. */ 3277 tcg_gen_umin_i64(t0, t0, tmax); 3278 3279 /* Set the count to zero if the condition is false. */ 3280 tcg_gen_movi_i64(t1, 0); 3281 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3282 3283 /* Since we're bounded, pass as a 32-bit type. */ 3284 t2 = tcg_temp_new_i32(); 3285 tcg_gen_extrl_i64_i32(t2, t0); 3286 3287 /* Scale elements to bits. */ 3288 tcg_gen_shli_i32(t2, t2, a->esz); 3289 3290 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3291 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3292 3293 ptr = tcg_temp_new_ptr(); 3294 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3295 3296 if (a->lt) { 3297 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3298 } else { 3299 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3300 } 3301 do_pred_flags(t2); 3302 return true; 3303 } 3304 3305 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3306 { 3307 TCGv_i64 op0, op1, diff, t1, tmax; 3308 TCGv_i32 t2; 3309 TCGv_ptr ptr; 3310 unsigned vsz = vec_full_reg_size(s); 3311 unsigned desc = 0; 3312 3313 if (!dc_isar_feature(aa64_sve2, s)) { 3314 return false; 3315 } 3316 if (!sve_access_check(s)) { 3317 return true; 3318 } 3319 3320 op0 = read_cpu_reg(s, a->rn, 1); 3321 op1 = read_cpu_reg(s, a->rm, 1); 3322 3323 tmax = tcg_constant_i64(vsz); 3324 diff = tcg_temp_new_i64(); 3325 3326 if (a->rw) { 3327 /* WHILERW */ 3328 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3329 t1 = tcg_temp_new_i64(); 3330 tcg_gen_sub_i64(diff, op0, op1); 3331 tcg_gen_sub_i64(t1, op1, op0); 3332 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3333 /* Round down to a multiple of ESIZE. */ 3334 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3335 /* If op1 == op0, diff == 0, and the condition is always true. */ 3336 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3337 } else { 3338 /* WHILEWR */ 3339 tcg_gen_sub_i64(diff, op1, op0); 3340 /* Round down to a multiple of ESIZE. */ 3341 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3342 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3343 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3344 } 3345 3346 /* Bound to the maximum. */ 3347 tcg_gen_umin_i64(diff, diff, tmax); 3348 3349 /* Since we're bounded, pass as a 32-bit type. */ 3350 t2 = tcg_temp_new_i32(); 3351 tcg_gen_extrl_i64_i32(t2, diff); 3352 3353 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3354 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3355 3356 ptr = tcg_temp_new_ptr(); 3357 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3358 3359 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3360 do_pred_flags(t2); 3361 return true; 3362 } 3363 3364 /* 3365 *** SVE Integer Wide Immediate - Unpredicated Group 3366 */ 3367 3368 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3369 { 3370 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3371 return false; 3372 } 3373 if (sve_access_check(s)) { 3374 unsigned vsz = vec_full_reg_size(s); 3375 int dofs = vec_full_reg_offset(s, a->rd); 3376 uint64_t imm; 3377 3378 /* Decode the VFP immediate. */ 3379 imm = vfp_expand_imm(a->esz, a->imm); 3380 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3381 } 3382 return true; 3383 } 3384 3385 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3386 { 3387 if (!dc_isar_feature(aa64_sve, s)) { 3388 return false; 3389 } 3390 if (sve_access_check(s)) { 3391 unsigned vsz = vec_full_reg_size(s); 3392 int dofs = vec_full_reg_offset(s, a->rd); 3393 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3394 } 3395 return true; 3396 } 3397 3398 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3399 3400 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3401 { 3402 a->imm = -a->imm; 3403 return trans_ADD_zzi(s, a); 3404 } 3405 3406 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3407 { 3408 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3409 static const GVecGen2s op[4] = { 3410 { .fni8 = tcg_gen_vec_sub8_i64, 3411 .fniv = tcg_gen_sub_vec, 3412 .fno = gen_helper_sve_subri_b, 3413 .opt_opc = vecop_list, 3414 .vece = MO_8, 3415 .scalar_first = true }, 3416 { .fni8 = tcg_gen_vec_sub16_i64, 3417 .fniv = tcg_gen_sub_vec, 3418 .fno = gen_helper_sve_subri_h, 3419 .opt_opc = vecop_list, 3420 .vece = MO_16, 3421 .scalar_first = true }, 3422 { .fni4 = tcg_gen_sub_i32, 3423 .fniv = tcg_gen_sub_vec, 3424 .fno = gen_helper_sve_subri_s, 3425 .opt_opc = vecop_list, 3426 .vece = MO_32, 3427 .scalar_first = true }, 3428 { .fni8 = tcg_gen_sub_i64, 3429 .fniv = tcg_gen_sub_vec, 3430 .fno = gen_helper_sve_subri_d, 3431 .opt_opc = vecop_list, 3432 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3433 .vece = MO_64, 3434 .scalar_first = true } 3435 }; 3436 3437 if (!dc_isar_feature(aa64_sve, s)) { 3438 return false; 3439 } 3440 if (sve_access_check(s)) { 3441 unsigned vsz = vec_full_reg_size(s); 3442 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3443 vec_full_reg_offset(s, a->rn), 3444 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3445 } 3446 return true; 3447 } 3448 3449 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3450 3451 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3452 { 3453 if (sve_access_check(s)) { 3454 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3455 tcg_constant_i64(a->imm), u, d); 3456 } 3457 return true; 3458 } 3459 3460 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3461 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3462 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3463 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3464 3465 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3466 { 3467 if (sve_access_check(s)) { 3468 unsigned vsz = vec_full_reg_size(s); 3469 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3470 vec_full_reg_offset(s, a->rn), 3471 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3472 } 3473 return true; 3474 } 3475 3476 #define DO_ZZI(NAME, name) \ 3477 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3478 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3479 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3480 }; \ 3481 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3482 3483 DO_ZZI(SMAX, smax) 3484 DO_ZZI(UMAX, umax) 3485 DO_ZZI(SMIN, smin) 3486 DO_ZZI(UMIN, umin) 3487 3488 #undef DO_ZZI 3489 3490 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3491 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3492 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3493 }; 3494 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3495 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3496 3497 /* 3498 * SVE Multiply - Indexed 3499 */ 3500 3501 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3502 gen_helper_gvec_sdot_idx_b, a) 3503 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3504 gen_helper_gvec_sdot_idx_h, a) 3505 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3506 gen_helper_gvec_udot_idx_b, a) 3507 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3508 gen_helper_gvec_udot_idx_h, a) 3509 3510 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3511 gen_helper_gvec_sudot_idx_b, a) 3512 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3513 gen_helper_gvec_usdot_idx_b, a) 3514 3515 #define DO_SVE2_RRX(NAME, FUNC) \ 3516 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3517 a->rd, a->rn, a->rm, a->index) 3518 3519 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3520 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3521 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3522 3523 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3524 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3525 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3526 3527 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3528 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3529 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3530 3531 #undef DO_SVE2_RRX 3532 3533 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3534 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3535 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3536 3537 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3538 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3539 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3540 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3541 3542 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3543 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3544 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3545 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3546 3547 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3548 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3549 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3550 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3551 3552 #undef DO_SVE2_RRX_TB 3553 3554 #define DO_SVE2_RRXR(NAME, FUNC) \ 3555 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3556 3557 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3558 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s) 3559 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3560 3561 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3562 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3563 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3564 3565 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3566 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3567 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3568 3569 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3570 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3571 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3572 3573 #undef DO_SVE2_RRXR 3574 3575 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3576 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3577 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3578 3579 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3580 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3581 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3582 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3583 3584 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3585 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3586 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3587 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3588 3589 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3590 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3591 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3592 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3593 3594 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3595 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3596 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3597 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3598 3599 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3600 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3601 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3602 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3603 3604 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3605 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3606 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3607 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3608 3609 #undef DO_SVE2_RRXR_TB 3610 3611 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3612 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3613 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3614 3615 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3616 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3617 3618 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3619 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3620 3621 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3622 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3623 3624 #undef DO_SVE2_RRXR_ROT 3625 3626 /* 3627 *** SVE Floating Point Multiply-Add Indexed Group 3628 */ 3629 3630 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3631 { 3632 static gen_helper_gvec_4_ptr * const fns[4] = { 3633 NULL, 3634 gen_helper_gvec_fmla_idx_h, 3635 gen_helper_gvec_fmla_idx_s, 3636 gen_helper_gvec_fmla_idx_d, 3637 }; 3638 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3639 (a->index << 1) | sub, 3640 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3641 } 3642 3643 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3644 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3645 3646 /* 3647 *** SVE Floating Point Multiply Indexed Group 3648 */ 3649 3650 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3651 NULL, gen_helper_gvec_fmul_idx_h, 3652 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3653 }; 3654 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3655 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3656 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3657 3658 /* 3659 *** SVE Floating Point Fast Reduction Group 3660 */ 3661 3662 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3663 TCGv_ptr, TCGv_i32); 3664 3665 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3666 gen_helper_fp_reduce *fn) 3667 { 3668 unsigned vsz, p2vsz; 3669 TCGv_i32 t_desc; 3670 TCGv_ptr t_zn, t_pg, status; 3671 TCGv_i64 temp; 3672 3673 if (fn == NULL) { 3674 return false; 3675 } 3676 if (!sve_access_check(s)) { 3677 return true; 3678 } 3679 3680 vsz = vec_full_reg_size(s); 3681 p2vsz = pow2ceil(vsz); 3682 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3683 temp = tcg_temp_new_i64(); 3684 t_zn = tcg_temp_new_ptr(); 3685 t_pg = tcg_temp_new_ptr(); 3686 3687 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3688 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3689 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3690 3691 fn(temp, t_zn, t_pg, status, t_desc); 3692 3693 write_fp_dreg(s, a->rd, temp); 3694 return true; 3695 } 3696 3697 #define DO_VPZ(NAME, name) \ 3698 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3699 NULL, gen_helper_sve_##name##_h, \ 3700 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3701 }; \ 3702 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3703 3704 DO_VPZ(FADDV, faddv) 3705 DO_VPZ(FMINNMV, fminnmv) 3706 DO_VPZ(FMAXNMV, fmaxnmv) 3707 DO_VPZ(FMINV, fminv) 3708 DO_VPZ(FMAXV, fmaxv) 3709 3710 #undef DO_VPZ 3711 3712 /* 3713 *** SVE Floating Point Unary Operations - Unpredicated Group 3714 */ 3715 3716 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3717 NULL, gen_helper_gvec_frecpe_h, 3718 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3719 }; 3720 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3721 3722 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3723 NULL, gen_helper_gvec_frsqrte_h, 3724 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 3725 }; 3726 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3727 3728 /* 3729 *** SVE Floating Point Compare with Zero Group 3730 */ 3731 3732 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3733 gen_helper_gvec_3_ptr *fn) 3734 { 3735 if (fn == NULL) { 3736 return false; 3737 } 3738 if (sve_access_check(s)) { 3739 unsigned vsz = vec_full_reg_size(s); 3740 TCGv_ptr status = 3741 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3742 3743 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 3744 vec_full_reg_offset(s, a->rn), 3745 pred_full_reg_offset(s, a->pg), 3746 status, vsz, vsz, 0, fn); 3747 } 3748 return true; 3749 } 3750 3751 #define DO_PPZ(NAME, name) \ 3752 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 3753 NULL, gen_helper_sve_##name##_h, \ 3754 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3755 }; \ 3756 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 3757 3758 DO_PPZ(FCMGE_ppz0, fcmge0) 3759 DO_PPZ(FCMGT_ppz0, fcmgt0) 3760 DO_PPZ(FCMLE_ppz0, fcmle0) 3761 DO_PPZ(FCMLT_ppz0, fcmlt0) 3762 DO_PPZ(FCMEQ_ppz0, fcmeq0) 3763 DO_PPZ(FCMNE_ppz0, fcmne0) 3764 3765 #undef DO_PPZ 3766 3767 /* 3768 *** SVE floating-point trig multiply-add coefficient 3769 */ 3770 3771 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 3772 NULL, gen_helper_sve_ftmad_h, 3773 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 3774 }; 3775 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 3776 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, 3777 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3778 3779 /* 3780 *** SVE Floating Point Accumulating Reduction Group 3781 */ 3782 3783 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 3784 { 3785 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 3786 TCGv_ptr, TCGv_ptr, TCGv_i32); 3787 static fadda_fn * const fns[3] = { 3788 gen_helper_sve_fadda_h, 3789 gen_helper_sve_fadda_s, 3790 gen_helper_sve_fadda_d, 3791 }; 3792 unsigned vsz = vec_full_reg_size(s); 3793 TCGv_ptr t_rm, t_pg, t_fpst; 3794 TCGv_i64 t_val; 3795 TCGv_i32 t_desc; 3796 3797 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3798 return false; 3799 } 3800 s->is_nonstreaming = true; 3801 if (!sve_access_check(s)) { 3802 return true; 3803 } 3804 3805 t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 3806 t_rm = tcg_temp_new_ptr(); 3807 t_pg = tcg_temp_new_ptr(); 3808 tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); 3809 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3810 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3811 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3812 3813 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 3814 3815 write_fp_dreg(s, a->rd, t_val); 3816 return true; 3817 } 3818 3819 /* 3820 *** SVE Floating Point Arithmetic - Unpredicated Group 3821 */ 3822 3823 #define DO_FP3(NAME, name) \ 3824 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 3825 NULL, gen_helper_gvec_##name##_h, \ 3826 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 3827 }; \ 3828 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 3829 3830 DO_FP3(FADD_zzz, fadd) 3831 DO_FP3(FSUB_zzz, fsub) 3832 DO_FP3(FMUL_zzz, fmul) 3833 DO_FP3(FRECPS, recps) 3834 DO_FP3(FRSQRTS, rsqrts) 3835 3836 #undef DO_FP3 3837 3838 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 3839 NULL, gen_helper_gvec_ftsmul_h, 3840 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 3841 }; 3842 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 3843 ftsmul_fns[a->esz], a, 0) 3844 3845 /* 3846 *** SVE Floating Point Arithmetic - Predicated Group 3847 */ 3848 3849 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 3850 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 3851 NULL, gen_helper_##name##_h, \ 3852 gen_helper_##name##_s, gen_helper_##name##_d \ 3853 }; \ 3854 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 3855 3856 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 3857 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 3858 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 3859 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) 3860 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) 3861 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 3862 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 3863 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) 3864 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 3865 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 3866 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 3867 3868 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 3869 TCGv_i64, TCGv_ptr, TCGv_i32); 3870 3871 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 3872 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 3873 { 3874 unsigned vsz = vec_full_reg_size(s); 3875 TCGv_ptr t_zd, t_zn, t_pg, status; 3876 TCGv_i32 desc; 3877 3878 t_zd = tcg_temp_new_ptr(); 3879 t_zn = tcg_temp_new_ptr(); 3880 t_pg = tcg_temp_new_ptr(); 3881 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); 3882 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); 3883 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3884 3885 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 3886 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3887 fn(t_zd, t_zn, t_pg, scalar, status, desc); 3888 } 3889 3890 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 3891 gen_helper_sve_fp2scalar *fn) 3892 { 3893 if (fn == NULL) { 3894 return false; 3895 } 3896 if (sve_access_check(s)) { 3897 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 3898 tcg_constant_i64(imm), fn); 3899 } 3900 return true; 3901 } 3902 3903 #define DO_FP_IMM(NAME, name, const0, const1) \ 3904 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 3905 NULL, gen_helper_sve_##name##_h, \ 3906 gen_helper_sve_##name##_s, \ 3907 gen_helper_sve_##name##_d \ 3908 }; \ 3909 static uint64_t const name##_const[4][2] = { \ 3910 { -1, -1 }, \ 3911 { float16_##const0, float16_##const1 }, \ 3912 { float32_##const0, float32_##const1 }, \ 3913 { float64_##const0, float64_##const1 }, \ 3914 }; \ 3915 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 3916 name##_const[a->esz][a->imm], name##_fns[a->esz]) 3917 3918 DO_FP_IMM(FADD, fadds, half, one) 3919 DO_FP_IMM(FSUB, fsubs, half, one) 3920 DO_FP_IMM(FMUL, fmuls, half, two) 3921 DO_FP_IMM(FSUBR, fsubrs, half, one) 3922 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 3923 DO_FP_IMM(FMINNM, fminnms, zero, one) 3924 DO_FP_IMM(FMAX, fmaxs, zero, one) 3925 DO_FP_IMM(FMIN, fmins, zero, one) 3926 3927 #undef DO_FP_IMM 3928 3929 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 3930 gen_helper_gvec_4_ptr *fn) 3931 { 3932 if (fn == NULL) { 3933 return false; 3934 } 3935 if (sve_access_check(s)) { 3936 unsigned vsz = vec_full_reg_size(s); 3937 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3938 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 3939 vec_full_reg_offset(s, a->rn), 3940 vec_full_reg_offset(s, a->rm), 3941 pred_full_reg_offset(s, a->pg), 3942 status, vsz, vsz, 0, fn); 3943 } 3944 return true; 3945 } 3946 3947 #define DO_FPCMP(NAME, name) \ 3948 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 3949 NULL, gen_helper_sve_##name##_h, \ 3950 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3951 }; \ 3952 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 3953 3954 DO_FPCMP(FCMGE, fcmge) 3955 DO_FPCMP(FCMGT, fcmgt) 3956 DO_FPCMP(FCMEQ, fcmeq) 3957 DO_FPCMP(FCMNE, fcmne) 3958 DO_FPCMP(FCMUO, fcmuo) 3959 DO_FPCMP(FACGE, facge) 3960 DO_FPCMP(FACGT, facgt) 3961 3962 #undef DO_FPCMP 3963 3964 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 3965 NULL, gen_helper_sve_fcadd_h, 3966 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 3967 }; 3968 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 3969 a->rd, a->rn, a->rm, a->pg, a->rot, 3970 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3971 3972 #define DO_FMLA(NAME, name) \ 3973 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 3974 NULL, gen_helper_sve_##name##_h, \ 3975 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3976 }; \ 3977 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 3978 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 3979 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3980 3981 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 3982 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 3983 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 3984 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 3985 3986 #undef DO_FMLA 3987 3988 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 3989 NULL, gen_helper_sve_fcmla_zpzzz_h, 3990 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 3991 }; 3992 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 3993 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 3994 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3995 3996 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 3997 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 3998 }; 3999 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 4000 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 4001 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4002 4003 /* 4004 *** SVE Floating Point Unary Operations Predicated Group 4005 */ 4006 4007 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4008 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 4009 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4010 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 4011 4012 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 4013 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 4014 4015 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4016 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 4017 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4018 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 4019 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4020 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 4021 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4022 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 4023 4024 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4025 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 4026 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4027 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 4028 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4029 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 4030 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4031 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 4032 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4033 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 4034 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4035 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 4036 4037 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4038 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 4039 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4040 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 4041 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4042 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 4043 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4044 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 4045 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4046 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 4047 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4048 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 4049 4050 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4051 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 4052 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4053 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 4054 4055 static gen_helper_gvec_3_ptr * const frint_fns[] = { 4056 NULL, 4057 gen_helper_sve_frint_h, 4058 gen_helper_sve_frint_s, 4059 gen_helper_sve_frint_d 4060 }; 4061 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 4062 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4063 4064 static gen_helper_gvec_3_ptr * const frintx_fns[] = { 4065 NULL, 4066 gen_helper_sve_frintx_h, 4067 gen_helper_sve_frintx_s, 4068 gen_helper_sve_frintx_d 4069 }; 4070 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], 4071 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4072 4073 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 4074 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) 4075 { 4076 unsigned vsz; 4077 TCGv_i32 tmode; 4078 TCGv_ptr status; 4079 4080 if (fn == NULL) { 4081 return false; 4082 } 4083 if (!sve_access_check(s)) { 4084 return true; 4085 } 4086 4087 vsz = vec_full_reg_size(s); 4088 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4089 tmode = gen_set_rmode(mode, status); 4090 4091 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4092 vec_full_reg_offset(s, a->rn), 4093 pred_full_reg_offset(s, a->pg), 4094 status, vsz, vsz, 0, fn); 4095 4096 gen_restore_rmode(tmode, status); 4097 return true; 4098 } 4099 4100 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, 4101 FPROUNDING_TIEEVEN, frint_fns[a->esz]) 4102 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, 4103 FPROUNDING_POSINF, frint_fns[a->esz]) 4104 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, 4105 FPROUNDING_NEGINF, frint_fns[a->esz]) 4106 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, 4107 FPROUNDING_ZERO, frint_fns[a->esz]) 4108 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, 4109 FPROUNDING_TIEAWAY, frint_fns[a->esz]) 4110 4111 static gen_helper_gvec_3_ptr * const frecpx_fns[] = { 4112 NULL, gen_helper_sve_frecpx_h, 4113 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, 4114 }; 4115 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], 4116 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4117 4118 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { 4119 NULL, gen_helper_sve_fsqrt_h, 4120 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, 4121 }; 4122 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], 4123 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4124 4125 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4126 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 4127 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4128 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 4129 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4130 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 4131 4132 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4133 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 4134 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4135 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 4136 4137 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4138 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 4139 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4140 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4141 4142 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4143 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4144 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4145 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4146 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4147 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4148 4149 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4150 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4151 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4152 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4153 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4154 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4155 4156 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4157 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4158 4159 /* 4160 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4161 */ 4162 4163 /* Subroutine loading a vector register at VOFS of LEN bytes. 4164 * The load should begin at the address Rn + IMM. 4165 */ 4166 4167 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4168 int len, int rn, int imm) 4169 { 4170 int len_align = QEMU_ALIGN_DOWN(len, 16); 4171 int len_remain = len % 16; 4172 int nparts = len / 16 + ctpop8(len_remain); 4173 int midx = get_mem_index(s); 4174 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4175 TCGv_i128 t16; 4176 4177 dirty_addr = tcg_temp_new_i64(); 4178 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4179 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4180 4181 /* 4182 * Note that unpredicated load/store of vector/predicate registers 4183 * are defined as a stream of bytes, which equates to little-endian 4184 * operations on larger quantities. 4185 * Attempt to keep code expansion to a minimum by limiting the 4186 * amount of unrolling done. 4187 */ 4188 if (nparts <= 4) { 4189 int i; 4190 4191 t0 = tcg_temp_new_i64(); 4192 t1 = tcg_temp_new_i64(); 4193 t16 = tcg_temp_new_i128(); 4194 4195 for (i = 0; i < len_align; i += 16) { 4196 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4197 MO_LE | MO_128 | MO_ATOM_NONE); 4198 tcg_gen_extr_i128_i64(t0, t1, t16); 4199 tcg_gen_st_i64(t0, base, vofs + i); 4200 tcg_gen_st_i64(t1, base, vofs + i + 8); 4201 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4202 } 4203 } else { 4204 TCGLabel *loop = gen_new_label(); 4205 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4206 4207 tcg_gen_movi_ptr(i, 0); 4208 gen_set_label(loop); 4209 4210 t16 = tcg_temp_new_i128(); 4211 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4212 MO_LE | MO_128 | MO_ATOM_NONE); 4213 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4214 4215 tp = tcg_temp_new_ptr(); 4216 tcg_gen_add_ptr(tp, base, i); 4217 tcg_gen_addi_ptr(i, i, 16); 4218 4219 t0 = tcg_temp_new_i64(); 4220 t1 = tcg_temp_new_i64(); 4221 tcg_gen_extr_i128_i64(t0, t1, t16); 4222 4223 tcg_gen_st_i64(t0, tp, vofs); 4224 tcg_gen_st_i64(t1, tp, vofs + 8); 4225 4226 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4227 } 4228 4229 /* 4230 * Predicate register loads can be any multiple of 2. 4231 * Note that we still store the entire 64-bit unit into tcg_env. 4232 */ 4233 if (len_remain >= 8) { 4234 t0 = tcg_temp_new_i64(); 4235 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4236 tcg_gen_st_i64(t0, base, vofs + len_align); 4237 len_remain -= 8; 4238 len_align += 8; 4239 if (len_remain) { 4240 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4241 } 4242 } 4243 if (len_remain) { 4244 t0 = tcg_temp_new_i64(); 4245 switch (len_remain) { 4246 case 2: 4247 case 4: 4248 case 8: 4249 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4250 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4251 break; 4252 4253 case 6: 4254 t1 = tcg_temp_new_i64(); 4255 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4256 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4257 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4258 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4259 break; 4260 4261 default: 4262 g_assert_not_reached(); 4263 } 4264 tcg_gen_st_i64(t0, base, vofs + len_align); 4265 } 4266 } 4267 4268 /* Similarly for stores. */ 4269 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4270 int len, int rn, int imm) 4271 { 4272 int len_align = QEMU_ALIGN_DOWN(len, 16); 4273 int len_remain = len % 16; 4274 int nparts = len / 16 + ctpop8(len_remain); 4275 int midx = get_mem_index(s); 4276 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4277 TCGv_i128 t16; 4278 4279 dirty_addr = tcg_temp_new_i64(); 4280 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4281 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4282 4283 /* Note that unpredicated load/store of vector/predicate registers 4284 * are defined as a stream of bytes, which equates to little-endian 4285 * operations on larger quantities. There is no nice way to force 4286 * a little-endian store for aarch64_be-linux-user out of line. 4287 * 4288 * Attempt to keep code expansion to a minimum by limiting the 4289 * amount of unrolling done. 4290 */ 4291 if (nparts <= 4) { 4292 int i; 4293 4294 t0 = tcg_temp_new_i64(); 4295 t1 = tcg_temp_new_i64(); 4296 t16 = tcg_temp_new_i128(); 4297 for (i = 0; i < len_align; i += 16) { 4298 tcg_gen_ld_i64(t0, base, vofs + i); 4299 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4300 tcg_gen_concat_i64_i128(t16, t0, t1); 4301 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4302 MO_LE | MO_128 | MO_ATOM_NONE); 4303 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4304 } 4305 } else { 4306 TCGLabel *loop = gen_new_label(); 4307 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4308 4309 tcg_gen_movi_ptr(i, 0); 4310 gen_set_label(loop); 4311 4312 t0 = tcg_temp_new_i64(); 4313 t1 = tcg_temp_new_i64(); 4314 tp = tcg_temp_new_ptr(); 4315 tcg_gen_add_ptr(tp, base, i); 4316 tcg_gen_ld_i64(t0, tp, vofs); 4317 tcg_gen_ld_i64(t1, tp, vofs + 8); 4318 tcg_gen_addi_ptr(i, i, 16); 4319 4320 t16 = tcg_temp_new_i128(); 4321 tcg_gen_concat_i64_i128(t16, t0, t1); 4322 4323 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4324 MO_LE | MO_128 | MO_ATOM_NONE); 4325 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4326 4327 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4328 } 4329 4330 /* Predicate register stores can be any multiple of 2. */ 4331 if (len_remain >= 8) { 4332 t0 = tcg_temp_new_i64(); 4333 tcg_gen_ld_i64(t0, base, vofs + len_align); 4334 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4335 len_remain -= 8; 4336 len_align += 8; 4337 if (len_remain) { 4338 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4339 } 4340 } 4341 if (len_remain) { 4342 t0 = tcg_temp_new_i64(); 4343 tcg_gen_ld_i64(t0, base, vofs + len_align); 4344 4345 switch (len_remain) { 4346 case 2: 4347 case 4: 4348 case 8: 4349 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4350 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4351 break; 4352 4353 case 6: 4354 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4355 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4356 tcg_gen_shri_i64(t0, t0, 32); 4357 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4358 break; 4359 4360 default: 4361 g_assert_not_reached(); 4362 } 4363 } 4364 } 4365 4366 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4367 { 4368 if (!dc_isar_feature(aa64_sve, s)) { 4369 return false; 4370 } 4371 if (sve_access_check(s)) { 4372 int size = vec_full_reg_size(s); 4373 int off = vec_full_reg_offset(s, a->rd); 4374 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4375 } 4376 return true; 4377 } 4378 4379 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4380 { 4381 if (!dc_isar_feature(aa64_sve, s)) { 4382 return false; 4383 } 4384 if (sve_access_check(s)) { 4385 int size = pred_full_reg_size(s); 4386 int off = pred_full_reg_offset(s, a->rd); 4387 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4388 } 4389 return true; 4390 } 4391 4392 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4393 { 4394 if (!dc_isar_feature(aa64_sve, s)) { 4395 return false; 4396 } 4397 if (sve_access_check(s)) { 4398 int size = vec_full_reg_size(s); 4399 int off = vec_full_reg_offset(s, a->rd); 4400 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4401 } 4402 return true; 4403 } 4404 4405 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4406 { 4407 if (!dc_isar_feature(aa64_sve, s)) { 4408 return false; 4409 } 4410 if (sve_access_check(s)) { 4411 int size = pred_full_reg_size(s); 4412 int off = pred_full_reg_offset(s, a->rd); 4413 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4414 } 4415 return true; 4416 } 4417 4418 /* 4419 *** SVE Memory - Contiguous Load Group 4420 */ 4421 4422 /* The memory mode of the dtype. */ 4423 static const MemOp dtype_mop[16] = { 4424 MO_UB, MO_UB, MO_UB, MO_UB, 4425 MO_SL, MO_UW, MO_UW, MO_UW, 4426 MO_SW, MO_SW, MO_UL, MO_UL, 4427 MO_SB, MO_SB, MO_SB, MO_UQ 4428 }; 4429 4430 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4431 4432 /* The vector element size of dtype. */ 4433 static const uint8_t dtype_esz[16] = { 4434 0, 1, 2, 3, 4435 3, 1, 2, 3, 4436 3, 2, 2, 3, 4437 3, 2, 1, 3 4438 }; 4439 4440 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4441 int dtype, uint32_t mte_n, bool is_write, 4442 gen_helper_gvec_mem *fn) 4443 { 4444 unsigned vsz = vec_full_reg_size(s); 4445 TCGv_ptr t_pg; 4446 int desc = 0; 4447 4448 /* 4449 * For e.g. LD4, there are not enough arguments to pass all 4 4450 * registers as pointers, so encode the regno into the data field. 4451 * For consistency, do this even for LD1. 4452 */ 4453 if (s->mte_active[0]) { 4454 int msz = dtype_msz(dtype); 4455 4456 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4457 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4458 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4459 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4460 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); 4461 desc <<= SVE_MTEDESC_SHIFT; 4462 } else { 4463 addr = clean_data_tbi(s, addr); 4464 } 4465 4466 desc = simd_desc(vsz, vsz, zt | desc); 4467 t_pg = tcg_temp_new_ptr(); 4468 4469 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4470 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4471 } 4472 4473 /* Indexed by [mte][be][dtype][nreg] */ 4474 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4475 { /* mte inactive, little-endian */ 4476 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4477 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4478 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4479 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4480 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4481 4482 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4483 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4484 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4485 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4486 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4487 4488 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4489 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4490 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4491 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4492 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4493 4494 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4495 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4496 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4497 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4498 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4499 4500 /* mte inactive, big-endian */ 4501 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4502 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4503 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4504 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4505 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4506 4507 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4508 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4509 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4510 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4511 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4512 4513 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4514 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4515 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4516 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4517 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4518 4519 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4520 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4521 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4522 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4523 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4524 4525 { /* mte active, little-endian */ 4526 { { gen_helper_sve_ld1bb_r_mte, 4527 gen_helper_sve_ld2bb_r_mte, 4528 gen_helper_sve_ld3bb_r_mte, 4529 gen_helper_sve_ld4bb_r_mte }, 4530 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4531 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4532 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4533 4534 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4535 { gen_helper_sve_ld1hh_le_r_mte, 4536 gen_helper_sve_ld2hh_le_r_mte, 4537 gen_helper_sve_ld3hh_le_r_mte, 4538 gen_helper_sve_ld4hh_le_r_mte }, 4539 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4540 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4541 4542 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4543 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4544 { gen_helper_sve_ld1ss_le_r_mte, 4545 gen_helper_sve_ld2ss_le_r_mte, 4546 gen_helper_sve_ld3ss_le_r_mte, 4547 gen_helper_sve_ld4ss_le_r_mte }, 4548 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4549 4550 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4551 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4552 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4553 { gen_helper_sve_ld1dd_le_r_mte, 4554 gen_helper_sve_ld2dd_le_r_mte, 4555 gen_helper_sve_ld3dd_le_r_mte, 4556 gen_helper_sve_ld4dd_le_r_mte } }, 4557 4558 /* mte active, big-endian */ 4559 { { gen_helper_sve_ld1bb_r_mte, 4560 gen_helper_sve_ld2bb_r_mte, 4561 gen_helper_sve_ld3bb_r_mte, 4562 gen_helper_sve_ld4bb_r_mte }, 4563 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4564 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4565 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4566 4567 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4568 { gen_helper_sve_ld1hh_be_r_mte, 4569 gen_helper_sve_ld2hh_be_r_mte, 4570 gen_helper_sve_ld3hh_be_r_mte, 4571 gen_helper_sve_ld4hh_be_r_mte }, 4572 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4573 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4574 4575 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4576 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4577 { gen_helper_sve_ld1ss_be_r_mte, 4578 gen_helper_sve_ld2ss_be_r_mte, 4579 gen_helper_sve_ld3ss_be_r_mte, 4580 gen_helper_sve_ld4ss_be_r_mte }, 4581 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4582 4583 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4584 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4585 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4586 { gen_helper_sve_ld1dd_be_r_mte, 4587 gen_helper_sve_ld2dd_be_r_mte, 4588 gen_helper_sve_ld3dd_be_r_mte, 4589 gen_helper_sve_ld4dd_be_r_mte } } }, 4590 }; 4591 4592 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4593 TCGv_i64 addr, int dtype, int nreg) 4594 { 4595 gen_helper_gvec_mem *fn 4596 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4597 4598 /* 4599 * While there are holes in the table, they are not 4600 * accessible via the instruction encoding. 4601 */ 4602 assert(fn != NULL); 4603 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); 4604 } 4605 4606 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4607 { 4608 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4609 return false; 4610 } 4611 if (sve_access_check(s)) { 4612 TCGv_i64 addr = tcg_temp_new_i64(); 4613 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4614 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4615 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4616 } 4617 return true; 4618 } 4619 4620 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4621 { 4622 if (!dc_isar_feature(aa64_sve, s)) { 4623 return false; 4624 } 4625 if (sve_access_check(s)) { 4626 int vsz = vec_full_reg_size(s); 4627 int elements = vsz >> dtype_esz[a->dtype]; 4628 TCGv_i64 addr = tcg_temp_new_i64(); 4629 4630 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4631 (a->imm * elements * (a->nreg + 1)) 4632 << dtype_msz(a->dtype)); 4633 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4634 } 4635 return true; 4636 } 4637 4638 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4639 { 4640 static gen_helper_gvec_mem * const fns[2][2][16] = { 4641 { /* mte inactive, little-endian */ 4642 { gen_helper_sve_ldff1bb_r, 4643 gen_helper_sve_ldff1bhu_r, 4644 gen_helper_sve_ldff1bsu_r, 4645 gen_helper_sve_ldff1bdu_r, 4646 4647 gen_helper_sve_ldff1sds_le_r, 4648 gen_helper_sve_ldff1hh_le_r, 4649 gen_helper_sve_ldff1hsu_le_r, 4650 gen_helper_sve_ldff1hdu_le_r, 4651 4652 gen_helper_sve_ldff1hds_le_r, 4653 gen_helper_sve_ldff1hss_le_r, 4654 gen_helper_sve_ldff1ss_le_r, 4655 gen_helper_sve_ldff1sdu_le_r, 4656 4657 gen_helper_sve_ldff1bds_r, 4658 gen_helper_sve_ldff1bss_r, 4659 gen_helper_sve_ldff1bhs_r, 4660 gen_helper_sve_ldff1dd_le_r }, 4661 4662 /* mte inactive, big-endian */ 4663 { gen_helper_sve_ldff1bb_r, 4664 gen_helper_sve_ldff1bhu_r, 4665 gen_helper_sve_ldff1bsu_r, 4666 gen_helper_sve_ldff1bdu_r, 4667 4668 gen_helper_sve_ldff1sds_be_r, 4669 gen_helper_sve_ldff1hh_be_r, 4670 gen_helper_sve_ldff1hsu_be_r, 4671 gen_helper_sve_ldff1hdu_be_r, 4672 4673 gen_helper_sve_ldff1hds_be_r, 4674 gen_helper_sve_ldff1hss_be_r, 4675 gen_helper_sve_ldff1ss_be_r, 4676 gen_helper_sve_ldff1sdu_be_r, 4677 4678 gen_helper_sve_ldff1bds_r, 4679 gen_helper_sve_ldff1bss_r, 4680 gen_helper_sve_ldff1bhs_r, 4681 gen_helper_sve_ldff1dd_be_r } }, 4682 4683 { /* mte active, little-endian */ 4684 { gen_helper_sve_ldff1bb_r_mte, 4685 gen_helper_sve_ldff1bhu_r_mte, 4686 gen_helper_sve_ldff1bsu_r_mte, 4687 gen_helper_sve_ldff1bdu_r_mte, 4688 4689 gen_helper_sve_ldff1sds_le_r_mte, 4690 gen_helper_sve_ldff1hh_le_r_mte, 4691 gen_helper_sve_ldff1hsu_le_r_mte, 4692 gen_helper_sve_ldff1hdu_le_r_mte, 4693 4694 gen_helper_sve_ldff1hds_le_r_mte, 4695 gen_helper_sve_ldff1hss_le_r_mte, 4696 gen_helper_sve_ldff1ss_le_r_mte, 4697 gen_helper_sve_ldff1sdu_le_r_mte, 4698 4699 gen_helper_sve_ldff1bds_r_mte, 4700 gen_helper_sve_ldff1bss_r_mte, 4701 gen_helper_sve_ldff1bhs_r_mte, 4702 gen_helper_sve_ldff1dd_le_r_mte }, 4703 4704 /* mte active, big-endian */ 4705 { gen_helper_sve_ldff1bb_r_mte, 4706 gen_helper_sve_ldff1bhu_r_mte, 4707 gen_helper_sve_ldff1bsu_r_mte, 4708 gen_helper_sve_ldff1bdu_r_mte, 4709 4710 gen_helper_sve_ldff1sds_be_r_mte, 4711 gen_helper_sve_ldff1hh_be_r_mte, 4712 gen_helper_sve_ldff1hsu_be_r_mte, 4713 gen_helper_sve_ldff1hdu_be_r_mte, 4714 4715 gen_helper_sve_ldff1hds_be_r_mte, 4716 gen_helper_sve_ldff1hss_be_r_mte, 4717 gen_helper_sve_ldff1ss_be_r_mte, 4718 gen_helper_sve_ldff1sdu_be_r_mte, 4719 4720 gen_helper_sve_ldff1bds_r_mte, 4721 gen_helper_sve_ldff1bss_r_mte, 4722 gen_helper_sve_ldff1bhs_r_mte, 4723 gen_helper_sve_ldff1dd_be_r_mte } }, 4724 }; 4725 4726 if (!dc_isar_feature(aa64_sve, s)) { 4727 return false; 4728 } 4729 s->is_nonstreaming = true; 4730 if (sve_access_check(s)) { 4731 TCGv_i64 addr = tcg_temp_new_i64(); 4732 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4733 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4734 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4735 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4736 } 4737 return true; 4738 } 4739 4740 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 4741 { 4742 static gen_helper_gvec_mem * const fns[2][2][16] = { 4743 { /* mte inactive, little-endian */ 4744 { gen_helper_sve_ldnf1bb_r, 4745 gen_helper_sve_ldnf1bhu_r, 4746 gen_helper_sve_ldnf1bsu_r, 4747 gen_helper_sve_ldnf1bdu_r, 4748 4749 gen_helper_sve_ldnf1sds_le_r, 4750 gen_helper_sve_ldnf1hh_le_r, 4751 gen_helper_sve_ldnf1hsu_le_r, 4752 gen_helper_sve_ldnf1hdu_le_r, 4753 4754 gen_helper_sve_ldnf1hds_le_r, 4755 gen_helper_sve_ldnf1hss_le_r, 4756 gen_helper_sve_ldnf1ss_le_r, 4757 gen_helper_sve_ldnf1sdu_le_r, 4758 4759 gen_helper_sve_ldnf1bds_r, 4760 gen_helper_sve_ldnf1bss_r, 4761 gen_helper_sve_ldnf1bhs_r, 4762 gen_helper_sve_ldnf1dd_le_r }, 4763 4764 /* mte inactive, big-endian */ 4765 { gen_helper_sve_ldnf1bb_r, 4766 gen_helper_sve_ldnf1bhu_r, 4767 gen_helper_sve_ldnf1bsu_r, 4768 gen_helper_sve_ldnf1bdu_r, 4769 4770 gen_helper_sve_ldnf1sds_be_r, 4771 gen_helper_sve_ldnf1hh_be_r, 4772 gen_helper_sve_ldnf1hsu_be_r, 4773 gen_helper_sve_ldnf1hdu_be_r, 4774 4775 gen_helper_sve_ldnf1hds_be_r, 4776 gen_helper_sve_ldnf1hss_be_r, 4777 gen_helper_sve_ldnf1ss_be_r, 4778 gen_helper_sve_ldnf1sdu_be_r, 4779 4780 gen_helper_sve_ldnf1bds_r, 4781 gen_helper_sve_ldnf1bss_r, 4782 gen_helper_sve_ldnf1bhs_r, 4783 gen_helper_sve_ldnf1dd_be_r } }, 4784 4785 { /* mte inactive, little-endian */ 4786 { gen_helper_sve_ldnf1bb_r_mte, 4787 gen_helper_sve_ldnf1bhu_r_mte, 4788 gen_helper_sve_ldnf1bsu_r_mte, 4789 gen_helper_sve_ldnf1bdu_r_mte, 4790 4791 gen_helper_sve_ldnf1sds_le_r_mte, 4792 gen_helper_sve_ldnf1hh_le_r_mte, 4793 gen_helper_sve_ldnf1hsu_le_r_mte, 4794 gen_helper_sve_ldnf1hdu_le_r_mte, 4795 4796 gen_helper_sve_ldnf1hds_le_r_mte, 4797 gen_helper_sve_ldnf1hss_le_r_mte, 4798 gen_helper_sve_ldnf1ss_le_r_mte, 4799 gen_helper_sve_ldnf1sdu_le_r_mte, 4800 4801 gen_helper_sve_ldnf1bds_r_mte, 4802 gen_helper_sve_ldnf1bss_r_mte, 4803 gen_helper_sve_ldnf1bhs_r_mte, 4804 gen_helper_sve_ldnf1dd_le_r_mte }, 4805 4806 /* mte inactive, big-endian */ 4807 { gen_helper_sve_ldnf1bb_r_mte, 4808 gen_helper_sve_ldnf1bhu_r_mte, 4809 gen_helper_sve_ldnf1bsu_r_mte, 4810 gen_helper_sve_ldnf1bdu_r_mte, 4811 4812 gen_helper_sve_ldnf1sds_be_r_mte, 4813 gen_helper_sve_ldnf1hh_be_r_mte, 4814 gen_helper_sve_ldnf1hsu_be_r_mte, 4815 gen_helper_sve_ldnf1hdu_be_r_mte, 4816 4817 gen_helper_sve_ldnf1hds_be_r_mte, 4818 gen_helper_sve_ldnf1hss_be_r_mte, 4819 gen_helper_sve_ldnf1ss_be_r_mte, 4820 gen_helper_sve_ldnf1sdu_be_r_mte, 4821 4822 gen_helper_sve_ldnf1bds_r_mte, 4823 gen_helper_sve_ldnf1bss_r_mte, 4824 gen_helper_sve_ldnf1bhs_r_mte, 4825 gen_helper_sve_ldnf1dd_be_r_mte } }, 4826 }; 4827 4828 if (!dc_isar_feature(aa64_sve, s)) { 4829 return false; 4830 } 4831 s->is_nonstreaming = true; 4832 if (sve_access_check(s)) { 4833 int vsz = vec_full_reg_size(s); 4834 int elements = vsz >> dtype_esz[a->dtype]; 4835 int off = (a->imm * elements) << dtype_msz(a->dtype); 4836 TCGv_i64 addr = tcg_temp_new_i64(); 4837 4838 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4839 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4840 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4841 } 4842 return true; 4843 } 4844 4845 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4846 { 4847 unsigned vsz = vec_full_reg_size(s); 4848 TCGv_ptr t_pg; 4849 int poff; 4850 4851 /* Load the first quadword using the normal predicated load helpers. */ 4852 poff = pred_full_reg_offset(s, pg); 4853 if (vsz > 16) { 4854 /* 4855 * Zero-extend the first 16 bits of the predicate into a temporary. 4856 * This avoids triggering an assert making sure we don't have bits 4857 * set within a predicate beyond VQ, but we have lowered VQ to 1 4858 * for this load operation. 4859 */ 4860 TCGv_i64 tmp = tcg_temp_new_i64(); 4861 #if HOST_BIG_ENDIAN 4862 poff += 6; 4863 #endif 4864 tcg_gen_ld16u_i64(tmp, tcg_env, poff); 4865 4866 poff = offsetof(CPUARMState, vfp.preg_tmp); 4867 tcg_gen_st_i64(tmp, tcg_env, poff); 4868 } 4869 4870 t_pg = tcg_temp_new_ptr(); 4871 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4872 4873 gen_helper_gvec_mem *fn 4874 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4875 fn(tcg_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt))); 4876 4877 /* Replicate that first quadword. */ 4878 if (vsz > 16) { 4879 int doff = vec_full_reg_offset(s, zt); 4880 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4881 } 4882 } 4883 4884 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 4885 { 4886 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4887 return false; 4888 } 4889 if (sve_access_check(s)) { 4890 int msz = dtype_msz(a->dtype); 4891 TCGv_i64 addr = tcg_temp_new_i64(); 4892 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 4893 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4894 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4895 } 4896 return true; 4897 } 4898 4899 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 4900 { 4901 if (!dc_isar_feature(aa64_sve, s)) { 4902 return false; 4903 } 4904 if (sve_access_check(s)) { 4905 TCGv_i64 addr = tcg_temp_new_i64(); 4906 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 4907 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4908 } 4909 return true; 4910 } 4911 4912 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4913 { 4914 unsigned vsz = vec_full_reg_size(s); 4915 unsigned vsz_r32; 4916 TCGv_ptr t_pg; 4917 int poff, doff; 4918 4919 if (vsz < 32) { 4920 /* 4921 * Note that this UNDEFINED check comes after CheckSVEEnabled() 4922 * in the ARM pseudocode, which is the sve_access_check() done 4923 * in our caller. We should not now return false from the caller. 4924 */ 4925 unallocated_encoding(s); 4926 return; 4927 } 4928 4929 /* Load the first octaword using the normal predicated load helpers. */ 4930 4931 poff = pred_full_reg_offset(s, pg); 4932 if (vsz > 32) { 4933 /* 4934 * Zero-extend the first 32 bits of the predicate into a temporary. 4935 * This avoids triggering an assert making sure we don't have bits 4936 * set within a predicate beyond VQ, but we have lowered VQ to 2 4937 * for this load operation. 4938 */ 4939 TCGv_i64 tmp = tcg_temp_new_i64(); 4940 #if HOST_BIG_ENDIAN 4941 poff += 4; 4942 #endif 4943 tcg_gen_ld32u_i64(tmp, tcg_env, poff); 4944 4945 poff = offsetof(CPUARMState, vfp.preg_tmp); 4946 tcg_gen_st_i64(tmp, tcg_env, poff); 4947 } 4948 4949 t_pg = tcg_temp_new_ptr(); 4950 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4951 4952 gen_helper_gvec_mem *fn 4953 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4954 fn(tcg_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt))); 4955 4956 /* 4957 * Replicate that first octaword. 4958 * The replication happens in units of 32; if the full vector size 4959 * is not a multiple of 32, the final bits are zeroed. 4960 */ 4961 doff = vec_full_reg_offset(s, zt); 4962 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 4963 if (vsz >= 64) { 4964 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 4965 } 4966 vsz -= vsz_r32; 4967 if (vsz) { 4968 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 4969 } 4970 } 4971 4972 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 4973 { 4974 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4975 return false; 4976 } 4977 if (a->rm == 31) { 4978 return false; 4979 } 4980 s->is_nonstreaming = true; 4981 if (sve_access_check(s)) { 4982 TCGv_i64 addr = tcg_temp_new_i64(); 4983 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4984 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4985 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4986 } 4987 return true; 4988 } 4989 4990 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 4991 { 4992 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4993 return false; 4994 } 4995 s->is_nonstreaming = true; 4996 if (sve_access_check(s)) { 4997 TCGv_i64 addr = tcg_temp_new_i64(); 4998 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 4999 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5000 } 5001 return true; 5002 } 5003 5004 /* Load and broadcast element. */ 5005 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 5006 { 5007 unsigned vsz = vec_full_reg_size(s); 5008 unsigned psz = pred_full_reg_size(s); 5009 unsigned esz = dtype_esz[a->dtype]; 5010 unsigned msz = dtype_msz(a->dtype); 5011 TCGLabel *over; 5012 TCGv_i64 temp, clean_addr; 5013 MemOp memop; 5014 5015 if (!dc_isar_feature(aa64_sve, s)) { 5016 return false; 5017 } 5018 if (!sve_access_check(s)) { 5019 return true; 5020 } 5021 5022 over = gen_new_label(); 5023 5024 /* If the guarding predicate has no bits set, no load occurs. */ 5025 if (psz <= 8) { 5026 /* Reduce the pred_esz_masks value simply to reduce the 5027 * size of the code generated here. 5028 */ 5029 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 5030 temp = tcg_temp_new_i64(); 5031 tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg)); 5032 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 5033 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 5034 } else { 5035 TCGv_i32 t32 = tcg_temp_new_i32(); 5036 find_last_active(s, t32, esz, a->pg); 5037 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5038 } 5039 5040 /* Load the data. */ 5041 temp = tcg_temp_new_i64(); 5042 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5043 5044 memop = finalize_memop(s, dtype_mop[a->dtype]); 5045 clean_addr = gen_mte_check1(s, temp, false, true, memop); 5046 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); 5047 5048 /* Broadcast to *all* elements. */ 5049 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5050 vsz, vsz, temp); 5051 5052 /* Zero the inactive elements. */ 5053 gen_set_label(over); 5054 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5055 } 5056 5057 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5058 int msz, int esz, int nreg) 5059 { 5060 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 5061 { { { gen_helper_sve_st1bb_r, 5062 gen_helper_sve_st1bh_r, 5063 gen_helper_sve_st1bs_r, 5064 gen_helper_sve_st1bd_r }, 5065 { NULL, 5066 gen_helper_sve_st1hh_le_r, 5067 gen_helper_sve_st1hs_le_r, 5068 gen_helper_sve_st1hd_le_r }, 5069 { NULL, NULL, 5070 gen_helper_sve_st1ss_le_r, 5071 gen_helper_sve_st1sd_le_r }, 5072 { NULL, NULL, NULL, 5073 gen_helper_sve_st1dd_le_r } }, 5074 { { gen_helper_sve_st1bb_r, 5075 gen_helper_sve_st1bh_r, 5076 gen_helper_sve_st1bs_r, 5077 gen_helper_sve_st1bd_r }, 5078 { NULL, 5079 gen_helper_sve_st1hh_be_r, 5080 gen_helper_sve_st1hs_be_r, 5081 gen_helper_sve_st1hd_be_r }, 5082 { NULL, NULL, 5083 gen_helper_sve_st1ss_be_r, 5084 gen_helper_sve_st1sd_be_r }, 5085 { NULL, NULL, NULL, 5086 gen_helper_sve_st1dd_be_r } } }, 5087 5088 { { { gen_helper_sve_st1bb_r_mte, 5089 gen_helper_sve_st1bh_r_mte, 5090 gen_helper_sve_st1bs_r_mte, 5091 gen_helper_sve_st1bd_r_mte }, 5092 { NULL, 5093 gen_helper_sve_st1hh_le_r_mte, 5094 gen_helper_sve_st1hs_le_r_mte, 5095 gen_helper_sve_st1hd_le_r_mte }, 5096 { NULL, NULL, 5097 gen_helper_sve_st1ss_le_r_mte, 5098 gen_helper_sve_st1sd_le_r_mte }, 5099 { NULL, NULL, NULL, 5100 gen_helper_sve_st1dd_le_r_mte } }, 5101 { { gen_helper_sve_st1bb_r_mte, 5102 gen_helper_sve_st1bh_r_mte, 5103 gen_helper_sve_st1bs_r_mte, 5104 gen_helper_sve_st1bd_r_mte }, 5105 { NULL, 5106 gen_helper_sve_st1hh_be_r_mte, 5107 gen_helper_sve_st1hs_be_r_mte, 5108 gen_helper_sve_st1hd_be_r_mte }, 5109 { NULL, NULL, 5110 gen_helper_sve_st1ss_be_r_mte, 5111 gen_helper_sve_st1sd_be_r_mte }, 5112 { NULL, NULL, NULL, 5113 gen_helper_sve_st1dd_be_r_mte } } }, 5114 }; 5115 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5116 { { { gen_helper_sve_st2bb_r, 5117 gen_helper_sve_st2hh_le_r, 5118 gen_helper_sve_st2ss_le_r, 5119 gen_helper_sve_st2dd_le_r }, 5120 { gen_helper_sve_st3bb_r, 5121 gen_helper_sve_st3hh_le_r, 5122 gen_helper_sve_st3ss_le_r, 5123 gen_helper_sve_st3dd_le_r }, 5124 { gen_helper_sve_st4bb_r, 5125 gen_helper_sve_st4hh_le_r, 5126 gen_helper_sve_st4ss_le_r, 5127 gen_helper_sve_st4dd_le_r } }, 5128 { { gen_helper_sve_st2bb_r, 5129 gen_helper_sve_st2hh_be_r, 5130 gen_helper_sve_st2ss_be_r, 5131 gen_helper_sve_st2dd_be_r }, 5132 { gen_helper_sve_st3bb_r, 5133 gen_helper_sve_st3hh_be_r, 5134 gen_helper_sve_st3ss_be_r, 5135 gen_helper_sve_st3dd_be_r }, 5136 { gen_helper_sve_st4bb_r, 5137 gen_helper_sve_st4hh_be_r, 5138 gen_helper_sve_st4ss_be_r, 5139 gen_helper_sve_st4dd_be_r } } }, 5140 { { { gen_helper_sve_st2bb_r_mte, 5141 gen_helper_sve_st2hh_le_r_mte, 5142 gen_helper_sve_st2ss_le_r_mte, 5143 gen_helper_sve_st2dd_le_r_mte }, 5144 { gen_helper_sve_st3bb_r_mte, 5145 gen_helper_sve_st3hh_le_r_mte, 5146 gen_helper_sve_st3ss_le_r_mte, 5147 gen_helper_sve_st3dd_le_r_mte }, 5148 { gen_helper_sve_st4bb_r_mte, 5149 gen_helper_sve_st4hh_le_r_mte, 5150 gen_helper_sve_st4ss_le_r_mte, 5151 gen_helper_sve_st4dd_le_r_mte } }, 5152 { { gen_helper_sve_st2bb_r_mte, 5153 gen_helper_sve_st2hh_be_r_mte, 5154 gen_helper_sve_st2ss_be_r_mte, 5155 gen_helper_sve_st2dd_be_r_mte }, 5156 { gen_helper_sve_st3bb_r_mte, 5157 gen_helper_sve_st3hh_be_r_mte, 5158 gen_helper_sve_st3ss_be_r_mte, 5159 gen_helper_sve_st3dd_be_r_mte }, 5160 { gen_helper_sve_st4bb_r_mte, 5161 gen_helper_sve_st4hh_be_r_mte, 5162 gen_helper_sve_st4ss_be_r_mte, 5163 gen_helper_sve_st4dd_be_r_mte } } }, 5164 }; 5165 gen_helper_gvec_mem *fn; 5166 int be = s->be_data == MO_BE; 5167 5168 if (nreg == 0) { 5169 /* ST1 */ 5170 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5171 nreg = 1; 5172 } else { 5173 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5174 assert(msz == esz); 5175 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5176 } 5177 assert(fn != NULL); 5178 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); 5179 } 5180 5181 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5182 { 5183 if (!dc_isar_feature(aa64_sve, s)) { 5184 return false; 5185 } 5186 if (a->rm == 31 || a->msz > a->esz) { 5187 return false; 5188 } 5189 if (sve_access_check(s)) { 5190 TCGv_i64 addr = tcg_temp_new_i64(); 5191 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5192 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5193 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5194 } 5195 return true; 5196 } 5197 5198 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5199 { 5200 if (!dc_isar_feature(aa64_sve, s)) { 5201 return false; 5202 } 5203 if (a->msz > a->esz) { 5204 return false; 5205 } 5206 if (sve_access_check(s)) { 5207 int vsz = vec_full_reg_size(s); 5208 int elements = vsz >> a->esz; 5209 TCGv_i64 addr = tcg_temp_new_i64(); 5210 5211 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5212 (a->imm * elements * (a->nreg + 1)) << a->msz); 5213 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5214 } 5215 return true; 5216 } 5217 5218 /* 5219 *** SVE gather loads / scatter stores 5220 */ 5221 5222 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5223 int scale, TCGv_i64 scalar, int msz, bool is_write, 5224 gen_helper_gvec_mem_scatter *fn) 5225 { 5226 unsigned vsz = vec_full_reg_size(s); 5227 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5228 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5229 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5230 int desc = 0; 5231 5232 if (s->mte_active[0]) { 5233 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 5234 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 5235 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 5236 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 5237 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); 5238 desc <<= SVE_MTEDESC_SHIFT; 5239 } 5240 desc = simd_desc(vsz, vsz, desc | scale); 5241 5242 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 5243 tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); 5244 tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); 5245 fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5246 } 5247 5248 /* Indexed by [mte][be][ff][xs][u][msz]. */ 5249 static gen_helper_gvec_mem_scatter * const 5250 gather_load_fn32[2][2][2][2][2][3] = { 5251 { /* MTE Inactive */ 5252 { /* Little-endian */ 5253 { { { gen_helper_sve_ldbss_zsu, 5254 gen_helper_sve_ldhss_le_zsu, 5255 NULL, }, 5256 { gen_helper_sve_ldbsu_zsu, 5257 gen_helper_sve_ldhsu_le_zsu, 5258 gen_helper_sve_ldss_le_zsu, } }, 5259 { { gen_helper_sve_ldbss_zss, 5260 gen_helper_sve_ldhss_le_zss, 5261 NULL, }, 5262 { gen_helper_sve_ldbsu_zss, 5263 gen_helper_sve_ldhsu_le_zss, 5264 gen_helper_sve_ldss_le_zss, } } }, 5265 5266 /* First-fault */ 5267 { { { gen_helper_sve_ldffbss_zsu, 5268 gen_helper_sve_ldffhss_le_zsu, 5269 NULL, }, 5270 { gen_helper_sve_ldffbsu_zsu, 5271 gen_helper_sve_ldffhsu_le_zsu, 5272 gen_helper_sve_ldffss_le_zsu, } }, 5273 { { gen_helper_sve_ldffbss_zss, 5274 gen_helper_sve_ldffhss_le_zss, 5275 NULL, }, 5276 { gen_helper_sve_ldffbsu_zss, 5277 gen_helper_sve_ldffhsu_le_zss, 5278 gen_helper_sve_ldffss_le_zss, } } } }, 5279 5280 { /* Big-endian */ 5281 { { { gen_helper_sve_ldbss_zsu, 5282 gen_helper_sve_ldhss_be_zsu, 5283 NULL, }, 5284 { gen_helper_sve_ldbsu_zsu, 5285 gen_helper_sve_ldhsu_be_zsu, 5286 gen_helper_sve_ldss_be_zsu, } }, 5287 { { gen_helper_sve_ldbss_zss, 5288 gen_helper_sve_ldhss_be_zss, 5289 NULL, }, 5290 { gen_helper_sve_ldbsu_zss, 5291 gen_helper_sve_ldhsu_be_zss, 5292 gen_helper_sve_ldss_be_zss, } } }, 5293 5294 /* First-fault */ 5295 { { { gen_helper_sve_ldffbss_zsu, 5296 gen_helper_sve_ldffhss_be_zsu, 5297 NULL, }, 5298 { gen_helper_sve_ldffbsu_zsu, 5299 gen_helper_sve_ldffhsu_be_zsu, 5300 gen_helper_sve_ldffss_be_zsu, } }, 5301 { { gen_helper_sve_ldffbss_zss, 5302 gen_helper_sve_ldffhss_be_zss, 5303 NULL, }, 5304 { gen_helper_sve_ldffbsu_zss, 5305 gen_helper_sve_ldffhsu_be_zss, 5306 gen_helper_sve_ldffss_be_zss, } } } } }, 5307 { /* MTE Active */ 5308 { /* Little-endian */ 5309 { { { gen_helper_sve_ldbss_zsu_mte, 5310 gen_helper_sve_ldhss_le_zsu_mte, 5311 NULL, }, 5312 { gen_helper_sve_ldbsu_zsu_mte, 5313 gen_helper_sve_ldhsu_le_zsu_mte, 5314 gen_helper_sve_ldss_le_zsu_mte, } }, 5315 { { gen_helper_sve_ldbss_zss_mte, 5316 gen_helper_sve_ldhss_le_zss_mte, 5317 NULL, }, 5318 { gen_helper_sve_ldbsu_zss_mte, 5319 gen_helper_sve_ldhsu_le_zss_mte, 5320 gen_helper_sve_ldss_le_zss_mte, } } }, 5321 5322 /* First-fault */ 5323 { { { gen_helper_sve_ldffbss_zsu_mte, 5324 gen_helper_sve_ldffhss_le_zsu_mte, 5325 NULL, }, 5326 { gen_helper_sve_ldffbsu_zsu_mte, 5327 gen_helper_sve_ldffhsu_le_zsu_mte, 5328 gen_helper_sve_ldffss_le_zsu_mte, } }, 5329 { { gen_helper_sve_ldffbss_zss_mte, 5330 gen_helper_sve_ldffhss_le_zss_mte, 5331 NULL, }, 5332 { gen_helper_sve_ldffbsu_zss_mte, 5333 gen_helper_sve_ldffhsu_le_zss_mte, 5334 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5335 5336 { /* Big-endian */ 5337 { { { gen_helper_sve_ldbss_zsu_mte, 5338 gen_helper_sve_ldhss_be_zsu_mte, 5339 NULL, }, 5340 { gen_helper_sve_ldbsu_zsu_mte, 5341 gen_helper_sve_ldhsu_be_zsu_mte, 5342 gen_helper_sve_ldss_be_zsu_mte, } }, 5343 { { gen_helper_sve_ldbss_zss_mte, 5344 gen_helper_sve_ldhss_be_zss_mte, 5345 NULL, }, 5346 { gen_helper_sve_ldbsu_zss_mte, 5347 gen_helper_sve_ldhsu_be_zss_mte, 5348 gen_helper_sve_ldss_be_zss_mte, } } }, 5349 5350 /* First-fault */ 5351 { { { gen_helper_sve_ldffbss_zsu_mte, 5352 gen_helper_sve_ldffhss_be_zsu_mte, 5353 NULL, }, 5354 { gen_helper_sve_ldffbsu_zsu_mte, 5355 gen_helper_sve_ldffhsu_be_zsu_mte, 5356 gen_helper_sve_ldffss_be_zsu_mte, } }, 5357 { { gen_helper_sve_ldffbss_zss_mte, 5358 gen_helper_sve_ldffhss_be_zss_mte, 5359 NULL, }, 5360 { gen_helper_sve_ldffbsu_zss_mte, 5361 gen_helper_sve_ldffhsu_be_zss_mte, 5362 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 5363 }; 5364 5365 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5366 static gen_helper_gvec_mem_scatter * const 5367 gather_load_fn64[2][2][2][3][2][4] = { 5368 { /* MTE Inactive */ 5369 { /* Little-endian */ 5370 { { { gen_helper_sve_ldbds_zsu, 5371 gen_helper_sve_ldhds_le_zsu, 5372 gen_helper_sve_ldsds_le_zsu, 5373 NULL, }, 5374 { gen_helper_sve_ldbdu_zsu, 5375 gen_helper_sve_ldhdu_le_zsu, 5376 gen_helper_sve_ldsdu_le_zsu, 5377 gen_helper_sve_lddd_le_zsu, } }, 5378 { { gen_helper_sve_ldbds_zss, 5379 gen_helper_sve_ldhds_le_zss, 5380 gen_helper_sve_ldsds_le_zss, 5381 NULL, }, 5382 { gen_helper_sve_ldbdu_zss, 5383 gen_helper_sve_ldhdu_le_zss, 5384 gen_helper_sve_ldsdu_le_zss, 5385 gen_helper_sve_lddd_le_zss, } }, 5386 { { gen_helper_sve_ldbds_zd, 5387 gen_helper_sve_ldhds_le_zd, 5388 gen_helper_sve_ldsds_le_zd, 5389 NULL, }, 5390 { gen_helper_sve_ldbdu_zd, 5391 gen_helper_sve_ldhdu_le_zd, 5392 gen_helper_sve_ldsdu_le_zd, 5393 gen_helper_sve_lddd_le_zd, } } }, 5394 5395 /* First-fault */ 5396 { { { gen_helper_sve_ldffbds_zsu, 5397 gen_helper_sve_ldffhds_le_zsu, 5398 gen_helper_sve_ldffsds_le_zsu, 5399 NULL, }, 5400 { gen_helper_sve_ldffbdu_zsu, 5401 gen_helper_sve_ldffhdu_le_zsu, 5402 gen_helper_sve_ldffsdu_le_zsu, 5403 gen_helper_sve_ldffdd_le_zsu, } }, 5404 { { gen_helper_sve_ldffbds_zss, 5405 gen_helper_sve_ldffhds_le_zss, 5406 gen_helper_sve_ldffsds_le_zss, 5407 NULL, }, 5408 { gen_helper_sve_ldffbdu_zss, 5409 gen_helper_sve_ldffhdu_le_zss, 5410 gen_helper_sve_ldffsdu_le_zss, 5411 gen_helper_sve_ldffdd_le_zss, } }, 5412 { { gen_helper_sve_ldffbds_zd, 5413 gen_helper_sve_ldffhds_le_zd, 5414 gen_helper_sve_ldffsds_le_zd, 5415 NULL, }, 5416 { gen_helper_sve_ldffbdu_zd, 5417 gen_helper_sve_ldffhdu_le_zd, 5418 gen_helper_sve_ldffsdu_le_zd, 5419 gen_helper_sve_ldffdd_le_zd, } } } }, 5420 { /* Big-endian */ 5421 { { { gen_helper_sve_ldbds_zsu, 5422 gen_helper_sve_ldhds_be_zsu, 5423 gen_helper_sve_ldsds_be_zsu, 5424 NULL, }, 5425 { gen_helper_sve_ldbdu_zsu, 5426 gen_helper_sve_ldhdu_be_zsu, 5427 gen_helper_sve_ldsdu_be_zsu, 5428 gen_helper_sve_lddd_be_zsu, } }, 5429 { { gen_helper_sve_ldbds_zss, 5430 gen_helper_sve_ldhds_be_zss, 5431 gen_helper_sve_ldsds_be_zss, 5432 NULL, }, 5433 { gen_helper_sve_ldbdu_zss, 5434 gen_helper_sve_ldhdu_be_zss, 5435 gen_helper_sve_ldsdu_be_zss, 5436 gen_helper_sve_lddd_be_zss, } }, 5437 { { gen_helper_sve_ldbds_zd, 5438 gen_helper_sve_ldhds_be_zd, 5439 gen_helper_sve_ldsds_be_zd, 5440 NULL, }, 5441 { gen_helper_sve_ldbdu_zd, 5442 gen_helper_sve_ldhdu_be_zd, 5443 gen_helper_sve_ldsdu_be_zd, 5444 gen_helper_sve_lddd_be_zd, } } }, 5445 5446 /* First-fault */ 5447 { { { gen_helper_sve_ldffbds_zsu, 5448 gen_helper_sve_ldffhds_be_zsu, 5449 gen_helper_sve_ldffsds_be_zsu, 5450 NULL, }, 5451 { gen_helper_sve_ldffbdu_zsu, 5452 gen_helper_sve_ldffhdu_be_zsu, 5453 gen_helper_sve_ldffsdu_be_zsu, 5454 gen_helper_sve_ldffdd_be_zsu, } }, 5455 { { gen_helper_sve_ldffbds_zss, 5456 gen_helper_sve_ldffhds_be_zss, 5457 gen_helper_sve_ldffsds_be_zss, 5458 NULL, }, 5459 { gen_helper_sve_ldffbdu_zss, 5460 gen_helper_sve_ldffhdu_be_zss, 5461 gen_helper_sve_ldffsdu_be_zss, 5462 gen_helper_sve_ldffdd_be_zss, } }, 5463 { { gen_helper_sve_ldffbds_zd, 5464 gen_helper_sve_ldffhds_be_zd, 5465 gen_helper_sve_ldffsds_be_zd, 5466 NULL, }, 5467 { gen_helper_sve_ldffbdu_zd, 5468 gen_helper_sve_ldffhdu_be_zd, 5469 gen_helper_sve_ldffsdu_be_zd, 5470 gen_helper_sve_ldffdd_be_zd, } } } } }, 5471 { /* MTE Active */ 5472 { /* Little-endian */ 5473 { { { gen_helper_sve_ldbds_zsu_mte, 5474 gen_helper_sve_ldhds_le_zsu_mte, 5475 gen_helper_sve_ldsds_le_zsu_mte, 5476 NULL, }, 5477 { gen_helper_sve_ldbdu_zsu_mte, 5478 gen_helper_sve_ldhdu_le_zsu_mte, 5479 gen_helper_sve_ldsdu_le_zsu_mte, 5480 gen_helper_sve_lddd_le_zsu_mte, } }, 5481 { { gen_helper_sve_ldbds_zss_mte, 5482 gen_helper_sve_ldhds_le_zss_mte, 5483 gen_helper_sve_ldsds_le_zss_mte, 5484 NULL, }, 5485 { gen_helper_sve_ldbdu_zss_mte, 5486 gen_helper_sve_ldhdu_le_zss_mte, 5487 gen_helper_sve_ldsdu_le_zss_mte, 5488 gen_helper_sve_lddd_le_zss_mte, } }, 5489 { { gen_helper_sve_ldbds_zd_mte, 5490 gen_helper_sve_ldhds_le_zd_mte, 5491 gen_helper_sve_ldsds_le_zd_mte, 5492 NULL, }, 5493 { gen_helper_sve_ldbdu_zd_mte, 5494 gen_helper_sve_ldhdu_le_zd_mte, 5495 gen_helper_sve_ldsdu_le_zd_mte, 5496 gen_helper_sve_lddd_le_zd_mte, } } }, 5497 5498 /* First-fault */ 5499 { { { gen_helper_sve_ldffbds_zsu_mte, 5500 gen_helper_sve_ldffhds_le_zsu_mte, 5501 gen_helper_sve_ldffsds_le_zsu_mte, 5502 NULL, }, 5503 { gen_helper_sve_ldffbdu_zsu_mte, 5504 gen_helper_sve_ldffhdu_le_zsu_mte, 5505 gen_helper_sve_ldffsdu_le_zsu_mte, 5506 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5507 { { gen_helper_sve_ldffbds_zss_mte, 5508 gen_helper_sve_ldffhds_le_zss_mte, 5509 gen_helper_sve_ldffsds_le_zss_mte, 5510 NULL, }, 5511 { gen_helper_sve_ldffbdu_zss_mte, 5512 gen_helper_sve_ldffhdu_le_zss_mte, 5513 gen_helper_sve_ldffsdu_le_zss_mte, 5514 gen_helper_sve_ldffdd_le_zss_mte, } }, 5515 { { gen_helper_sve_ldffbds_zd_mte, 5516 gen_helper_sve_ldffhds_le_zd_mte, 5517 gen_helper_sve_ldffsds_le_zd_mte, 5518 NULL, }, 5519 { gen_helper_sve_ldffbdu_zd_mte, 5520 gen_helper_sve_ldffhdu_le_zd_mte, 5521 gen_helper_sve_ldffsdu_le_zd_mte, 5522 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5523 { /* Big-endian */ 5524 { { { gen_helper_sve_ldbds_zsu_mte, 5525 gen_helper_sve_ldhds_be_zsu_mte, 5526 gen_helper_sve_ldsds_be_zsu_mte, 5527 NULL, }, 5528 { gen_helper_sve_ldbdu_zsu_mte, 5529 gen_helper_sve_ldhdu_be_zsu_mte, 5530 gen_helper_sve_ldsdu_be_zsu_mte, 5531 gen_helper_sve_lddd_be_zsu_mte, } }, 5532 { { gen_helper_sve_ldbds_zss_mte, 5533 gen_helper_sve_ldhds_be_zss_mte, 5534 gen_helper_sve_ldsds_be_zss_mte, 5535 NULL, }, 5536 { gen_helper_sve_ldbdu_zss_mte, 5537 gen_helper_sve_ldhdu_be_zss_mte, 5538 gen_helper_sve_ldsdu_be_zss_mte, 5539 gen_helper_sve_lddd_be_zss_mte, } }, 5540 { { gen_helper_sve_ldbds_zd_mte, 5541 gen_helper_sve_ldhds_be_zd_mte, 5542 gen_helper_sve_ldsds_be_zd_mte, 5543 NULL, }, 5544 { gen_helper_sve_ldbdu_zd_mte, 5545 gen_helper_sve_ldhdu_be_zd_mte, 5546 gen_helper_sve_ldsdu_be_zd_mte, 5547 gen_helper_sve_lddd_be_zd_mte, } } }, 5548 5549 /* First-fault */ 5550 { { { gen_helper_sve_ldffbds_zsu_mte, 5551 gen_helper_sve_ldffhds_be_zsu_mte, 5552 gen_helper_sve_ldffsds_be_zsu_mte, 5553 NULL, }, 5554 { gen_helper_sve_ldffbdu_zsu_mte, 5555 gen_helper_sve_ldffhdu_be_zsu_mte, 5556 gen_helper_sve_ldffsdu_be_zsu_mte, 5557 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5558 { { gen_helper_sve_ldffbds_zss_mte, 5559 gen_helper_sve_ldffhds_be_zss_mte, 5560 gen_helper_sve_ldffsds_be_zss_mte, 5561 NULL, }, 5562 { gen_helper_sve_ldffbdu_zss_mte, 5563 gen_helper_sve_ldffhdu_be_zss_mte, 5564 gen_helper_sve_ldffsdu_be_zss_mte, 5565 gen_helper_sve_ldffdd_be_zss_mte, } }, 5566 { { gen_helper_sve_ldffbds_zd_mte, 5567 gen_helper_sve_ldffhds_be_zd_mte, 5568 gen_helper_sve_ldffsds_be_zd_mte, 5569 NULL, }, 5570 { gen_helper_sve_ldffbdu_zd_mte, 5571 gen_helper_sve_ldffhdu_be_zd_mte, 5572 gen_helper_sve_ldffsdu_be_zd_mte, 5573 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 5574 }; 5575 5576 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 5577 { 5578 gen_helper_gvec_mem_scatter *fn = NULL; 5579 bool be = s->be_data == MO_BE; 5580 bool mte = s->mte_active[0]; 5581 5582 if (!dc_isar_feature(aa64_sve, s)) { 5583 return false; 5584 } 5585 s->is_nonstreaming = true; 5586 if (!sve_access_check(s)) { 5587 return true; 5588 } 5589 5590 switch (a->esz) { 5591 case MO_32: 5592 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 5593 break; 5594 case MO_64: 5595 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 5596 break; 5597 } 5598 assert(fn != NULL); 5599 5600 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5601 cpu_reg_sp(s, a->rn), a->msz, false, fn); 5602 return true; 5603 } 5604 5605 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 5606 { 5607 gen_helper_gvec_mem_scatter *fn = NULL; 5608 bool be = s->be_data == MO_BE; 5609 bool mte = s->mte_active[0]; 5610 5611 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { 5612 return false; 5613 } 5614 if (!dc_isar_feature(aa64_sve, s)) { 5615 return false; 5616 } 5617 s->is_nonstreaming = true; 5618 if (!sve_access_check(s)) { 5619 return true; 5620 } 5621 5622 switch (a->esz) { 5623 case MO_32: 5624 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 5625 break; 5626 case MO_64: 5627 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 5628 break; 5629 } 5630 assert(fn != NULL); 5631 5632 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 5633 * by loading the immediate into the scalar parameter. 5634 */ 5635 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5636 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 5637 return true; 5638 } 5639 5640 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 5641 { 5642 gen_helper_gvec_mem_scatter *fn = NULL; 5643 bool be = s->be_data == MO_BE; 5644 bool mte = s->mte_active[0]; 5645 5646 if (a->esz < a->msz + !a->u) { 5647 return false; 5648 } 5649 if (!dc_isar_feature(aa64_sve2, s)) { 5650 return false; 5651 } 5652 s->is_nonstreaming = true; 5653 if (!sve_access_check(s)) { 5654 return true; 5655 } 5656 5657 switch (a->esz) { 5658 case MO_32: 5659 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 5660 break; 5661 case MO_64: 5662 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 5663 break; 5664 } 5665 assert(fn != NULL); 5666 5667 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5668 cpu_reg(s, a->rm), a->msz, false, fn); 5669 return true; 5670 } 5671 5672 /* Indexed by [mte][be][xs][msz]. */ 5673 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { 5674 { /* MTE Inactive */ 5675 { /* Little-endian */ 5676 { gen_helper_sve_stbs_zsu, 5677 gen_helper_sve_sths_le_zsu, 5678 gen_helper_sve_stss_le_zsu, }, 5679 { gen_helper_sve_stbs_zss, 5680 gen_helper_sve_sths_le_zss, 5681 gen_helper_sve_stss_le_zss, } }, 5682 { /* Big-endian */ 5683 { gen_helper_sve_stbs_zsu, 5684 gen_helper_sve_sths_be_zsu, 5685 gen_helper_sve_stss_be_zsu, }, 5686 { gen_helper_sve_stbs_zss, 5687 gen_helper_sve_sths_be_zss, 5688 gen_helper_sve_stss_be_zss, } } }, 5689 { /* MTE Active */ 5690 { /* Little-endian */ 5691 { gen_helper_sve_stbs_zsu_mte, 5692 gen_helper_sve_sths_le_zsu_mte, 5693 gen_helper_sve_stss_le_zsu_mte, }, 5694 { gen_helper_sve_stbs_zss_mte, 5695 gen_helper_sve_sths_le_zss_mte, 5696 gen_helper_sve_stss_le_zss_mte, } }, 5697 { /* Big-endian */ 5698 { gen_helper_sve_stbs_zsu_mte, 5699 gen_helper_sve_sths_be_zsu_mte, 5700 gen_helper_sve_stss_be_zsu_mte, }, 5701 { gen_helper_sve_stbs_zss_mte, 5702 gen_helper_sve_sths_be_zss_mte, 5703 gen_helper_sve_stss_be_zss_mte, } } }, 5704 }; 5705 5706 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5707 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { 5708 { /* MTE Inactive */ 5709 { /* Little-endian */ 5710 { gen_helper_sve_stbd_zsu, 5711 gen_helper_sve_sthd_le_zsu, 5712 gen_helper_sve_stsd_le_zsu, 5713 gen_helper_sve_stdd_le_zsu, }, 5714 { gen_helper_sve_stbd_zss, 5715 gen_helper_sve_sthd_le_zss, 5716 gen_helper_sve_stsd_le_zss, 5717 gen_helper_sve_stdd_le_zss, }, 5718 { gen_helper_sve_stbd_zd, 5719 gen_helper_sve_sthd_le_zd, 5720 gen_helper_sve_stsd_le_zd, 5721 gen_helper_sve_stdd_le_zd, } }, 5722 { /* Big-endian */ 5723 { gen_helper_sve_stbd_zsu, 5724 gen_helper_sve_sthd_be_zsu, 5725 gen_helper_sve_stsd_be_zsu, 5726 gen_helper_sve_stdd_be_zsu, }, 5727 { gen_helper_sve_stbd_zss, 5728 gen_helper_sve_sthd_be_zss, 5729 gen_helper_sve_stsd_be_zss, 5730 gen_helper_sve_stdd_be_zss, }, 5731 { gen_helper_sve_stbd_zd, 5732 gen_helper_sve_sthd_be_zd, 5733 gen_helper_sve_stsd_be_zd, 5734 gen_helper_sve_stdd_be_zd, } } }, 5735 { /* MTE Inactive */ 5736 { /* Little-endian */ 5737 { gen_helper_sve_stbd_zsu_mte, 5738 gen_helper_sve_sthd_le_zsu_mte, 5739 gen_helper_sve_stsd_le_zsu_mte, 5740 gen_helper_sve_stdd_le_zsu_mte, }, 5741 { gen_helper_sve_stbd_zss_mte, 5742 gen_helper_sve_sthd_le_zss_mte, 5743 gen_helper_sve_stsd_le_zss_mte, 5744 gen_helper_sve_stdd_le_zss_mte, }, 5745 { gen_helper_sve_stbd_zd_mte, 5746 gen_helper_sve_sthd_le_zd_mte, 5747 gen_helper_sve_stsd_le_zd_mte, 5748 gen_helper_sve_stdd_le_zd_mte, } }, 5749 { /* Big-endian */ 5750 { gen_helper_sve_stbd_zsu_mte, 5751 gen_helper_sve_sthd_be_zsu_mte, 5752 gen_helper_sve_stsd_be_zsu_mte, 5753 gen_helper_sve_stdd_be_zsu_mte, }, 5754 { gen_helper_sve_stbd_zss_mte, 5755 gen_helper_sve_sthd_be_zss_mte, 5756 gen_helper_sve_stsd_be_zss_mte, 5757 gen_helper_sve_stdd_be_zss_mte, }, 5758 { gen_helper_sve_stbd_zd_mte, 5759 gen_helper_sve_sthd_be_zd_mte, 5760 gen_helper_sve_stsd_be_zd_mte, 5761 gen_helper_sve_stdd_be_zd_mte, } } }, 5762 }; 5763 5764 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) 5765 { 5766 gen_helper_gvec_mem_scatter *fn; 5767 bool be = s->be_data == MO_BE; 5768 bool mte = s->mte_active[0]; 5769 5770 if (a->esz < a->msz || (a->msz == 0 && a->scale)) { 5771 return false; 5772 } 5773 if (!dc_isar_feature(aa64_sve, s)) { 5774 return false; 5775 } 5776 s->is_nonstreaming = true; 5777 if (!sve_access_check(s)) { 5778 return true; 5779 } 5780 switch (a->esz) { 5781 case MO_32: 5782 fn = scatter_store_fn32[mte][be][a->xs][a->msz]; 5783 break; 5784 case MO_64: 5785 fn = scatter_store_fn64[mte][be][a->xs][a->msz]; 5786 break; 5787 default: 5788 g_assert_not_reached(); 5789 } 5790 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5791 cpu_reg_sp(s, a->rn), a->msz, true, fn); 5792 return true; 5793 } 5794 5795 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) 5796 { 5797 gen_helper_gvec_mem_scatter *fn = NULL; 5798 bool be = s->be_data == MO_BE; 5799 bool mte = s->mte_active[0]; 5800 5801 if (a->esz < a->msz) { 5802 return false; 5803 } 5804 if (!dc_isar_feature(aa64_sve, s)) { 5805 return false; 5806 } 5807 s->is_nonstreaming = true; 5808 if (!sve_access_check(s)) { 5809 return true; 5810 } 5811 5812 switch (a->esz) { 5813 case MO_32: 5814 fn = scatter_store_fn32[mte][be][0][a->msz]; 5815 break; 5816 case MO_64: 5817 fn = scatter_store_fn64[mte][be][2][a->msz]; 5818 break; 5819 } 5820 assert(fn != NULL); 5821 5822 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) 5823 * by loading the immediate into the scalar parameter. 5824 */ 5825 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5826 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn); 5827 return true; 5828 } 5829 5830 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) 5831 { 5832 gen_helper_gvec_mem_scatter *fn; 5833 bool be = s->be_data == MO_BE; 5834 bool mte = s->mte_active[0]; 5835 5836 if (a->esz < a->msz) { 5837 return false; 5838 } 5839 if (!dc_isar_feature(aa64_sve2, s)) { 5840 return false; 5841 } 5842 s->is_nonstreaming = true; 5843 if (!sve_access_check(s)) { 5844 return true; 5845 } 5846 5847 switch (a->esz) { 5848 case MO_32: 5849 fn = scatter_store_fn32[mte][be][0][a->msz]; 5850 break; 5851 case MO_64: 5852 fn = scatter_store_fn64[mte][be][2][a->msz]; 5853 break; 5854 default: 5855 g_assert_not_reached(); 5856 } 5857 5858 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5859 cpu_reg(s, a->rm), a->msz, true, fn); 5860 return true; 5861 } 5862 5863 /* 5864 * Prefetches 5865 */ 5866 5867 static bool trans_PRF(DisasContext *s, arg_PRF *a) 5868 { 5869 if (!dc_isar_feature(aa64_sve, s)) { 5870 return false; 5871 } 5872 /* Prefetch is a nop within QEMU. */ 5873 (void)sve_access_check(s); 5874 return true; 5875 } 5876 5877 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) 5878 { 5879 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 5880 return false; 5881 } 5882 /* Prefetch is a nop within QEMU. */ 5883 (void)sve_access_check(s); 5884 return true; 5885 } 5886 5887 static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) 5888 { 5889 if (!dc_isar_feature(aa64_sve, s)) { 5890 return false; 5891 } 5892 /* Prefetch is a nop within QEMU. */ 5893 s->is_nonstreaming = true; 5894 (void)sve_access_check(s); 5895 return true; 5896 } 5897 5898 /* 5899 * Move Prefix 5900 * 5901 * TODO: The implementation so far could handle predicated merging movprfx. 5902 * The helper functions as written take an extra source register to 5903 * use in the operation, but the result is only written when predication 5904 * succeeds. For unpredicated movprfx, we need to rearrange the helpers 5905 * to allow the final write back to the destination to be unconditional. 5906 * For predicated zeroing movprfx, we need to rearrange the helpers to 5907 * allow the final write back to zero inactives. 5908 * 5909 * In the meantime, just emit the moves. 5910 */ 5911 5912 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 5913 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 5914 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 5915 5916 /* 5917 * SVE2 Integer Multiply - Unpredicated 5918 */ 5919 5920 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 5921 5922 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 5923 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 5924 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 5925 }; 5926 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5927 smulh_zzz_fns[a->esz], a, 0) 5928 5929 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 5930 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 5931 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 5932 }; 5933 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5934 umulh_zzz_fns[a->esz], a, 0) 5935 5936 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5937 gen_helper_gvec_pmul_b, a, 0) 5938 5939 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { 5940 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 5941 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 5942 }; 5943 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5944 sqdmulh_zzz_fns[a->esz], a, 0) 5945 5946 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 5947 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 5948 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 5949 }; 5950 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5951 sqrdmulh_zzz_fns[a->esz], a, 0) 5952 5953 /* 5954 * SVE2 Integer - Predicated 5955 */ 5956 5957 static gen_helper_gvec_4 * const sadlp_fns[4] = { 5958 NULL, gen_helper_sve2_sadalp_zpzz_h, 5959 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 5960 }; 5961 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5962 sadlp_fns[a->esz], a, 0) 5963 5964 static gen_helper_gvec_4 * const uadlp_fns[4] = { 5965 NULL, gen_helper_sve2_uadalp_zpzz_h, 5966 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 5967 }; 5968 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5969 uadlp_fns[a->esz], a, 0) 5970 5971 /* 5972 * SVE2 integer unary operations (predicated) 5973 */ 5974 5975 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 5976 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 5977 5978 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 5979 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0) 5980 5981 static gen_helper_gvec_3 * const sqabs_fns[4] = { 5982 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 5983 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 5984 }; 5985 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) 5986 5987 static gen_helper_gvec_3 * const sqneg_fns[4] = { 5988 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 5989 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 5990 }; 5991 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) 5992 5993 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) 5994 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) 5995 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) 5996 5997 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) 5998 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) 5999 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) 6000 6001 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) 6002 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) 6003 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) 6004 6005 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) 6006 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) 6007 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) 6008 6009 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) 6010 DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp) 6011 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) 6012 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) 6013 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) 6014 6015 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) 6016 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) 6017 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) 6018 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) 6019 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) 6020 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) 6021 6022 /* 6023 * SVE2 Widening Integer Arithmetic 6024 */ 6025 6026 static gen_helper_gvec_3 * const saddl_fns[4] = { 6027 NULL, gen_helper_sve2_saddl_h, 6028 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d, 6029 }; 6030 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6031 saddl_fns[a->esz], a, 0) 6032 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6033 saddl_fns[a->esz], a, 3) 6034 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6035 saddl_fns[a->esz], a, 2) 6036 6037 static gen_helper_gvec_3 * const ssubl_fns[4] = { 6038 NULL, gen_helper_sve2_ssubl_h, 6039 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d, 6040 }; 6041 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6042 ssubl_fns[a->esz], a, 0) 6043 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6044 ssubl_fns[a->esz], a, 3) 6045 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6046 ssubl_fns[a->esz], a, 2) 6047 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz, 6048 ssubl_fns[a->esz], a, 1) 6049 6050 static gen_helper_gvec_3 * const sabdl_fns[4] = { 6051 NULL, gen_helper_sve2_sabdl_h, 6052 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d, 6053 }; 6054 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6055 sabdl_fns[a->esz], a, 0) 6056 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6057 sabdl_fns[a->esz], a, 3) 6058 6059 static gen_helper_gvec_3 * const uaddl_fns[4] = { 6060 NULL, gen_helper_sve2_uaddl_h, 6061 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d, 6062 }; 6063 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6064 uaddl_fns[a->esz], a, 0) 6065 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6066 uaddl_fns[a->esz], a, 3) 6067 6068 static gen_helper_gvec_3 * const usubl_fns[4] = { 6069 NULL, gen_helper_sve2_usubl_h, 6070 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d, 6071 }; 6072 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6073 usubl_fns[a->esz], a, 0) 6074 TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6075 usubl_fns[a->esz], a, 3) 6076 6077 static gen_helper_gvec_3 * const uabdl_fns[4] = { 6078 NULL, gen_helper_sve2_uabdl_h, 6079 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d, 6080 }; 6081 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6082 uabdl_fns[a->esz], a, 0) 6083 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6084 uabdl_fns[a->esz], a, 3) 6085 6086 static gen_helper_gvec_3 * const sqdmull_fns[4] = { 6087 NULL, gen_helper_sve2_sqdmull_zzz_h, 6088 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d, 6089 }; 6090 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6091 sqdmull_fns[a->esz], a, 0) 6092 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6093 sqdmull_fns[a->esz], a, 3) 6094 6095 static gen_helper_gvec_3 * const smull_fns[4] = { 6096 NULL, gen_helper_sve2_smull_zzz_h, 6097 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d, 6098 }; 6099 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6100 smull_fns[a->esz], a, 0) 6101 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6102 smull_fns[a->esz], a, 3) 6103 6104 static gen_helper_gvec_3 * const umull_fns[4] = { 6105 NULL, gen_helper_sve2_umull_zzz_h, 6106 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d, 6107 }; 6108 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6109 umull_fns[a->esz], a, 0) 6110 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6111 umull_fns[a->esz], a, 3) 6112 6113 static gen_helper_gvec_3 * const eoril_fns[4] = { 6114 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6115 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 6116 }; 6117 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2) 6118 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1) 6119 6120 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6121 { 6122 static gen_helper_gvec_3 * const fns[4] = { 6123 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6124 NULL, gen_helper_sve2_pmull_d, 6125 }; 6126 6127 if (a->esz == 0) { 6128 if (!dc_isar_feature(aa64_sve2_pmull128, s)) { 6129 return false; 6130 } 6131 s->is_nonstreaming = true; 6132 } else if (!dc_isar_feature(aa64_sve, s)) { 6133 return false; 6134 } 6135 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); 6136 } 6137 6138 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false) 6139 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true) 6140 6141 static gen_helper_gvec_3 * const saddw_fns[4] = { 6142 NULL, gen_helper_sve2_saddw_h, 6143 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d, 6144 }; 6145 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0) 6146 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1) 6147 6148 static gen_helper_gvec_3 * const ssubw_fns[4] = { 6149 NULL, gen_helper_sve2_ssubw_h, 6150 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d, 6151 }; 6152 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0) 6153 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1) 6154 6155 static gen_helper_gvec_3 * const uaddw_fns[4] = { 6156 NULL, gen_helper_sve2_uaddw_h, 6157 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d, 6158 }; 6159 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0) 6160 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1) 6161 6162 static gen_helper_gvec_3 * const usubw_fns[4] = { 6163 NULL, gen_helper_sve2_usubw_h, 6164 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d, 6165 }; 6166 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0) 6167 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1) 6168 6169 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6170 { 6171 int top = imm & 1; 6172 int shl = imm >> 1; 6173 int halfbits = 4 << vece; 6174 6175 if (top) { 6176 if (shl == halfbits) { 6177 TCGv_vec t = tcg_temp_new_vec_matching(d); 6178 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6179 tcg_gen_and_vec(vece, d, n, t); 6180 } else { 6181 tcg_gen_sari_vec(vece, d, n, halfbits); 6182 tcg_gen_shli_vec(vece, d, d, shl); 6183 } 6184 } else { 6185 tcg_gen_shli_vec(vece, d, n, halfbits); 6186 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 6187 } 6188 } 6189 6190 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 6191 { 6192 int halfbits = 4 << vece; 6193 int top = imm & 1; 6194 int shl = (imm >> 1); 6195 int shift; 6196 uint64_t mask; 6197 6198 mask = MAKE_64BIT_MASK(0, halfbits); 6199 mask <<= shl; 6200 mask = dup_const(vece, mask); 6201 6202 shift = shl - top * halfbits; 6203 if (shift < 0) { 6204 tcg_gen_shri_i64(d, n, -shift); 6205 } else { 6206 tcg_gen_shli_i64(d, n, shift); 6207 } 6208 tcg_gen_andi_i64(d, d, mask); 6209 } 6210 6211 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6212 { 6213 gen_ushll_i64(MO_16, d, n, imm); 6214 } 6215 6216 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6217 { 6218 gen_ushll_i64(MO_32, d, n, imm); 6219 } 6220 6221 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6222 { 6223 gen_ushll_i64(MO_64, d, n, imm); 6224 } 6225 6226 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6227 { 6228 int halfbits = 4 << vece; 6229 int top = imm & 1; 6230 int shl = imm >> 1; 6231 6232 if (top) { 6233 if (shl == halfbits) { 6234 TCGv_vec t = tcg_temp_new_vec_matching(d); 6235 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6236 tcg_gen_and_vec(vece, d, n, t); 6237 } else { 6238 tcg_gen_shri_vec(vece, d, n, halfbits); 6239 tcg_gen_shli_vec(vece, d, d, shl); 6240 } 6241 } else { 6242 if (shl == 0) { 6243 TCGv_vec t = tcg_temp_new_vec_matching(d); 6244 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6245 tcg_gen_and_vec(vece, d, n, t); 6246 } else { 6247 tcg_gen_shli_vec(vece, d, n, halfbits); 6248 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 6249 } 6250 } 6251 } 6252 6253 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a, 6254 const GVecGen2i ops[3], bool sel) 6255 { 6256 6257 if (a->esz < 0 || a->esz > 2) { 6258 return false; 6259 } 6260 if (sve_access_check(s)) { 6261 unsigned vsz = vec_full_reg_size(s); 6262 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6263 vec_full_reg_offset(s, a->rn), 6264 vsz, vsz, (a->imm << 1) | sel, 6265 &ops[a->esz]); 6266 } 6267 return true; 6268 } 6269 6270 static const TCGOpcode sshll_list[] = { 6271 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 6272 }; 6273 static const GVecGen2i sshll_ops[3] = { 6274 { .fniv = gen_sshll_vec, 6275 .opt_opc = sshll_list, 6276 .fno = gen_helper_sve2_sshll_h, 6277 .vece = MO_16 }, 6278 { .fniv = gen_sshll_vec, 6279 .opt_opc = sshll_list, 6280 .fno = gen_helper_sve2_sshll_s, 6281 .vece = MO_32 }, 6282 { .fniv = gen_sshll_vec, 6283 .opt_opc = sshll_list, 6284 .fno = gen_helper_sve2_sshll_d, 6285 .vece = MO_64 } 6286 }; 6287 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false) 6288 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true) 6289 6290 static const TCGOpcode ushll_list[] = { 6291 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 6292 }; 6293 static const GVecGen2i ushll_ops[3] = { 6294 { .fni8 = gen_ushll16_i64, 6295 .fniv = gen_ushll_vec, 6296 .opt_opc = ushll_list, 6297 .fno = gen_helper_sve2_ushll_h, 6298 .vece = MO_16 }, 6299 { .fni8 = gen_ushll32_i64, 6300 .fniv = gen_ushll_vec, 6301 .opt_opc = ushll_list, 6302 .fno = gen_helper_sve2_ushll_s, 6303 .vece = MO_32 }, 6304 { .fni8 = gen_ushll64_i64, 6305 .fniv = gen_ushll_vec, 6306 .opt_opc = ushll_list, 6307 .fno = gen_helper_sve2_ushll_d, 6308 .vece = MO_64 }, 6309 }; 6310 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false) 6311 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true) 6312 6313 static gen_helper_gvec_3 * const bext_fns[4] = { 6314 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 6315 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 6316 }; 6317 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6318 bext_fns[a->esz], a, 0) 6319 6320 static gen_helper_gvec_3 * const bdep_fns[4] = { 6321 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 6322 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 6323 }; 6324 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6325 bdep_fns[a->esz], a, 0) 6326 6327 static gen_helper_gvec_3 * const bgrp_fns[4] = { 6328 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 6329 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 6330 }; 6331 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6332 bgrp_fns[a->esz], a, 0) 6333 6334 static gen_helper_gvec_3 * const cadd_fns[4] = { 6335 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 6336 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d, 6337 }; 6338 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6339 cadd_fns[a->esz], a, 0) 6340 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6341 cadd_fns[a->esz], a, 1) 6342 6343 static gen_helper_gvec_3 * const sqcadd_fns[4] = { 6344 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 6345 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d, 6346 }; 6347 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6348 sqcadd_fns[a->esz], a, 0) 6349 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6350 sqcadd_fns[a->esz], a, 1) 6351 6352 static gen_helper_gvec_4 * const sabal_fns[4] = { 6353 NULL, gen_helper_sve2_sabal_h, 6354 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d, 6355 }; 6356 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0) 6357 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1) 6358 6359 static gen_helper_gvec_4 * const uabal_fns[4] = { 6360 NULL, gen_helper_sve2_uabal_h, 6361 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d, 6362 }; 6363 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0) 6364 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1) 6365 6366 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 6367 { 6368 static gen_helper_gvec_4 * const fns[2] = { 6369 gen_helper_sve2_adcl_s, 6370 gen_helper_sve2_adcl_d, 6371 }; 6372 /* 6373 * Note that in this case the ESZ field encodes both size and sign. 6374 * Split out 'subtract' into bit 1 of the data field for the helper. 6375 */ 6376 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 6377 } 6378 6379 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 6380 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 6381 6382 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 6383 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 6384 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 6385 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 6386 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 6387 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 6388 6389 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 6390 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 6391 6392 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 6393 const GVecGen2 ops[3]) 6394 { 6395 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 6396 return false; 6397 } 6398 if (sve_access_check(s)) { 6399 unsigned vsz = vec_full_reg_size(s); 6400 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 6401 vec_full_reg_offset(s, a->rn), 6402 vsz, vsz, &ops[a->esz]); 6403 } 6404 return true; 6405 } 6406 6407 static const TCGOpcode sqxtn_list[] = { 6408 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 6409 }; 6410 6411 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6412 { 6413 TCGv_vec t = tcg_temp_new_vec_matching(d); 6414 int halfbits = 4 << vece; 6415 int64_t mask = (1ull << halfbits) - 1; 6416 int64_t min = -1ull << (halfbits - 1); 6417 int64_t max = -min - 1; 6418 6419 tcg_gen_dupi_vec(vece, t, min); 6420 tcg_gen_smax_vec(vece, d, n, t); 6421 tcg_gen_dupi_vec(vece, t, max); 6422 tcg_gen_smin_vec(vece, d, d, t); 6423 tcg_gen_dupi_vec(vece, t, mask); 6424 tcg_gen_and_vec(vece, d, d, t); 6425 } 6426 6427 static const GVecGen2 sqxtnb_ops[3] = { 6428 { .fniv = gen_sqxtnb_vec, 6429 .opt_opc = sqxtn_list, 6430 .fno = gen_helper_sve2_sqxtnb_h, 6431 .vece = MO_16 }, 6432 { .fniv = gen_sqxtnb_vec, 6433 .opt_opc = sqxtn_list, 6434 .fno = gen_helper_sve2_sqxtnb_s, 6435 .vece = MO_32 }, 6436 { .fniv = gen_sqxtnb_vec, 6437 .opt_opc = sqxtn_list, 6438 .fno = gen_helper_sve2_sqxtnb_d, 6439 .vece = MO_64 }, 6440 }; 6441 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 6442 6443 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6444 { 6445 TCGv_vec t = tcg_temp_new_vec_matching(d); 6446 int halfbits = 4 << vece; 6447 int64_t mask = (1ull << halfbits) - 1; 6448 int64_t min = -1ull << (halfbits - 1); 6449 int64_t max = -min - 1; 6450 6451 tcg_gen_dupi_vec(vece, t, min); 6452 tcg_gen_smax_vec(vece, n, n, t); 6453 tcg_gen_dupi_vec(vece, t, max); 6454 tcg_gen_smin_vec(vece, n, n, t); 6455 tcg_gen_shli_vec(vece, n, n, halfbits); 6456 tcg_gen_dupi_vec(vece, t, mask); 6457 tcg_gen_bitsel_vec(vece, d, t, d, n); 6458 } 6459 6460 static const GVecGen2 sqxtnt_ops[3] = { 6461 { .fniv = gen_sqxtnt_vec, 6462 .opt_opc = sqxtn_list, 6463 .load_dest = true, 6464 .fno = gen_helper_sve2_sqxtnt_h, 6465 .vece = MO_16 }, 6466 { .fniv = gen_sqxtnt_vec, 6467 .opt_opc = sqxtn_list, 6468 .load_dest = true, 6469 .fno = gen_helper_sve2_sqxtnt_s, 6470 .vece = MO_32 }, 6471 { .fniv = gen_sqxtnt_vec, 6472 .opt_opc = sqxtn_list, 6473 .load_dest = true, 6474 .fno = gen_helper_sve2_sqxtnt_d, 6475 .vece = MO_64 }, 6476 }; 6477 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 6478 6479 static const TCGOpcode uqxtn_list[] = { 6480 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 6481 }; 6482 6483 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6484 { 6485 TCGv_vec t = tcg_temp_new_vec_matching(d); 6486 int halfbits = 4 << vece; 6487 int64_t max = (1ull << halfbits) - 1; 6488 6489 tcg_gen_dupi_vec(vece, t, max); 6490 tcg_gen_umin_vec(vece, d, n, t); 6491 } 6492 6493 static const GVecGen2 uqxtnb_ops[3] = { 6494 { .fniv = gen_uqxtnb_vec, 6495 .opt_opc = uqxtn_list, 6496 .fno = gen_helper_sve2_uqxtnb_h, 6497 .vece = MO_16 }, 6498 { .fniv = gen_uqxtnb_vec, 6499 .opt_opc = uqxtn_list, 6500 .fno = gen_helper_sve2_uqxtnb_s, 6501 .vece = MO_32 }, 6502 { .fniv = gen_uqxtnb_vec, 6503 .opt_opc = uqxtn_list, 6504 .fno = gen_helper_sve2_uqxtnb_d, 6505 .vece = MO_64 }, 6506 }; 6507 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) 6508 6509 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6510 { 6511 TCGv_vec t = tcg_temp_new_vec_matching(d); 6512 int halfbits = 4 << vece; 6513 int64_t max = (1ull << halfbits) - 1; 6514 6515 tcg_gen_dupi_vec(vece, t, max); 6516 tcg_gen_umin_vec(vece, n, n, t); 6517 tcg_gen_shli_vec(vece, n, n, halfbits); 6518 tcg_gen_bitsel_vec(vece, d, t, d, n); 6519 } 6520 6521 static const GVecGen2 uqxtnt_ops[3] = { 6522 { .fniv = gen_uqxtnt_vec, 6523 .opt_opc = uqxtn_list, 6524 .load_dest = true, 6525 .fno = gen_helper_sve2_uqxtnt_h, 6526 .vece = MO_16 }, 6527 { .fniv = gen_uqxtnt_vec, 6528 .opt_opc = uqxtn_list, 6529 .load_dest = true, 6530 .fno = gen_helper_sve2_uqxtnt_s, 6531 .vece = MO_32 }, 6532 { .fniv = gen_uqxtnt_vec, 6533 .opt_opc = uqxtn_list, 6534 .load_dest = true, 6535 .fno = gen_helper_sve2_uqxtnt_d, 6536 .vece = MO_64 }, 6537 }; 6538 TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops) 6539 6540 static const TCGOpcode sqxtun_list[] = { 6541 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 6542 }; 6543 6544 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6545 { 6546 TCGv_vec t = tcg_temp_new_vec_matching(d); 6547 int halfbits = 4 << vece; 6548 int64_t max = (1ull << halfbits) - 1; 6549 6550 tcg_gen_dupi_vec(vece, t, 0); 6551 tcg_gen_smax_vec(vece, d, n, t); 6552 tcg_gen_dupi_vec(vece, t, max); 6553 tcg_gen_umin_vec(vece, d, d, t); 6554 } 6555 6556 static const GVecGen2 sqxtunb_ops[3] = { 6557 { .fniv = gen_sqxtunb_vec, 6558 .opt_opc = sqxtun_list, 6559 .fno = gen_helper_sve2_sqxtunb_h, 6560 .vece = MO_16 }, 6561 { .fniv = gen_sqxtunb_vec, 6562 .opt_opc = sqxtun_list, 6563 .fno = gen_helper_sve2_sqxtunb_s, 6564 .vece = MO_32 }, 6565 { .fniv = gen_sqxtunb_vec, 6566 .opt_opc = sqxtun_list, 6567 .fno = gen_helper_sve2_sqxtunb_d, 6568 .vece = MO_64 }, 6569 }; 6570 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) 6571 6572 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6573 { 6574 TCGv_vec t = tcg_temp_new_vec_matching(d); 6575 int halfbits = 4 << vece; 6576 int64_t max = (1ull << halfbits) - 1; 6577 6578 tcg_gen_dupi_vec(vece, t, 0); 6579 tcg_gen_smax_vec(vece, n, n, t); 6580 tcg_gen_dupi_vec(vece, t, max); 6581 tcg_gen_umin_vec(vece, n, n, t); 6582 tcg_gen_shli_vec(vece, n, n, halfbits); 6583 tcg_gen_bitsel_vec(vece, d, t, d, n); 6584 } 6585 6586 static const GVecGen2 sqxtunt_ops[3] = { 6587 { .fniv = gen_sqxtunt_vec, 6588 .opt_opc = sqxtun_list, 6589 .load_dest = true, 6590 .fno = gen_helper_sve2_sqxtunt_h, 6591 .vece = MO_16 }, 6592 { .fniv = gen_sqxtunt_vec, 6593 .opt_opc = sqxtun_list, 6594 .load_dest = true, 6595 .fno = gen_helper_sve2_sqxtunt_s, 6596 .vece = MO_32 }, 6597 { .fniv = gen_sqxtunt_vec, 6598 .opt_opc = sqxtun_list, 6599 .load_dest = true, 6600 .fno = gen_helper_sve2_sqxtunt_d, 6601 .vece = MO_64 }, 6602 }; 6603 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops) 6604 6605 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a, 6606 const GVecGen2i ops[3]) 6607 { 6608 if (a->esz < 0 || a->esz > MO_32) { 6609 return false; 6610 } 6611 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 6612 if (sve_access_check(s)) { 6613 unsigned vsz = vec_full_reg_size(s); 6614 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6615 vec_full_reg_offset(s, a->rn), 6616 vsz, vsz, a->imm, &ops[a->esz]); 6617 } 6618 return true; 6619 } 6620 6621 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6622 { 6623 int halfbits = 4 << vece; 6624 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6625 6626 tcg_gen_shri_i64(d, n, shr); 6627 tcg_gen_andi_i64(d, d, mask); 6628 } 6629 6630 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6631 { 6632 gen_shrnb_i64(MO_16, d, n, shr); 6633 } 6634 6635 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6636 { 6637 gen_shrnb_i64(MO_32, d, n, shr); 6638 } 6639 6640 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6641 { 6642 gen_shrnb_i64(MO_64, d, n, shr); 6643 } 6644 6645 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6646 { 6647 TCGv_vec t = tcg_temp_new_vec_matching(d); 6648 int halfbits = 4 << vece; 6649 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6650 6651 tcg_gen_shri_vec(vece, n, n, shr); 6652 tcg_gen_dupi_vec(vece, t, mask); 6653 tcg_gen_and_vec(vece, d, n, t); 6654 } 6655 6656 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; 6657 static const GVecGen2i shrnb_ops[3] = { 6658 { .fni8 = gen_shrnb16_i64, 6659 .fniv = gen_shrnb_vec, 6660 .opt_opc = shrnb_vec_list, 6661 .fno = gen_helper_sve2_shrnb_h, 6662 .vece = MO_16 }, 6663 { .fni8 = gen_shrnb32_i64, 6664 .fniv = gen_shrnb_vec, 6665 .opt_opc = shrnb_vec_list, 6666 .fno = gen_helper_sve2_shrnb_s, 6667 .vece = MO_32 }, 6668 { .fni8 = gen_shrnb64_i64, 6669 .fniv = gen_shrnb_vec, 6670 .opt_opc = shrnb_vec_list, 6671 .fno = gen_helper_sve2_shrnb_d, 6672 .vece = MO_64 }, 6673 }; 6674 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops) 6675 6676 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6677 { 6678 int halfbits = 4 << vece; 6679 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6680 6681 tcg_gen_shli_i64(n, n, halfbits - shr); 6682 tcg_gen_andi_i64(n, n, ~mask); 6683 tcg_gen_andi_i64(d, d, mask); 6684 tcg_gen_or_i64(d, d, n); 6685 } 6686 6687 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6688 { 6689 gen_shrnt_i64(MO_16, d, n, shr); 6690 } 6691 6692 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6693 { 6694 gen_shrnt_i64(MO_32, d, n, shr); 6695 } 6696 6697 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6698 { 6699 tcg_gen_shri_i64(n, n, shr); 6700 tcg_gen_deposit_i64(d, d, n, 32, 32); 6701 } 6702 6703 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6704 { 6705 TCGv_vec t = tcg_temp_new_vec_matching(d); 6706 int halfbits = 4 << vece; 6707 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6708 6709 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 6710 tcg_gen_dupi_vec(vece, t, mask); 6711 tcg_gen_bitsel_vec(vece, d, t, d, n); 6712 } 6713 6714 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; 6715 static const GVecGen2i shrnt_ops[3] = { 6716 { .fni8 = gen_shrnt16_i64, 6717 .fniv = gen_shrnt_vec, 6718 .opt_opc = shrnt_vec_list, 6719 .load_dest = true, 6720 .fno = gen_helper_sve2_shrnt_h, 6721 .vece = MO_16 }, 6722 { .fni8 = gen_shrnt32_i64, 6723 .fniv = gen_shrnt_vec, 6724 .opt_opc = shrnt_vec_list, 6725 .load_dest = true, 6726 .fno = gen_helper_sve2_shrnt_s, 6727 .vece = MO_32 }, 6728 { .fni8 = gen_shrnt64_i64, 6729 .fniv = gen_shrnt_vec, 6730 .opt_opc = shrnt_vec_list, 6731 .load_dest = true, 6732 .fno = gen_helper_sve2_shrnt_d, 6733 .vece = MO_64 }, 6734 }; 6735 TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops) 6736 6737 static const GVecGen2i rshrnb_ops[3] = { 6738 { .fno = gen_helper_sve2_rshrnb_h }, 6739 { .fno = gen_helper_sve2_rshrnb_s }, 6740 { .fno = gen_helper_sve2_rshrnb_d }, 6741 }; 6742 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops) 6743 6744 static const GVecGen2i rshrnt_ops[3] = { 6745 { .fno = gen_helper_sve2_rshrnt_h }, 6746 { .fno = gen_helper_sve2_rshrnt_s }, 6747 { .fno = gen_helper_sve2_rshrnt_d }, 6748 }; 6749 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) 6750 6751 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 6752 TCGv_vec n, int64_t shr) 6753 { 6754 TCGv_vec t = tcg_temp_new_vec_matching(d); 6755 int halfbits = 4 << vece; 6756 6757 tcg_gen_sari_vec(vece, n, n, shr); 6758 tcg_gen_dupi_vec(vece, t, 0); 6759 tcg_gen_smax_vec(vece, n, n, t); 6760 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6761 tcg_gen_umin_vec(vece, d, n, t); 6762 } 6763 6764 static const TCGOpcode sqshrunb_vec_list[] = { 6765 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6766 }; 6767 static const GVecGen2i sqshrunb_ops[3] = { 6768 { .fniv = gen_sqshrunb_vec, 6769 .opt_opc = sqshrunb_vec_list, 6770 .fno = gen_helper_sve2_sqshrunb_h, 6771 .vece = MO_16 }, 6772 { .fniv = gen_sqshrunb_vec, 6773 .opt_opc = sqshrunb_vec_list, 6774 .fno = gen_helper_sve2_sqshrunb_s, 6775 .vece = MO_32 }, 6776 { .fniv = gen_sqshrunb_vec, 6777 .opt_opc = sqshrunb_vec_list, 6778 .fno = gen_helper_sve2_sqshrunb_d, 6779 .vece = MO_64 }, 6780 }; 6781 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) 6782 6783 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 6784 TCGv_vec n, int64_t shr) 6785 { 6786 TCGv_vec t = tcg_temp_new_vec_matching(d); 6787 int halfbits = 4 << vece; 6788 6789 tcg_gen_sari_vec(vece, n, n, shr); 6790 tcg_gen_dupi_vec(vece, t, 0); 6791 tcg_gen_smax_vec(vece, n, n, t); 6792 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6793 tcg_gen_umin_vec(vece, n, n, t); 6794 tcg_gen_shli_vec(vece, n, n, halfbits); 6795 tcg_gen_bitsel_vec(vece, d, t, d, n); 6796 } 6797 6798 static const TCGOpcode sqshrunt_vec_list[] = { 6799 INDEX_op_shli_vec, INDEX_op_sari_vec, 6800 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6801 }; 6802 static const GVecGen2i sqshrunt_ops[3] = { 6803 { .fniv = gen_sqshrunt_vec, 6804 .opt_opc = sqshrunt_vec_list, 6805 .load_dest = true, 6806 .fno = gen_helper_sve2_sqshrunt_h, 6807 .vece = MO_16 }, 6808 { .fniv = gen_sqshrunt_vec, 6809 .opt_opc = sqshrunt_vec_list, 6810 .load_dest = true, 6811 .fno = gen_helper_sve2_sqshrunt_s, 6812 .vece = MO_32 }, 6813 { .fniv = gen_sqshrunt_vec, 6814 .opt_opc = sqshrunt_vec_list, 6815 .load_dest = true, 6816 .fno = gen_helper_sve2_sqshrunt_d, 6817 .vece = MO_64 }, 6818 }; 6819 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops) 6820 6821 static const GVecGen2i sqrshrunb_ops[3] = { 6822 { .fno = gen_helper_sve2_sqrshrunb_h }, 6823 { .fno = gen_helper_sve2_sqrshrunb_s }, 6824 { .fno = gen_helper_sve2_sqrshrunb_d }, 6825 }; 6826 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops) 6827 6828 static const GVecGen2i sqrshrunt_ops[3] = { 6829 { .fno = gen_helper_sve2_sqrshrunt_h }, 6830 { .fno = gen_helper_sve2_sqrshrunt_s }, 6831 { .fno = gen_helper_sve2_sqrshrunt_d }, 6832 }; 6833 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops) 6834 6835 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 6836 TCGv_vec n, int64_t shr) 6837 { 6838 TCGv_vec t = tcg_temp_new_vec_matching(d); 6839 int halfbits = 4 << vece; 6840 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6841 int64_t min = -max - 1; 6842 6843 tcg_gen_sari_vec(vece, n, n, shr); 6844 tcg_gen_dupi_vec(vece, t, min); 6845 tcg_gen_smax_vec(vece, n, n, t); 6846 tcg_gen_dupi_vec(vece, t, max); 6847 tcg_gen_smin_vec(vece, n, n, t); 6848 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6849 tcg_gen_and_vec(vece, d, n, t); 6850 } 6851 6852 static const TCGOpcode sqshrnb_vec_list[] = { 6853 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6854 }; 6855 static const GVecGen2i sqshrnb_ops[3] = { 6856 { .fniv = gen_sqshrnb_vec, 6857 .opt_opc = sqshrnb_vec_list, 6858 .fno = gen_helper_sve2_sqshrnb_h, 6859 .vece = MO_16 }, 6860 { .fniv = gen_sqshrnb_vec, 6861 .opt_opc = sqshrnb_vec_list, 6862 .fno = gen_helper_sve2_sqshrnb_s, 6863 .vece = MO_32 }, 6864 { .fniv = gen_sqshrnb_vec, 6865 .opt_opc = sqshrnb_vec_list, 6866 .fno = gen_helper_sve2_sqshrnb_d, 6867 .vece = MO_64 }, 6868 }; 6869 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) 6870 6871 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 6872 TCGv_vec n, int64_t shr) 6873 { 6874 TCGv_vec t = tcg_temp_new_vec_matching(d); 6875 int halfbits = 4 << vece; 6876 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6877 int64_t min = -max - 1; 6878 6879 tcg_gen_sari_vec(vece, n, n, shr); 6880 tcg_gen_dupi_vec(vece, t, min); 6881 tcg_gen_smax_vec(vece, n, n, t); 6882 tcg_gen_dupi_vec(vece, t, max); 6883 tcg_gen_smin_vec(vece, n, n, t); 6884 tcg_gen_shli_vec(vece, n, n, halfbits); 6885 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6886 tcg_gen_bitsel_vec(vece, d, t, d, n); 6887 } 6888 6889 static const TCGOpcode sqshrnt_vec_list[] = { 6890 INDEX_op_shli_vec, INDEX_op_sari_vec, 6891 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6892 }; 6893 static const GVecGen2i sqshrnt_ops[3] = { 6894 { .fniv = gen_sqshrnt_vec, 6895 .opt_opc = sqshrnt_vec_list, 6896 .load_dest = true, 6897 .fno = gen_helper_sve2_sqshrnt_h, 6898 .vece = MO_16 }, 6899 { .fniv = gen_sqshrnt_vec, 6900 .opt_opc = sqshrnt_vec_list, 6901 .load_dest = true, 6902 .fno = gen_helper_sve2_sqshrnt_s, 6903 .vece = MO_32 }, 6904 { .fniv = gen_sqshrnt_vec, 6905 .opt_opc = sqshrnt_vec_list, 6906 .load_dest = true, 6907 .fno = gen_helper_sve2_sqshrnt_d, 6908 .vece = MO_64 }, 6909 }; 6910 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops) 6911 6912 static const GVecGen2i sqrshrnb_ops[3] = { 6913 { .fno = gen_helper_sve2_sqrshrnb_h }, 6914 { .fno = gen_helper_sve2_sqrshrnb_s }, 6915 { .fno = gen_helper_sve2_sqrshrnb_d }, 6916 }; 6917 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops) 6918 6919 static const GVecGen2i sqrshrnt_ops[3] = { 6920 { .fno = gen_helper_sve2_sqrshrnt_h }, 6921 { .fno = gen_helper_sve2_sqrshrnt_s }, 6922 { .fno = gen_helper_sve2_sqrshrnt_d }, 6923 }; 6924 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) 6925 6926 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 6927 TCGv_vec n, int64_t shr) 6928 { 6929 TCGv_vec t = tcg_temp_new_vec_matching(d); 6930 int halfbits = 4 << vece; 6931 6932 tcg_gen_shri_vec(vece, n, n, shr); 6933 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6934 tcg_gen_umin_vec(vece, d, n, t); 6935 } 6936 6937 static const TCGOpcode uqshrnb_vec_list[] = { 6938 INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6939 }; 6940 static const GVecGen2i uqshrnb_ops[3] = { 6941 { .fniv = gen_uqshrnb_vec, 6942 .opt_opc = uqshrnb_vec_list, 6943 .fno = gen_helper_sve2_uqshrnb_h, 6944 .vece = MO_16 }, 6945 { .fniv = gen_uqshrnb_vec, 6946 .opt_opc = uqshrnb_vec_list, 6947 .fno = gen_helper_sve2_uqshrnb_s, 6948 .vece = MO_32 }, 6949 { .fniv = gen_uqshrnb_vec, 6950 .opt_opc = uqshrnb_vec_list, 6951 .fno = gen_helper_sve2_uqshrnb_d, 6952 .vece = MO_64 }, 6953 }; 6954 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) 6955 6956 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, 6957 TCGv_vec n, int64_t shr) 6958 { 6959 TCGv_vec t = tcg_temp_new_vec_matching(d); 6960 int halfbits = 4 << vece; 6961 6962 tcg_gen_shri_vec(vece, n, n, shr); 6963 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6964 tcg_gen_umin_vec(vece, n, n, t); 6965 tcg_gen_shli_vec(vece, n, n, halfbits); 6966 tcg_gen_bitsel_vec(vece, d, t, d, n); 6967 } 6968 6969 static const TCGOpcode uqshrnt_vec_list[] = { 6970 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6971 }; 6972 static const GVecGen2i uqshrnt_ops[3] = { 6973 { .fniv = gen_uqshrnt_vec, 6974 .opt_opc = uqshrnt_vec_list, 6975 .load_dest = true, 6976 .fno = gen_helper_sve2_uqshrnt_h, 6977 .vece = MO_16 }, 6978 { .fniv = gen_uqshrnt_vec, 6979 .opt_opc = uqshrnt_vec_list, 6980 .load_dest = true, 6981 .fno = gen_helper_sve2_uqshrnt_s, 6982 .vece = MO_32 }, 6983 { .fniv = gen_uqshrnt_vec, 6984 .opt_opc = uqshrnt_vec_list, 6985 .load_dest = true, 6986 .fno = gen_helper_sve2_uqshrnt_d, 6987 .vece = MO_64 }, 6988 }; 6989 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops) 6990 6991 static const GVecGen2i uqrshrnb_ops[3] = { 6992 { .fno = gen_helper_sve2_uqrshrnb_h }, 6993 { .fno = gen_helper_sve2_uqrshrnb_s }, 6994 { .fno = gen_helper_sve2_uqrshrnb_d }, 6995 }; 6996 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops) 6997 6998 static const GVecGen2i uqrshrnt_ops[3] = { 6999 { .fno = gen_helper_sve2_uqrshrnt_h }, 7000 { .fno = gen_helper_sve2_uqrshrnt_s }, 7001 { .fno = gen_helper_sve2_uqrshrnt_d }, 7002 }; 7003 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops) 7004 7005 #define DO_SVE2_ZZZ_NARROW(NAME, name) \ 7006 static gen_helper_gvec_3 * const name##_fns[4] = { \ 7007 NULL, gen_helper_sve2_##name##_h, \ 7008 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 7009 }; \ 7010 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \ 7011 name##_fns[a->esz], a, 0) 7012 7013 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) 7014 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt) 7015 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb) 7016 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt) 7017 7018 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb) 7019 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt) 7020 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb) 7021 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) 7022 7023 static gen_helper_gvec_flags_4 * const match_fns[4] = { 7024 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL 7025 }; 7026 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) 7027 7028 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = { 7029 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL 7030 }; 7031 TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) 7032 7033 static gen_helper_gvec_4 * const histcnt_fns[4] = { 7034 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d 7035 }; 7036 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, 7037 histcnt_fns[a->esz], a, 0) 7038 7039 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, 7040 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) 7041 7042 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) 7043 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) 7044 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) 7045 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) 7046 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) 7047 7048 /* 7049 * SVE Integer Multiply-Add (unpredicated) 7050 */ 7051 7052 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, 7053 gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, 7054 0, FPST_FPCR) 7055 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, 7056 gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, 7057 0, FPST_FPCR) 7058 7059 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { 7060 NULL, gen_helper_sve2_sqdmlal_zzzw_h, 7061 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, 7062 }; 7063 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7064 sqdmlal_zzzw_fns[a->esz], a, 0) 7065 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7066 sqdmlal_zzzw_fns[a->esz], a, 3) 7067 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7068 sqdmlal_zzzw_fns[a->esz], a, 2) 7069 7070 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = { 7071 NULL, gen_helper_sve2_sqdmlsl_zzzw_h, 7072 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d, 7073 }; 7074 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7075 sqdmlsl_zzzw_fns[a->esz], a, 0) 7076 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7077 sqdmlsl_zzzw_fns[a->esz], a, 3) 7078 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7079 sqdmlsl_zzzw_fns[a->esz], a, 2) 7080 7081 static gen_helper_gvec_4 * const sqrdmlah_fns[] = { 7082 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h, 7083 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d, 7084 }; 7085 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7086 sqrdmlah_fns[a->esz], a, 0) 7087 7088 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = { 7089 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h, 7090 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d, 7091 }; 7092 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7093 sqrdmlsh_fns[a->esz], a, 0) 7094 7095 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = { 7096 NULL, gen_helper_sve2_smlal_zzzw_h, 7097 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d, 7098 }; 7099 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7100 smlal_zzzw_fns[a->esz], a, 0) 7101 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7102 smlal_zzzw_fns[a->esz], a, 1) 7103 7104 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = { 7105 NULL, gen_helper_sve2_umlal_zzzw_h, 7106 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d, 7107 }; 7108 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7109 umlal_zzzw_fns[a->esz], a, 0) 7110 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7111 umlal_zzzw_fns[a->esz], a, 1) 7112 7113 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = { 7114 NULL, gen_helper_sve2_smlsl_zzzw_h, 7115 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d, 7116 }; 7117 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7118 smlsl_zzzw_fns[a->esz], a, 0) 7119 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7120 smlsl_zzzw_fns[a->esz], a, 1) 7121 7122 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = { 7123 NULL, gen_helper_sve2_umlsl_zzzw_h, 7124 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, 7125 }; 7126 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7127 umlsl_zzzw_fns[a->esz], a, 0) 7128 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7129 umlsl_zzzw_fns[a->esz], a, 1) 7130 7131 static gen_helper_gvec_4 * const cmla_fns[] = { 7132 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, 7133 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, 7134 }; 7135 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7136 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7137 7138 static gen_helper_gvec_4 * const cdot_fns[] = { 7139 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d 7140 }; 7141 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7142 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7143 7144 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { 7145 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, 7146 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, 7147 }; 7148 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7149 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7150 7151 TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7152 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0) 7153 7154 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, 7155 gen_helper_crypto_aesmc, a->rd, a->rd, 0) 7156 TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz, 7157 gen_helper_crypto_aesimc, a->rd, a->rd, 0) 7158 7159 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7160 gen_helper_crypto_aese, a, 0) 7161 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7162 gen_helper_crypto_aesd, a, 0) 7163 7164 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7165 gen_helper_crypto_sm4e, a, 0) 7166 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7167 gen_helper_crypto_sm4ekey, a, 0) 7168 7169 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, 7170 gen_gvec_rax1, a) 7171 7172 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, 7173 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR) 7174 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, 7175 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR) 7176 7177 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 7178 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR) 7179 7180 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, 7181 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR) 7182 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, 7183 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR) 7184 7185 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, 7186 FPROUNDING_ODD, gen_helper_sve_fcvt_ds) 7187 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a, 7188 FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds) 7189 7190 static gen_helper_gvec_3_ptr * const flogb_fns[] = { 7191 NULL, gen_helper_flogb_h, 7192 gen_helper_flogb_s, gen_helper_flogb_d 7193 }; 7194 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], 7195 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 7196 7197 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) 7198 { 7199 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s, 7200 a->rd, a->rn, a->rm, a->ra, 7201 (sel << 1) | sub, tcg_env); 7202 } 7203 7204 TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false) 7205 TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true) 7206 TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false) 7207 TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true) 7208 7209 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) 7210 { 7211 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s, 7212 a->rd, a->rn, a->rm, a->ra, 7213 (a->index << 2) | (sel << 1) | sub, tcg_env); 7214 } 7215 7216 TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false) 7217 TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true) 7218 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false) 7219 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true) 7220 7221 TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7222 gen_helper_gvec_smmla_b, a, 0) 7223 TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7224 gen_helper_gvec_usmmla_b, a, 0) 7225 TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7226 gen_helper_gvec_ummla_b, a, 0) 7227 7228 TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, 7229 gen_helper_gvec_bfdot, a, 0) 7230 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz, 7231 gen_helper_gvec_bfdot_idx, a) 7232 7233 TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, 7234 gen_helper_gvec_bfmmla, a, 0) 7235 7236 static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) 7237 { 7238 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, 7239 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR); 7240 } 7241 7242 TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) 7243 TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true) 7244 7245 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) 7246 { 7247 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, 7248 a->rd, a->rn, a->rm, a->ra, 7249 (a->index << 1) | sel, FPST_FPCR); 7250 } 7251 7252 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) 7253 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) 7254 7255 static bool trans_PSEL(DisasContext *s, arg_psel *a) 7256 { 7257 int vl = vec_full_reg_size(s); 7258 int pl = pred_gvec_reg_size(s); 7259 int elements = vl >> a->esz; 7260 TCGv_i64 tmp, didx, dbit; 7261 TCGv_ptr ptr; 7262 7263 if (!dc_isar_feature(aa64_sme, s)) { 7264 return false; 7265 } 7266 if (!sve_access_check(s)) { 7267 return true; 7268 } 7269 7270 tmp = tcg_temp_new_i64(); 7271 dbit = tcg_temp_new_i64(); 7272 didx = tcg_temp_new_i64(); 7273 ptr = tcg_temp_new_ptr(); 7274 7275 /* Compute the predicate element. */ 7276 tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm); 7277 if (is_power_of_2(elements)) { 7278 tcg_gen_andi_i64(tmp, tmp, elements - 1); 7279 } else { 7280 tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements)); 7281 } 7282 7283 /* Extract the predicate byte and bit indices. */ 7284 tcg_gen_shli_i64(tmp, tmp, a->esz); 7285 tcg_gen_andi_i64(dbit, tmp, 7); 7286 tcg_gen_shri_i64(didx, tmp, 3); 7287 if (HOST_BIG_ENDIAN) { 7288 tcg_gen_xori_i64(didx, didx, 7); 7289 } 7290 7291 /* Load the predicate word. */ 7292 tcg_gen_trunc_i64_ptr(ptr, didx); 7293 tcg_gen_add_ptr(ptr, ptr, tcg_env); 7294 tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm)); 7295 7296 /* Extract the predicate bit and replicate to MO_64. */ 7297 tcg_gen_shr_i64(tmp, tmp, dbit); 7298 tcg_gen_andi_i64(tmp, tmp, 1); 7299 tcg_gen_neg_i64(tmp, tmp); 7300 7301 /* Apply to either copy the source, or write zeros. */ 7302 tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), 7303 pred_full_reg_offset(s, a->pn), tmp, pl, pl); 7304 return true; 7305 } 7306 7307 static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7308 { 7309 tcg_gen_smax_i32(d, a, n); 7310 tcg_gen_smin_i32(d, d, m); 7311 } 7312 7313 static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7314 { 7315 tcg_gen_smax_i64(d, a, n); 7316 tcg_gen_smin_i64(d, d, m); 7317 } 7318 7319 static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7320 TCGv_vec m, TCGv_vec a) 7321 { 7322 tcg_gen_smax_vec(vece, d, a, n); 7323 tcg_gen_smin_vec(vece, d, d, m); 7324 } 7325 7326 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7327 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7328 { 7329 static const TCGOpcode vecop[] = { 7330 INDEX_op_smin_vec, INDEX_op_smax_vec, 0 7331 }; 7332 static const GVecGen4 ops[4] = { 7333 { .fniv = gen_sclamp_vec, 7334 .fno = gen_helper_gvec_sclamp_b, 7335 .opt_opc = vecop, 7336 .vece = MO_8 }, 7337 { .fniv = gen_sclamp_vec, 7338 .fno = gen_helper_gvec_sclamp_h, 7339 .opt_opc = vecop, 7340 .vece = MO_16 }, 7341 { .fni4 = gen_sclamp_i32, 7342 .fniv = gen_sclamp_vec, 7343 .fno = gen_helper_gvec_sclamp_s, 7344 .opt_opc = vecop, 7345 .vece = MO_32 }, 7346 { .fni8 = gen_sclamp_i64, 7347 .fniv = gen_sclamp_vec, 7348 .fno = gen_helper_gvec_sclamp_d, 7349 .opt_opc = vecop, 7350 .vece = MO_64, 7351 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7352 }; 7353 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7354 } 7355 7356 TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) 7357 7358 static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7359 { 7360 tcg_gen_umax_i32(d, a, n); 7361 tcg_gen_umin_i32(d, d, m); 7362 } 7363 7364 static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7365 { 7366 tcg_gen_umax_i64(d, a, n); 7367 tcg_gen_umin_i64(d, d, m); 7368 } 7369 7370 static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7371 TCGv_vec m, TCGv_vec a) 7372 { 7373 tcg_gen_umax_vec(vece, d, a, n); 7374 tcg_gen_umin_vec(vece, d, d, m); 7375 } 7376 7377 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7378 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7379 { 7380 static const TCGOpcode vecop[] = { 7381 INDEX_op_umin_vec, INDEX_op_umax_vec, 0 7382 }; 7383 static const GVecGen4 ops[4] = { 7384 { .fniv = gen_uclamp_vec, 7385 .fno = gen_helper_gvec_uclamp_b, 7386 .opt_opc = vecop, 7387 .vece = MO_8 }, 7388 { .fniv = gen_uclamp_vec, 7389 .fno = gen_helper_gvec_uclamp_h, 7390 .opt_opc = vecop, 7391 .vece = MO_16 }, 7392 { .fni4 = gen_uclamp_i32, 7393 .fniv = gen_uclamp_vec, 7394 .fno = gen_helper_gvec_uclamp_s, 7395 .opt_opc = vecop, 7396 .vece = MO_32 }, 7397 { .fni8 = gen_uclamp_i64, 7398 .fniv = gen_uclamp_vec, 7399 .fno = gen_helper_gvec_uclamp_d, 7400 .opt_opc = vecop, 7401 .vece = MO_64, 7402 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7403 }; 7404 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7405 } 7406 7407 TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) 7408