1 /* 2 * AArch64 SVE translation 3 * 4 * Copyright (c) 2018 Linaro, Ltd 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "fpu/softfloat.h" 24 25 26 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, 27 TCGv_i64, uint32_t, uint32_t); 28 29 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, 30 TCGv_ptr, TCGv_i32); 31 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr, 32 TCGv_ptr, TCGv_ptr, TCGv_i32); 33 34 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32); 35 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, 36 TCGv_ptr, TCGv_i64, TCGv_i32); 37 38 /* 39 * Helpers for extracting complex instruction fields. 40 */ 41 42 /* See e.g. ASR (immediate, predicated). 43 * Returns -1 for unallocated encoding; diagnose later. 44 */ 45 static int tszimm_esz(DisasContext *s, int x) 46 { 47 x >>= 3; /* discard imm3 */ 48 return 31 - clz32(x); 49 } 50 51 static int tszimm_shr(DisasContext *s, int x) 52 { 53 return (16 << tszimm_esz(s, x)) - x; 54 } 55 56 /* See e.g. LSL (immediate, predicated). */ 57 static int tszimm_shl(DisasContext *s, int x) 58 { 59 return x - (8 << tszimm_esz(s, x)); 60 } 61 62 /* The SH bit is in bit 8. Extract the low 8 and shift. 
*/ 63 static inline int expand_imm_sh8s(DisasContext *s, int x) 64 { 65 return (int8_t)x << (x & 0x100 ? 8 : 0); 66 } 67 68 static inline int expand_imm_sh8u(DisasContext *s, int x) 69 { 70 return (uint8_t)x << (x & 0x100 ? 8 : 0); 71 } 72 73 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype) 74 * with unsigned data. C.f. SVE Memory Contiguous Load Group. 75 */ 76 static inline int msz_dtype(DisasContext *s, int msz) 77 { 78 static const uint8_t dtype[4] = { 0, 5, 10, 15 }; 79 return dtype[msz]; 80 } 81 82 /* 83 * Include the generated decoder. 84 */ 85 86 #include "decode-sve.c.inc" 87 88 /* 89 * Implement all of the translator functions referenced by the decoder. 90 */ 91 92 /* Invoke an out-of-line helper on 2 Zregs. */ 93 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, 94 int rd, int rn, int data) 95 { 96 if (fn == NULL) { 97 return false; 98 } 99 if (sve_access_check(s)) { 100 unsigned vsz = vec_full_reg_size(s); 101 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 102 vec_full_reg_offset(s, rn), 103 vsz, vsz, data, fn); 104 } 105 return true; 106 } 107 108 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 109 int rd, int rn, int data, 110 ARMFPStatusFlavour flavour) 111 { 112 if (fn == NULL) { 113 return false; 114 } 115 if (sve_access_check(s)) { 116 unsigned vsz = vec_full_reg_size(s); 117 TCGv_ptr status = fpstatus_ptr(flavour); 118 119 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 120 vec_full_reg_offset(s, rn), 121 status, vsz, vsz, data, fn); 122 } 123 return true; 124 } 125 126 static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 127 arg_rr_esz *a, int data) 128 { 129 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, 130 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 131 } 132 133 /* Invoke an out-of-line helper on 3 Zregs. 
*/ 134 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 135 int rd, int rn, int rm, int data) 136 { 137 if (fn == NULL) { 138 return false; 139 } 140 if (sve_access_check(s)) { 141 unsigned vsz = vec_full_reg_size(s); 142 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 143 vec_full_reg_offset(s, rn), 144 vec_full_reg_offset(s, rm), 145 vsz, vsz, data, fn); 146 } 147 return true; 148 } 149 150 static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 151 arg_rrr_esz *a, int data) 152 { 153 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); 154 } 155 156 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */ 157 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 158 int rd, int rn, int rm, 159 int data, ARMFPStatusFlavour flavour) 160 { 161 if (fn == NULL) { 162 return false; 163 } 164 if (sve_access_check(s)) { 165 unsigned vsz = vec_full_reg_size(s); 166 TCGv_ptr status = fpstatus_ptr(flavour); 167 168 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 169 vec_full_reg_offset(s, rn), 170 vec_full_reg_offset(s, rm), 171 status, vsz, vsz, data, fn); 172 } 173 return true; 174 } 175 176 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 177 arg_rrr_esz *a, int data) 178 { 179 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 180 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 181 } 182 183 /* Invoke an out-of-line helper on 4 Zregs. 
*/ 184 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 185 int rd, int rn, int rm, int ra, int data) 186 { 187 if (fn == NULL) { 188 return false; 189 } 190 if (sve_access_check(s)) { 191 unsigned vsz = vec_full_reg_size(s); 192 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 193 vec_full_reg_offset(s, rn), 194 vec_full_reg_offset(s, rm), 195 vec_full_reg_offset(s, ra), 196 vsz, vsz, data, fn); 197 } 198 return true; 199 } 200 201 static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 202 arg_rrrr_esz *a, int data) 203 { 204 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 205 } 206 207 static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn, 208 arg_rrxr_esz *a) 209 { 210 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 211 } 212 213 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */ 214 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 215 int rd, int rn, int rm, int ra, 216 int data, TCGv_ptr ptr) 217 { 218 if (fn == NULL) { 219 return false; 220 } 221 if (sve_access_check(s)) { 222 unsigned vsz = vec_full_reg_size(s); 223 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 224 vec_full_reg_offset(s, rn), 225 vec_full_reg_offset(s, rm), 226 vec_full_reg_offset(s, ra), 227 ptr, vsz, vsz, data, fn); 228 } 229 return true; 230 } 231 232 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 233 int rd, int rn, int rm, int ra, 234 int data, ARMFPStatusFlavour flavour) 235 { 236 TCGv_ptr status = fpstatus_ptr(flavour); 237 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status); 238 return ret; 239 } 240 241 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. 
*/
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    /* No helper supplied: return false to the caller. */
    if (fn == NULL) {
        return false;
    }
    /* Expand only when the access check passes; still return true if not. */
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The predicate register offset is passed as the third operand. */
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Decoder-argument wrapper around gen_gvec_ool_zzp with explicit data. */
static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

/* As gen_gvec_ool_arg_zpz, but the immediate field supplies the data word. */
static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate, plus float_status. */
static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* Decoder-argument wrapper around gen_gvec_fpst_zzp; flavour is explicit. */
static bool gen_gvec_fpst_arg_zpz(DisasContext *s,
gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    /* No helper supplied: return false to the caller. */
    if (fn == NULL) {
        return false;
    }
    /* Expand only when the access check passes; still return true if not. */
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Decoder-argument wrapper around gen_gvec_ool_zzzp. */
static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus float_status. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/*
 * Decoder-argument wrapper: data is fixed at 0 and half precision
 * selects FPST_FPCR_F16, anything else FPST_FPCR.
 */
static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke a vector expander on two Zregs and an immediate.
*/ 371 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 372 int esz, int rd, int rn, uint64_t imm) 373 { 374 if (gvec_fn == NULL) { 375 return false; 376 } 377 if (sve_access_check(s)) { 378 unsigned vsz = vec_full_reg_size(s); 379 gvec_fn(esz, vec_full_reg_offset(s, rd), 380 vec_full_reg_offset(s, rn), imm, vsz, vsz); 381 } 382 return true; 383 } 384 385 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 386 arg_rri_esz *a) 387 { 388 if (a->esz < 0) { 389 /* Invalid tsz encoding -- see tszimm_esz. */ 390 return false; 391 } 392 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm); 393 } 394 395 /* Invoke a vector expander on three Zregs. */ 396 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, 397 int esz, int rd, int rn, int rm) 398 { 399 if (gvec_fn == NULL) { 400 return false; 401 } 402 if (sve_access_check(s)) { 403 unsigned vsz = vec_full_reg_size(s); 404 gvec_fn(esz, vec_full_reg_offset(s, rd), 405 vec_full_reg_offset(s, rn), 406 vec_full_reg_offset(s, rm), vsz, vsz); 407 } 408 return true; 409 } 410 411 static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn, 412 arg_rrr_esz *a) 413 { 414 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); 415 } 416 417 /* Invoke a vector expander on four Zregs. */ 418 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, 419 arg_rrrr_esz *a) 420 { 421 if (gvec_fn == NULL) { 422 return false; 423 } 424 if (sve_access_check(s)) { 425 unsigned vsz = vec_full_reg_size(s); 426 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 427 vec_full_reg_offset(s, a->rn), 428 vec_full_reg_offset(s, a->rm), 429 vec_full_reg_offset(s, a->ra), vsz, vsz); 430 } 431 return true; 432 } 433 434 /* Invoke a vector move on two Zregs. 
*/ 435 static bool do_mov_z(DisasContext *s, int rd, int rn) 436 { 437 if (sve_access_check(s)) { 438 unsigned vsz = vec_full_reg_size(s); 439 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd), 440 vec_full_reg_offset(s, rn), vsz, vsz); 441 } 442 return true; 443 } 444 445 /* Initialize a Zreg with replications of a 64-bit immediate. */ 446 static void do_dupi_z(DisasContext *s, int rd, uint64_t word) 447 { 448 unsigned vsz = vec_full_reg_size(s); 449 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); 450 } 451 452 /* Invoke a vector expander on three Pregs. */ 453 static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, 454 int rd, int rn, int rm) 455 { 456 if (sve_access_check(s)) { 457 unsigned psz = pred_gvec_reg_size(s); 458 gvec_fn(MO_64, pred_full_reg_offset(s, rd), 459 pred_full_reg_offset(s, rn), 460 pred_full_reg_offset(s, rm), psz, psz); 461 } 462 return true; 463 } 464 465 /* Invoke a vector move on two Pregs. */ 466 static bool do_mov_p(DisasContext *s, int rd, int rn) 467 { 468 if (sve_access_check(s)) { 469 unsigned psz = pred_gvec_reg_size(s); 470 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), 471 pred_full_reg_offset(s, rn), psz, psz); 472 } 473 return true; 474 } 475 476 /* Set the cpu flags as per a return from an SVE helper. */ 477 static void do_pred_flags(TCGv_i32 t) 478 { 479 tcg_gen_mov_i32(cpu_NF, t); 480 tcg_gen_andi_i32(cpu_ZF, t, 2); 481 tcg_gen_andi_i32(cpu_CF, t, 1); 482 tcg_gen_movi_i32(cpu_VF, 0); 483 } 484 485 /* Subroutines computing the ARM PredTest psuedofunction. 
*/
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    /* Single 64-bit value form: the helper result feeds the flags. */
    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    /* dofs and gofs are offsets from cpu_env; form pointers for the helper. */
    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));

    do_pred_flags(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};

/* Raise the unallocated-encoding path; returns true. */
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

/*
 * XAR on a 64-bit word holding 8-bit lanes: xor n with m, then combine
 * a right shift by sh and a left shift by 8 - sh under a replicated
 * per-lane mask, so that each byte is rotated independently.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* As gen_xar8_i64, for 16-bit lanes. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* 32-bit XAR: xor, then rotate right by sh. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* 64-bit XAR: xor, then rotate right by sh. */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* Host-vector XAR: xor, then per-element rotate right by sh. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

/*
 * Expand XAR for element size vece over the given register offsets.
 * shift must be in 0 .. esize (asserted in debug builds); it is reduced
 * modulo esize, and a resulting shift of 0 is expanded as a plain xor.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    /* One expansion descriptor per element size, indexed by vece. */
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor.
*/
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

/*
 * Translate XAR.  Returns false for an invalid tsz encoding (esz < 0)
 * or when the aa64_sve2 feature is absent.
 */
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

/* d = n ^ m ^ k */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

/* Host-vector form of gen_eor3_i64. */
static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

/*
 * Expand a three-way xor.  The vece argument is not used: the
 * descriptor fixes the element size at MO_64.
 */
static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

/* d = n ^ (m & ~k) */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

/* Host-vector form of gen_bcax_i64. */
static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

/* Expand BCAX; as with gen_eor3, the descriptor fixes MO_64. */
static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 =
TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)

/* d = (~n & k) | (m & ~k).  Note that n and m are overwritten. */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

/*
 * Host-vector BSL1N: invert n and use bitsel when the host provides it,
 * otherwise the same and-not/or sequence as the i64 form.
 */
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

/* Expand BSL1N; vece is not used, the descriptor fixes MO_64. */
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    /* Both branches compute (n & k) | ~(m | k); only the op pairing differs. */
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

/* Host-vector BSL2N: bitsel on inverted m when available. */
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        /* (n & k) | ~(m | k), matching the i64 expansion. */
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

/* Expand BSL2N; vece is not used, the descriptor fixes MO_64. */
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

/* d = ~((n & k) | (m & ~k)).  Note that n and m are overwritten. */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

/* Host-vector NBSL: bitsel followed by a bitwise not of the result. */
static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

/* Expand NBSL; vece is not used, the descriptor fixes MO_64. */
static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve,
gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    /* One helper per element size, indexed by esz. */
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

/*
 * Define a per-element-size helper table name##_zpzz_fns and the
 * translator for NAME, which dispatches on a->esz with data 0.
 */
#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {               \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,           \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                         \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

/* Only 32-bit and 64-bit helpers exist; the NULL entries make the
 * generic expander return false for the byte and halfword sizes.
 */
static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s,
gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/*
 * Define a per-element-size helper table name##_fns and the translator
 * for NAME, which dispatches on a->esz with data 0.
 */
#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {              \
        gen_helper_##name##_b, gen_helper_##name##_h,               \
        gen_helper_##name##_s, gen_helper_##name##_d,               \
    };                                                              \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

/*
 * The tables below have NULL entries for element sizes with no helper;
 * gen_gvec_ool_zzp returns false when handed a NULL helper.
 */
static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3
* const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

/* Word extension has a helper only for esz == 3; otherwise NULL is passed. */
TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/*
 * Call a reduction helper on Zn under predicate Pg and write its 64-bit
 * result with write_fp_dreg() to register rd.
 * Returns false when fn is NULL, true otherwise.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    /* The helper takes pointers to the source vector and the predicate. */
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);

    write_fp_dreg(s, a->rd, temp);
    return true;
}

/*
 * Define a per-element-size reduction helper table name##_fns and the
 * translator for NAME, which dispatches on a->esz.
 */
#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* No 64-bit SADDV helper: the NULL entry makes do_vpz_ool return false. */
static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ

/*
 *** SVE Shift by Immediate -
Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    /* The invert flag travels to the helper as the data word. */
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

/*
 * Expand a predicated shift by immediate using the helper table fns,
 * indexed by element size.  asr selects the clamping behaviour for a
 * shift count that reaches the element size (see below).
 * Returns false for an invalid tsz encoding.
 */
static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            /* Zero the active elements of Zd in place (rn == rd, invert). */
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static
gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
/* ASRD passes asr == false, so a full-width shift takes the zeroing path. */
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

/*
 * The SVE2 shifts below index their tables directly; a negative esz
 * (invalid tsz encoding) is mapped to a NULL helper so that the table
 * is never indexed with -1.
 */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ?
NULL : sqshlu_fns[a->esz], a) 1094 1095 /* 1096 *** SVE Bitwise Shift - Predicated Group 1097 */ 1098 1099 #define DO_ZPZW(NAME, name) \ 1100 static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \ 1101 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \ 1102 gen_helper_sve_##name##_zpzw_s, NULL \ 1103 }; \ 1104 TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \ 1105 a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0) 1106 1107 DO_ZPZW(ASR, asr) 1108 DO_ZPZW(LSR, lsr) 1109 DO_ZPZW(LSL, lsl) 1110 1111 #undef DO_ZPZW 1112 1113 /* 1114 *** SVE Bitwise Shift - Unpredicated Group 1115 */ 1116 1117 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr, 1118 void (*gvec_fn)(unsigned, uint32_t, uint32_t, 1119 int64_t, uint32_t, uint32_t)) 1120 { 1121 if (a->esz < 0) { 1122 /* Invalid tsz encoding -- see tszimm_esz. */ 1123 return false; 1124 } 1125 if (sve_access_check(s)) { 1126 unsigned vsz = vec_full_reg_size(s); 1127 /* Shift by element size is architecturally valid. For 1128 arithmetic right-shift, it's the same as by one less. 1129 Otherwise it is a zeroing operation. 
*/ 1130 if (a->imm >= 8 << a->esz) { 1131 if (asr) { 1132 a->imm = (8 << a->esz) - 1; 1133 } else { 1134 do_dupi_z(s, a->rd, 0); 1135 return true; 1136 } 1137 } 1138 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 1139 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 1140 } 1141 return true; 1142 } 1143 1144 TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari) 1145 TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri) 1146 TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli) 1147 1148 #define DO_ZZW(NAME, name) \ 1149 static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \ 1150 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \ 1151 gen_helper_sve_##name##_zzw_s, NULL \ 1152 }; \ 1153 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \ 1154 name##_zzw_fns[a->esz], a, 0) 1155 1156 DO_ZZW(ASR_zzw, asr) 1157 DO_ZZW(LSR_zzw, lsr) 1158 DO_ZZW(LSL_zzw, lsl) 1159 1160 #undef DO_ZZW 1161 1162 /* 1163 *** SVE Integer Multiply-Add Group 1164 */ 1165 1166 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a, 1167 gen_helper_gvec_5 *fn) 1168 { 1169 if (sve_access_check(s)) { 1170 unsigned vsz = vec_full_reg_size(s); 1171 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd), 1172 vec_full_reg_offset(s, a->ra), 1173 vec_full_reg_offset(s, a->rn), 1174 vec_full_reg_offset(s, a->rm), 1175 pred_full_reg_offset(s, a->pg), 1176 vsz, vsz, 0, fn); 1177 } 1178 return true; 1179 } 1180 1181 static gen_helper_gvec_5 * const mla_fns[4] = { 1182 gen_helper_sve_mla_b, gen_helper_sve_mla_h, 1183 gen_helper_sve_mla_s, gen_helper_sve_mla_d, 1184 }; 1185 TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz]) 1186 1187 static gen_helper_gvec_5 * const mls_fns[4] = { 1188 gen_helper_sve_mls_b, gen_helper_sve_mls_h, 1189 gen_helper_sve_mls_s, gen_helper_sve_mls_d, 1190 }; 1191 TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz]) 1192 1193 /* 1194 *** SVE Index Generation Group 1195 */ 1196 1197 static bool 
do_index(DisasContext *s, int esz, int rd, 1198 TCGv_i64 start, TCGv_i64 incr) 1199 { 1200 unsigned vsz; 1201 TCGv_i32 desc; 1202 TCGv_ptr t_zd; 1203 1204 if (!sve_access_check(s)) { 1205 return true; 1206 } 1207 1208 vsz = vec_full_reg_size(s); 1209 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1210 t_zd = tcg_temp_new_ptr(); 1211 1212 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); 1213 if (esz == 3) { 1214 gen_helper_sve_index_d(t_zd, start, incr, desc); 1215 } else { 1216 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 1217 static index_fn * const fns[3] = { 1218 gen_helper_sve_index_b, 1219 gen_helper_sve_index_h, 1220 gen_helper_sve_index_s, 1221 }; 1222 TCGv_i32 s32 = tcg_temp_new_i32(); 1223 TCGv_i32 i32 = tcg_temp_new_i32(); 1224 1225 tcg_gen_extrl_i64_i32(s32, start); 1226 tcg_gen_extrl_i64_i32(i32, incr); 1227 fns[esz](t_zd, s32, i32, desc); 1228 } 1229 return true; 1230 } 1231 1232 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, 1233 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) 1234 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, 1235 tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) 1236 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, 1237 cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) 1238 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, 1239 cpu_reg(s, a->rn), cpu_reg(s, a->rm)) 1240 1241 /* 1242 *** SVE Stack Allocation Group 1243 */ 1244 1245 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) 1246 { 1247 if (!dc_isar_feature(aa64_sve, s)) { 1248 return false; 1249 } 1250 if (sve_access_check(s)) { 1251 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1252 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1253 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); 1254 } 1255 return true; 1256 } 1257 1258 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) 1259 { 1260 if (!dc_isar_feature(aa64_sme, s)) { 1261 return false; 1262 } 1263 if (sme_enabled_check(s)) { 1264 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 
1265 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1266 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); 1267 } 1268 return true; 1269 } 1270 1271 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) 1272 { 1273 if (!dc_isar_feature(aa64_sve, s)) { 1274 return false; 1275 } 1276 if (sve_access_check(s)) { 1277 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1278 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1279 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); 1280 } 1281 return true; 1282 } 1283 1284 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) 1285 { 1286 if (!dc_isar_feature(aa64_sme, s)) { 1287 return false; 1288 } 1289 if (sme_enabled_check(s)) { 1290 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1291 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1292 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); 1293 } 1294 return true; 1295 } 1296 1297 static bool trans_RDVL(DisasContext *s, arg_RDVL *a) 1298 { 1299 if (!dc_isar_feature(aa64_sve, s)) { 1300 return false; 1301 } 1302 if (sve_access_check(s)) { 1303 TCGv_i64 reg = cpu_reg(s, a->rd); 1304 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); 1305 } 1306 return true; 1307 } 1308 1309 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) 1310 { 1311 if (!dc_isar_feature(aa64_sme, s)) { 1312 return false; 1313 } 1314 if (sme_enabled_check(s)) { 1315 TCGv_i64 reg = cpu_reg(s, a->rd); 1316 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); 1317 } 1318 return true; 1319 } 1320 1321 /* 1322 *** SVE Compute Vector Address Group 1323 */ 1324 1325 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) 1326 { 1327 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); 1328 } 1329 1330 TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) 1331 TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) 1332 TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) 1333 TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, 
gen_helper_sve_adr_u32) 1334 1335 /* 1336 *** SVE Integer Misc - Unpredicated Group 1337 */ 1338 1339 static gen_helper_gvec_2 * const fexpa_fns[4] = { 1340 NULL, gen_helper_sve_fexpa_h, 1341 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, 1342 }; 1343 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, 1344 fexpa_fns[a->esz], a->rd, a->rn, 0) 1345 1346 static gen_helper_gvec_3 * const ftssel_fns[4] = { 1347 NULL, gen_helper_sve_ftssel_h, 1348 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, 1349 }; 1350 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, 1351 ftssel_fns[a->esz], a, 0) 1352 1353 /* 1354 *** SVE Predicate Logical Operations Group 1355 */ 1356 1357 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, 1358 const GVecGen4 *gvec_op) 1359 { 1360 if (!sve_access_check(s)) { 1361 return true; 1362 } 1363 1364 unsigned psz = pred_gvec_reg_size(s); 1365 int dofs = pred_full_reg_offset(s, a->rd); 1366 int nofs = pred_full_reg_offset(s, a->rn); 1367 int mofs = pred_full_reg_offset(s, a->rm); 1368 int gofs = pred_full_reg_offset(s, a->pg); 1369 1370 if (!a->s) { 1371 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1372 return true; 1373 } 1374 1375 if (psz == 8) { 1376 /* Do the operation and the flags generation in temps. */ 1377 TCGv_i64 pd = tcg_temp_new_i64(); 1378 TCGv_i64 pn = tcg_temp_new_i64(); 1379 TCGv_i64 pm = tcg_temp_new_i64(); 1380 TCGv_i64 pg = tcg_temp_new_i64(); 1381 1382 tcg_gen_ld_i64(pn, cpu_env, nofs); 1383 tcg_gen_ld_i64(pm, cpu_env, mofs); 1384 tcg_gen_ld_i64(pg, cpu_env, gofs); 1385 1386 gvec_op->fni8(pd, pn, pm, pg); 1387 tcg_gen_st_i64(pd, cpu_env, dofs); 1388 1389 do_predtest1(pd, pg); 1390 } else { 1391 /* The operation and flags generation is large. The computation 1392 * of the flags depends on the original contents of the guarding 1393 * predicate. If the destination overwrites the guarding predicate, 1394 * then the easiest way to get this right is to save a copy. 
1395 */ 1396 int tofs = gofs; 1397 if (a->rd == a->pg) { 1398 tofs = offsetof(CPUARMState, vfp.preg_tmp); 1399 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz); 1400 } 1401 1402 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1403 do_predtest(s, dofs, tofs, psz / 8); 1404 } 1405 return true; 1406 } 1407 1408 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1409 { 1410 tcg_gen_and_i64(pd, pn, pm); 1411 tcg_gen_and_i64(pd, pd, pg); 1412 } 1413 1414 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1415 TCGv_vec pm, TCGv_vec pg) 1416 { 1417 tcg_gen_and_vec(vece, pd, pn, pm); 1418 tcg_gen_and_vec(vece, pd, pd, pg); 1419 } 1420 1421 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) 1422 { 1423 static const GVecGen4 op = { 1424 .fni8 = gen_and_pg_i64, 1425 .fniv = gen_and_pg_vec, 1426 .fno = gen_helper_sve_and_pppp, 1427 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1428 }; 1429 1430 if (!dc_isar_feature(aa64_sve, s)) { 1431 return false; 1432 } 1433 if (!a->s) { 1434 if (a->rn == a->rm) { 1435 if (a->pg == a->rn) { 1436 return do_mov_p(s, a->rd, a->rn); 1437 } 1438 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); 1439 } else if (a->pg == a->rn || a->pg == a->rm) { 1440 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); 1441 } 1442 } 1443 return do_pppp_flags(s, a, &op); 1444 } 1445 1446 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1447 { 1448 tcg_gen_andc_i64(pd, pn, pm); 1449 tcg_gen_and_i64(pd, pd, pg); 1450 } 1451 1452 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1453 TCGv_vec pm, TCGv_vec pg) 1454 { 1455 tcg_gen_andc_vec(vece, pd, pn, pm); 1456 tcg_gen_and_vec(vece, pd, pd, pg); 1457 } 1458 1459 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) 1460 { 1461 static const GVecGen4 op = { 1462 .fni8 = gen_bic_pg_i64, 1463 .fniv = gen_bic_pg_vec, 1464 .fno = gen_helper_sve_bic_pppp, 1465 .prefer_i64 = 
TCG_TARGET_REG_BITS == 64, 1466 }; 1467 1468 if (!dc_isar_feature(aa64_sve, s)) { 1469 return false; 1470 } 1471 if (!a->s && a->pg == a->rn) { 1472 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); 1473 } 1474 return do_pppp_flags(s, a, &op); 1475 } 1476 1477 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1478 { 1479 tcg_gen_xor_i64(pd, pn, pm); 1480 tcg_gen_and_i64(pd, pd, pg); 1481 } 1482 1483 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1484 TCGv_vec pm, TCGv_vec pg) 1485 { 1486 tcg_gen_xor_vec(vece, pd, pn, pm); 1487 tcg_gen_and_vec(vece, pd, pd, pg); 1488 } 1489 1490 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) 1491 { 1492 static const GVecGen4 op = { 1493 .fni8 = gen_eor_pg_i64, 1494 .fniv = gen_eor_pg_vec, 1495 .fno = gen_helper_sve_eor_pppp, 1496 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1497 }; 1498 1499 if (!dc_isar_feature(aa64_sve, s)) { 1500 return false; 1501 } 1502 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ 1503 if (!a->s && a->pg == a->rm) { 1504 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); 1505 } 1506 return do_pppp_flags(s, a, &op); 1507 } 1508 1509 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) 1510 { 1511 if (a->s || !dc_isar_feature(aa64_sve, s)) { 1512 return false; 1513 } 1514 if (sve_access_check(s)) { 1515 unsigned psz = pred_gvec_reg_size(s); 1516 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), 1517 pred_full_reg_offset(s, a->pg), 1518 pred_full_reg_offset(s, a->rn), 1519 pred_full_reg_offset(s, a->rm), psz, psz); 1520 } 1521 return true; 1522 } 1523 1524 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1525 { 1526 tcg_gen_or_i64(pd, pn, pm); 1527 tcg_gen_and_i64(pd, pd, pg); 1528 } 1529 1530 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1531 TCGv_vec pm, TCGv_vec pg) 1532 { 1533 tcg_gen_or_vec(vece, pd, pn, pm); 1534 tcg_gen_and_vec(vece, pd, 
pd, pg); 1535 } 1536 1537 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) 1538 { 1539 static const GVecGen4 op = { 1540 .fni8 = gen_orr_pg_i64, 1541 .fniv = gen_orr_pg_vec, 1542 .fno = gen_helper_sve_orr_pppp, 1543 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1544 }; 1545 1546 if (!dc_isar_feature(aa64_sve, s)) { 1547 return false; 1548 } 1549 if (!a->s && a->pg == a->rn && a->rn == a->rm) { 1550 return do_mov_p(s, a->rd, a->rn); 1551 } 1552 return do_pppp_flags(s, a, &op); 1553 } 1554 1555 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1556 { 1557 tcg_gen_orc_i64(pd, pn, pm); 1558 tcg_gen_and_i64(pd, pd, pg); 1559 } 1560 1561 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1562 TCGv_vec pm, TCGv_vec pg) 1563 { 1564 tcg_gen_orc_vec(vece, pd, pn, pm); 1565 tcg_gen_and_vec(vece, pd, pd, pg); 1566 } 1567 1568 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) 1569 { 1570 static const GVecGen4 op = { 1571 .fni8 = gen_orn_pg_i64, 1572 .fniv = gen_orn_pg_vec, 1573 .fno = gen_helper_sve_orn_pppp, 1574 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1575 }; 1576 1577 if (!dc_isar_feature(aa64_sve, s)) { 1578 return false; 1579 } 1580 return do_pppp_flags(s, a, &op); 1581 } 1582 1583 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1584 { 1585 tcg_gen_or_i64(pd, pn, pm); 1586 tcg_gen_andc_i64(pd, pg, pd); 1587 } 1588 1589 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1590 TCGv_vec pm, TCGv_vec pg) 1591 { 1592 tcg_gen_or_vec(vece, pd, pn, pm); 1593 tcg_gen_andc_vec(vece, pd, pg, pd); 1594 } 1595 1596 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) 1597 { 1598 static const GVecGen4 op = { 1599 .fni8 = gen_nor_pg_i64, 1600 .fniv = gen_nor_pg_vec, 1601 .fno = gen_helper_sve_nor_pppp, 1602 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1603 }; 1604 1605 if (!dc_isar_feature(aa64_sve, s)) { 1606 return false; 1607 } 1608 return do_pppp_flags(s, a, &op); 1609 } 1610 
1611 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1612 { 1613 tcg_gen_and_i64(pd, pn, pm); 1614 tcg_gen_andc_i64(pd, pg, pd); 1615 } 1616 1617 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1618 TCGv_vec pm, TCGv_vec pg) 1619 { 1620 tcg_gen_and_vec(vece, pd, pn, pm); 1621 tcg_gen_andc_vec(vece, pd, pg, pd); 1622 } 1623 1624 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) 1625 { 1626 static const GVecGen4 op = { 1627 .fni8 = gen_nand_pg_i64, 1628 .fniv = gen_nand_pg_vec, 1629 .fno = gen_helper_sve_nand_pppp, 1630 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1631 }; 1632 1633 if (!dc_isar_feature(aa64_sve, s)) { 1634 return false; 1635 } 1636 return do_pppp_flags(s, a, &op); 1637 } 1638 1639 /* 1640 *** SVE Predicate Misc Group 1641 */ 1642 1643 static bool trans_PTEST(DisasContext *s, arg_PTEST *a) 1644 { 1645 if (!dc_isar_feature(aa64_sve, s)) { 1646 return false; 1647 } 1648 if (sve_access_check(s)) { 1649 int nofs = pred_full_reg_offset(s, a->rn); 1650 int gofs = pred_full_reg_offset(s, a->pg); 1651 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8); 1652 1653 if (words == 1) { 1654 TCGv_i64 pn = tcg_temp_new_i64(); 1655 TCGv_i64 pg = tcg_temp_new_i64(); 1656 1657 tcg_gen_ld_i64(pn, cpu_env, nofs); 1658 tcg_gen_ld_i64(pg, cpu_env, gofs); 1659 do_predtest1(pn, pg); 1660 } else { 1661 do_predtest(s, nofs, gofs, words); 1662 } 1663 } 1664 return true; 1665 } 1666 1667 /* See the ARM pseudocode DecodePredCount. 
*/
/*
 * Return the number of elements selected by a predicate-constraint
 * pattern.  'fullsz >> esz' is the number of elements available.
 * POW2 yields the largest power of two not exceeding that count;
 * the VLn patterns yield n if at least n elements exist, else 0;
 * MUL4/MUL3 round the count down to a multiple of 4 or 3; ALL yields
 * every element; any other pattern value yields 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* A fixed count applies only when that many elements exist. */
    return elements >= bound ? bound : 0;
}

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 *
 * When setflag is true (PTRUES) the NZCV flags are also written, as
 * translation-time constants derived from whether any element was set.
 * Always returns true; nothing is emitted if sve_access_check fails.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* The final word is only partially populated. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* Everything fits in a single 64-bit store. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        /* Use one gvec dup when the set size is not changed by rounding. */
        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* From here on both sizes are used as byte offsets for the stores. */
    setsz /= 8;
    fullsz /= 8;

    /* Whole words of the pattern, then the partial word, then zeros. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements.  */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements.  */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)

/*
 * RDFFR (predicated): expanded as AND Pd, Pg/Z, FFR, FFR so that the
 * flag-setting variant is covered too.
 */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    /*
     * NOTE(review): the flag is set by hand here, presumably because
     * this function is not defined via TRANS_FEAT_NONSTREAMING like
     * RDFFR/WRFFR below -- confirm against that macro's definition.
     */
    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

/* Unpredicated FFR read and write are plain predicate moves. */
TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

/*
 * Shared expansion for PFIRST and PNEXT.
 *
 * gen_fn is called with pointers to Pd and to the predicate named by
 * a->rn, plus a descriptor packing the predicate size in bytes
 * (OPRSZ) and the element size (ESZ).  Its i32 result is handed to
 * do_pred_flags.
 * Always returns true; nothing is emitted if sve_access_check fails.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));

    do_pred_flags(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 *
 * @reg: operand and destination; first extended from 32 bits
 *       (zero-extended if u, sign-extended otherwise).
 * @u:   unsigned saturation bounds when true, signed otherwise.
 * @d:   subtract val (bounded below) when true, add (bounded above)
 *       otherwise.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}

/* Similarly with 64-bit values.
 *
 * There is no wider type to compute in, so overflow is detected
 * explicitly: for unsigned by an unsigned comparison, for signed from
 * the sign bits of the operands and the result.  The saturated value
 * is then chosen with a movcond.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            /* reg < val (unsigned) would underflow: saturate to 0. */
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            /* A sum below reg means it wrapped: saturate to all-ones. */
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
    }
}

/* Similarly with a vector and a scalar operand.
*/ 1912 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1913 TCGv_i64 val, bool u, bool d) 1914 { 1915 unsigned vsz = vec_full_reg_size(s); 1916 TCGv_ptr dptr, nptr; 1917 TCGv_i32 t32, desc; 1918 TCGv_i64 t64; 1919 1920 dptr = tcg_temp_new_ptr(); 1921 nptr = tcg_temp_new_ptr(); 1922 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd)); 1923 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn)); 1924 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1925 1926 switch (esz) { 1927 case MO_8: 1928 t32 = tcg_temp_new_i32(); 1929 tcg_gen_extrl_i64_i32(t32, val); 1930 if (d) { 1931 tcg_gen_neg_i32(t32, t32); 1932 } 1933 if (u) { 1934 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1935 } else { 1936 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1937 } 1938 break; 1939 1940 case MO_16: 1941 t32 = tcg_temp_new_i32(); 1942 tcg_gen_extrl_i64_i32(t32, val); 1943 if (d) { 1944 tcg_gen_neg_i32(t32, t32); 1945 } 1946 if (u) { 1947 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1948 } else { 1949 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1950 } 1951 break; 1952 1953 case MO_32: 1954 t64 = tcg_temp_new_i64(); 1955 if (d) { 1956 tcg_gen_neg_i64(t64, val); 1957 } else { 1958 tcg_gen_mov_i64(t64, val); 1959 } 1960 if (u) { 1961 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1962 } else { 1963 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1964 } 1965 break; 1966 1967 case MO_64: 1968 if (u) { 1969 if (d) { 1970 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1971 } else { 1972 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1973 } 1974 } else if (d) { 1975 t64 = tcg_temp_new_i64(); 1976 tcg_gen_neg_i64(t64, val); 1977 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1978 } else { 1979 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1980 } 1981 break; 1982 1983 default: 1984 g_assert_not_reached(); 1985 } 1986 } 1987 1988 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1989 { 1990 if (!dc_isar_feature(aa64_sve, s)) { 1991 return 
false; 1992 } 1993 if (sve_access_check(s)) { 1994 unsigned fullsz = vec_full_reg_size(s); 1995 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1996 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 1997 } 1998 return true; 1999 } 2000 2001 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 2002 { 2003 if (!dc_isar_feature(aa64_sve, s)) { 2004 return false; 2005 } 2006 if (sve_access_check(s)) { 2007 unsigned fullsz = vec_full_reg_size(s); 2008 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2009 int inc = numelem * a->imm * (a->d ? -1 : 1); 2010 TCGv_i64 reg = cpu_reg(s, a->rd); 2011 2012 tcg_gen_addi_i64(reg, reg, inc); 2013 } 2014 return true; 2015 } 2016 2017 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 2018 { 2019 if (!dc_isar_feature(aa64_sve, s)) { 2020 return false; 2021 } 2022 if (!sve_access_check(s)) { 2023 return true; 2024 } 2025 2026 unsigned fullsz = vec_full_reg_size(s); 2027 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2028 int inc = numelem * a->imm; 2029 TCGv_i64 reg = cpu_reg(s, a->rd); 2030 2031 /* Use normal 64-bit arithmetic to detect 32-bit overflow. 
*/ 2032 if (inc == 0) { 2033 if (a->u) { 2034 tcg_gen_ext32u_i64(reg, reg); 2035 } else { 2036 tcg_gen_ext32s_i64(reg, reg); 2037 } 2038 } else { 2039 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 2040 } 2041 return true; 2042 } 2043 2044 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2045 { 2046 if (!dc_isar_feature(aa64_sve, s)) { 2047 return false; 2048 } 2049 if (!sve_access_check(s)) { 2050 return true; 2051 } 2052 2053 unsigned fullsz = vec_full_reg_size(s); 2054 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2055 int inc = numelem * a->imm; 2056 TCGv_i64 reg = cpu_reg(s, a->rd); 2057 2058 if (inc != 0) { 2059 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2060 } 2061 return true; 2062 } 2063 2064 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2065 { 2066 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2067 return false; 2068 } 2069 2070 unsigned fullsz = vec_full_reg_size(s); 2071 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2072 int inc = numelem * a->imm; 2073 2074 if (inc != 0) { 2075 if (sve_access_check(s)) { 2076 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2077 vec_full_reg_offset(s, a->rn), 2078 tcg_constant_i64(a->d ? 
-inc : inc), 2079 fullsz, fullsz); 2080 } 2081 } else { 2082 do_mov_z(s, a->rd, a->rn); 2083 } 2084 return true; 2085 } 2086 2087 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2088 { 2089 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2090 return false; 2091 } 2092 2093 unsigned fullsz = vec_full_reg_size(s); 2094 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2095 int inc = numelem * a->imm; 2096 2097 if (inc != 0) { 2098 if (sve_access_check(s)) { 2099 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2100 tcg_constant_i64(inc), a->u, a->d); 2101 } 2102 } else { 2103 do_mov_z(s, a->rd, a->rn); 2104 } 2105 return true; 2106 } 2107 2108 /* 2109 *** SVE Bitwise Immediate Group 2110 */ 2111 2112 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2113 { 2114 uint64_t imm; 2115 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2116 extract32(a->dbm, 0, 6), 2117 extract32(a->dbm, 6, 6))) { 2118 return false; 2119 } 2120 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2121 } 2122 2123 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2124 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2125 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2126 2127 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2128 { 2129 uint64_t imm; 2130 2131 if (!dc_isar_feature(aa64_sve, s)) { 2132 return false; 2133 } 2134 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2135 extract32(a->dbm, 0, 6), 2136 extract32(a->dbm, 6, 6))) { 2137 return false; 2138 } 2139 if (sve_access_check(s)) { 2140 do_dupi_z(s, a->rd, imm); 2141 } 2142 return true; 2143 } 2144 2145 /* 2146 *** SVE Integer Wide Immediate - Predicated Group 2147 */ 2148 2149 /* Implement all merging copies. This is used for CPY (immediate), 2150 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 
*/
/*
 * do_cpy_m calls the sve_cpy_m helper for element size esz with
 * pointers to Zd, Zn and Pg and the 64-bit value 'val'.
 * It performs no access check of its own.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);
}

/*
 * FCPY: merging copy of an expanded floating-point immediate.
 * The byte element size is rejected.
 */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

/* CPY (immediate, merging): merging copy of a->imm. */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}

/*
 * CPY (immediate, zeroing): call the sve_cpy_z helper for a->esz on Zd
 * and Pg with a->imm.  No source vector is passed.
 */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

/*
 * EXT: Zd receives the bytes of Zn from offset imm to the end,
 * followed by the first imm bytes of Zm.  An immediate that is not
 * below the vector size is treated as 0.
 * Always returns true; nothing is emitted if sve_access_check fails.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;   /* first byte taken from Zn */
    unsigned n_siz = vsz - n_ofs;            /* bytes taken from Zn */
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

/* The SVE2 form takes Zm as the register after Zn, modulo 32. */
TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)

/*
 *** SVE Permute - Unpredicated Group
 */

/*
 * DUP (scalar): broadcast a general register, obtained with
 * cpu_reg_sp, to every element of Zd.
 */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/*
 * DUP (indexed): broadcast one element of Zn to every element of Zd.
 *
 * a->imm carries both fields: the position of its lowest set bit is
 * the element size and the bits above that bit are the element index.
 * An encoding with no set bit among the low five is rejected.  An
 * index beyond the vector length writes zeros.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}

/*
 * Call the sve_insr helper for a->esz with pointers to Zd and Zn and
 * the 64-bit value 'val'.  It performs no access check of its own.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);
}

/*
 * INSR (SIMD&FP scalar): the inserted value is the 64-bit element 0
 * of vector register rm.
 */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
    }
    return true;
}

/* INSR (scalar): the inserted value is general register rm. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

/* Per-element-size helper tables (b, h, s, d) for REV and TBL. */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2358 }; 2359 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2360 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2361 2362 static gen_helper_gvec_3 * const tbx_fns[4] = { 2363 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2364 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2365 }; 2366 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2367 2368 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2369 { 2370 static gen_helper_gvec_2 * const fns[4][2] = { 2371 { NULL, NULL }, 2372 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2373 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2374 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2375 }; 2376 2377 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2378 return false; 2379 } 2380 if (sve_access_check(s)) { 2381 unsigned vsz = vec_full_reg_size(s); 2382 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2383 vec_full_reg_offset(s, a->rn) 2384 + (a->h ? 
vsz / 2 : 0), 2385 vsz, vsz, 0, fns[a->esz][a->u]); 2386 } 2387 return true; 2388 } 2389 2390 /* 2391 *** SVE Permute - Predicates Group 2392 */ 2393 2394 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2395 gen_helper_gvec_3 *fn) 2396 { 2397 if (!sve_access_check(s)) { 2398 return true; 2399 } 2400 2401 unsigned vsz = pred_full_reg_size(s); 2402 2403 TCGv_ptr t_d = tcg_temp_new_ptr(); 2404 TCGv_ptr t_n = tcg_temp_new_ptr(); 2405 TCGv_ptr t_m = tcg_temp_new_ptr(); 2406 uint32_t desc = 0; 2407 2408 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2409 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2410 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2411 2412 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2413 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2414 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm)); 2415 2416 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2417 return true; 2418 } 2419 2420 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2421 gen_helper_gvec_2 *fn) 2422 { 2423 if (!sve_access_check(s)) { 2424 return true; 2425 } 2426 2427 unsigned vsz = pred_full_reg_size(s); 2428 TCGv_ptr t_d = tcg_temp_new_ptr(); 2429 TCGv_ptr t_n = tcg_temp_new_ptr(); 2430 uint32_t desc = 0; 2431 2432 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2433 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2434 2435 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2436 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2437 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2438 2439 fn(t_d, t_n, tcg_constant_i32(desc)); 2440 return true; 2441 } 2442 2443 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2444 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2445 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2446 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 
2447 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2448 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2449 2450 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2451 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2452 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2453 2454 /* 2455 *** SVE Permute - Interleaving Group 2456 */ 2457 2458 static gen_helper_gvec_3 * const zip_fns[4] = { 2459 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2460 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2461 }; 2462 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2463 zip_fns[a->esz], a, 0) 2464 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2465 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2466 2467 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2468 gen_helper_sve2_zip_q, a, 0) 2469 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2470 gen_helper_sve2_zip_q, a, 2471 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2472 2473 static gen_helper_gvec_3 * const uzp_fns[4] = { 2474 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2475 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2476 }; 2477 2478 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2479 uzp_fns[a->esz], a, 0) 2480 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2481 uzp_fns[a->esz], a, 1 << a->esz) 2482 2483 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2484 gen_helper_sve2_uzp_q, a, 0) 2485 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2486 gen_helper_sve2_uzp_q, a, 16) 2487 2488 static gen_helper_gvec_3 * const trn_fns[4] = { 2489 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2490 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2491 }; 2492 2493 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2494 trn_fns[a->esz], a, 0) 2495 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2496 trn_fns[a->esz], a, 1 << a->esz) 2497 2498 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, 
gen_gvec_ool_arg_zzz, 2499 gen_helper_sve2_trn_q, a, 0) 2500 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2501 gen_helper_sve2_trn_q, a, 16) 2502 2503 /* 2504 *** SVE Permute Vector - Predicated Group 2505 */ 2506 2507 static gen_helper_gvec_3 * const compact_fns[4] = { 2508 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2509 }; 2510 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2511 compact_fns[a->esz], a, 0) 2512 2513 /* Call the helper that computes the ARM LastActiveElement pseudocode 2514 * function, scaled by the element size. This includes the not found 2515 * indication; e.g. not found for esz=3 is -8. 2516 */ 2517 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2518 { 2519 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2520 * round up, as we do elsewhere, because we need the exact size. 2521 */ 2522 TCGv_ptr t_p = tcg_temp_new_ptr(); 2523 unsigned desc = 0; 2524 2525 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2526 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2527 2528 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg)); 2529 2530 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2531 } 2532 2533 /* Increment LAST to the offset of the next element in the vector, 2534 * wrapping around to 0. 2535 */ 2536 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2537 { 2538 unsigned vsz = vec_full_reg_size(s); 2539 2540 tcg_gen_addi_i32(last, last, 1 << esz); 2541 if (is_power_of_2(vsz)) { 2542 tcg_gen_andi_i32(last, last, vsz - 1); 2543 } else { 2544 TCGv_i32 max = tcg_constant_i32(vsz); 2545 TCGv_i32 zero = tcg_constant_i32(0); 2546 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2547 } 2548 } 2549 2550 /* If LAST < 0, set LAST to the offset of the last element in the vector. 
*/ 2551 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2552 { 2553 unsigned vsz = vec_full_reg_size(s); 2554 2555 if (is_power_of_2(vsz)) { 2556 tcg_gen_andi_i32(last, last, vsz - 1); 2557 } else { 2558 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2559 TCGv_i32 zero = tcg_constant_i32(0); 2560 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2561 } 2562 } 2563 2564 /* Load an unsigned element of ESZ from BASE+OFS. */ 2565 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2566 { 2567 TCGv_i64 r = tcg_temp_new_i64(); 2568 2569 switch (esz) { 2570 case 0: 2571 tcg_gen_ld8u_i64(r, base, ofs); 2572 break; 2573 case 1: 2574 tcg_gen_ld16u_i64(r, base, ofs); 2575 break; 2576 case 2: 2577 tcg_gen_ld32u_i64(r, base, ofs); 2578 break; 2579 case 3: 2580 tcg_gen_ld_i64(r, base, ofs); 2581 break; 2582 default: 2583 g_assert_not_reached(); 2584 } 2585 return r; 2586 } 2587 2588 /* Load an unsigned element of ESZ from RM[LAST]. */ 2589 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2590 int rm, int esz) 2591 { 2592 TCGv_ptr p = tcg_temp_new_ptr(); 2593 2594 /* Convert offset into vector into offset into ENV. 2595 * The final adjustment for the vector register base 2596 * is added via constant offset to the load. 2597 */ 2598 #if HOST_BIG_ENDIAN 2599 /* Adjust for element ordering. See vec_reg_offset. */ 2600 if (esz < 3) { 2601 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2602 } 2603 #endif 2604 tcg_gen_ext_i32_ptr(p, last); 2605 tcg_gen_add_ptr(p, p, cpu_env); 2606 2607 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2608 } 2609 2610 /* Compute CLAST for a Zreg. 
*/ 2611 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2612 { 2613 TCGv_i32 last; 2614 TCGLabel *over; 2615 TCGv_i64 ele; 2616 unsigned vsz, esz = a->esz; 2617 2618 if (!sve_access_check(s)) { 2619 return true; 2620 } 2621 2622 last = tcg_temp_new_i32(); 2623 over = gen_new_label(); 2624 2625 find_last_active(s, last, esz, a->pg); 2626 2627 /* There is of course no movcond for a 2048-bit vector, 2628 * so we must branch over the actual store. 2629 */ 2630 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2631 2632 if (!before) { 2633 incr_last_active(s, last, esz); 2634 } 2635 2636 ele = load_last_active(s, last, a->rm, esz); 2637 2638 vsz = vec_full_reg_size(s); 2639 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2640 2641 /* If this insn used MOVPRFX, we may need a second move. */ 2642 if (a->rd != a->rn) { 2643 TCGLabel *done = gen_new_label(); 2644 tcg_gen_br(done); 2645 2646 gen_set_label(over); 2647 do_mov_z(s, a->rd, a->rn); 2648 2649 gen_set_label(done); 2650 } else { 2651 gen_set_label(over); 2652 } 2653 return true; 2654 } 2655 2656 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2657 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2658 2659 /* Compute CLAST for a scalar. */ 2660 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2661 bool before, TCGv_i64 reg_val) 2662 { 2663 TCGv_i32 last = tcg_temp_new_i32(); 2664 TCGv_i64 ele, cmp; 2665 2666 find_last_active(s, last, esz, pg); 2667 2668 /* Extend the original value of last prior to incrementing. */ 2669 cmp = tcg_temp_new_i64(); 2670 tcg_gen_ext_i32_i64(cmp, last); 2671 2672 if (!before) { 2673 incr_last_active(s, last, esz); 2674 } 2675 2676 /* The conceit here is that while last < 0 indicates not found, after 2677 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address 2678 * from which we can load garbage. We then discard the garbage with 2679 * a conditional move. 
2680 */ 2681 ele = load_last_active(s, last, rm, esz); 2682 2683 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2684 ele, reg_val); 2685 } 2686 2687 /* Compute CLAST for a Vreg. */ 2688 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2689 { 2690 if (sve_access_check(s)) { 2691 int esz = a->esz; 2692 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2693 TCGv_i64 reg = load_esz(cpu_env, ofs, esz); 2694 2695 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2696 write_fp_dreg(s, a->rd, reg); 2697 } 2698 return true; 2699 } 2700 2701 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2702 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2703 2704 /* Compute CLAST for a Xreg. */ 2705 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2706 { 2707 TCGv_i64 reg; 2708 2709 if (!sve_access_check(s)) { 2710 return true; 2711 } 2712 2713 reg = cpu_reg(s, a->rd); 2714 switch (a->esz) { 2715 case 0: 2716 tcg_gen_ext8u_i64(reg, reg); 2717 break; 2718 case 1: 2719 tcg_gen_ext16u_i64(reg, reg); 2720 break; 2721 case 2: 2722 tcg_gen_ext32u_i64(reg, reg); 2723 break; 2724 case 3: 2725 break; 2726 default: 2727 g_assert_not_reached(); 2728 } 2729 2730 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2731 return true; 2732 } 2733 2734 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2735 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2736 2737 /* Compute LAST for a scalar. */ 2738 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2739 int pg, int rm, bool before) 2740 { 2741 TCGv_i32 last = tcg_temp_new_i32(); 2742 2743 find_last_active(s, last, esz, pg); 2744 if (before) { 2745 wrap_last_active(s, last, esz); 2746 } else { 2747 incr_last_active(s, last, esz); 2748 } 2749 2750 return load_last_active(s, last, rm, esz); 2751 } 2752 2753 /* Compute LAST for a Vreg. 
*/ 2754 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2755 { 2756 if (sve_access_check(s)) { 2757 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2758 write_fp_dreg(s, a->rd, val); 2759 } 2760 return true; 2761 } 2762 2763 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2764 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2765 2766 /* Compute LAST for a Xreg. */ 2767 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2768 { 2769 if (sve_access_check(s)) { 2770 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2771 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2772 } 2773 return true; 2774 } 2775 2776 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2777 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2778 2779 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2780 { 2781 if (!dc_isar_feature(aa64_sve, s)) { 2782 return false; 2783 } 2784 if (sve_access_check(s)) { 2785 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2786 } 2787 return true; 2788 } 2789 2790 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2791 { 2792 if (!dc_isar_feature(aa64_sve, s)) { 2793 return false; 2794 } 2795 if (sve_access_check(s)) { 2796 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2797 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz); 2798 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2799 } 2800 return true; 2801 } 2802 2803 static gen_helper_gvec_3 * const revb_fns[4] = { 2804 NULL, gen_helper_sve_revb_h, 2805 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2806 }; 2807 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2808 2809 static gen_helper_gvec_3 * const revh_fns[4] = { 2810 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2811 }; 2812 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2813 2814 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2815 a->esz == 3 ? 
gen_helper_sve_revw_d : NULL, a, 0) 2816 2817 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2818 2819 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2820 gen_helper_sve_splice, a, a->esz) 2821 2822 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2823 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2824 2825 /* 2826 *** SVE Integer Compare - Vectors Group 2827 */ 2828 2829 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2830 gen_helper_gvec_flags_4 *gen_fn) 2831 { 2832 TCGv_ptr pd, zn, zm, pg; 2833 unsigned vsz; 2834 TCGv_i32 t; 2835 2836 if (gen_fn == NULL) { 2837 return false; 2838 } 2839 if (!sve_access_check(s)) { 2840 return true; 2841 } 2842 2843 vsz = vec_full_reg_size(s); 2844 t = tcg_temp_new_i32(); 2845 pd = tcg_temp_new_ptr(); 2846 zn = tcg_temp_new_ptr(); 2847 zm = tcg_temp_new_ptr(); 2848 pg = tcg_temp_new_ptr(); 2849 2850 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 2851 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2852 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm)); 2853 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 2854 2855 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2856 2857 do_pred_flags(t); 2858 return true; 2859 } 2860 2861 #define DO_PPZZ(NAME, name) \ 2862 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2863 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2864 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2865 }; \ 2866 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2867 a, name##_ppzz_fns[a->esz]) 2868 2869 DO_PPZZ(CMPEQ, cmpeq) 2870 DO_PPZZ(CMPNE, cmpne) 2871 DO_PPZZ(CMPGT, cmpgt) 2872 DO_PPZZ(CMPGE, cmpge) 2873 DO_PPZZ(CMPHI, cmphi) 2874 DO_PPZZ(CMPHS, cmphs) 2875 2876 #undef DO_PPZZ 2877 2878 #define DO_PPZW(NAME, name) \ 2879 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2880 
gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2881 gen_helper_sve_##name##_ppzw_s, NULL \ 2882 }; \ 2883 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2884 a, name##_ppzw_fns[a->esz]) 2885 2886 DO_PPZW(CMPEQ, cmpeq) 2887 DO_PPZW(CMPNE, cmpne) 2888 DO_PPZW(CMPGT, cmpgt) 2889 DO_PPZW(CMPGE, cmpge) 2890 DO_PPZW(CMPHI, cmphi) 2891 DO_PPZW(CMPHS, cmphs) 2892 DO_PPZW(CMPLT, cmplt) 2893 DO_PPZW(CMPLE, cmple) 2894 DO_PPZW(CMPLO, cmplo) 2895 DO_PPZW(CMPLS, cmpls) 2896 2897 #undef DO_PPZW 2898 2899 /* 2900 *** SVE Integer Compare - Immediate Groups 2901 */ 2902 2903 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2904 gen_helper_gvec_flags_3 *gen_fn) 2905 { 2906 TCGv_ptr pd, zn, pg; 2907 unsigned vsz; 2908 TCGv_i32 t; 2909 2910 if (gen_fn == NULL) { 2911 return false; 2912 } 2913 if (!sve_access_check(s)) { 2914 return true; 2915 } 2916 2917 vsz = vec_full_reg_size(s); 2918 t = tcg_temp_new_i32(); 2919 pd = tcg_temp_new_ptr(); 2920 zn = tcg_temp_new_ptr(); 2921 pg = tcg_temp_new_ptr(); 2922 2923 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 2924 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2925 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 2926 2927 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 2928 2929 do_pred_flags(t); 2930 return true; 2931 } 2932 2933 #define DO_PPZI(NAME, name) \ 2934 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 2935 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 2936 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 2937 }; \ 2938 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 2939 name##_ppzi_fns[a->esz]) 2940 2941 DO_PPZI(CMPEQ, cmpeq) 2942 DO_PPZI(CMPNE, cmpne) 2943 DO_PPZI(CMPGT, cmpgt) 2944 DO_PPZI(CMPGE, cmpge) 2945 DO_PPZI(CMPHI, cmphi) 2946 DO_PPZI(CMPHS, cmphs) 2947 DO_PPZI(CMPLT, cmplt) 2948 DO_PPZI(CMPLE, cmple) 2949 DO_PPZI(CMPLO, cmplo) 2950 DO_PPZI(CMPLS, cmpls) 2951 
2952 #undef DO_PPZI 2953 2954 /* 2955 *** SVE Partition Break Group 2956 */ 2957 2958 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2959 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2960 { 2961 if (!sve_access_check(s)) { 2962 return true; 2963 } 2964 2965 unsigned vsz = pred_full_reg_size(s); 2966 2967 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2968 TCGv_ptr d = tcg_temp_new_ptr(); 2969 TCGv_ptr n = tcg_temp_new_ptr(); 2970 TCGv_ptr m = tcg_temp_new_ptr(); 2971 TCGv_ptr g = tcg_temp_new_ptr(); 2972 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2973 2974 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 2975 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 2976 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm)); 2977 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 2978 2979 if (a->s) { 2980 TCGv_i32 t = tcg_temp_new_i32(); 2981 fn_s(t, d, n, m, g, desc); 2982 do_pred_flags(t); 2983 } else { 2984 fn(d, n, m, g, desc); 2985 } 2986 return true; 2987 } 2988 2989 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2990 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 2991 { 2992 if (!sve_access_check(s)) { 2993 return true; 2994 } 2995 2996 unsigned vsz = pred_full_reg_size(s); 2997 2998 /* Predicate sizes may be smaller and cannot use simd_desc. 
*/ 2999 TCGv_ptr d = tcg_temp_new_ptr(); 3000 TCGv_ptr n = tcg_temp_new_ptr(); 3001 TCGv_ptr g = tcg_temp_new_ptr(); 3002 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3003 3004 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 3005 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 3006 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 3007 3008 if (a->s) { 3009 TCGv_i32 t = tcg_temp_new_i32(); 3010 fn_s(t, d, n, g, desc); 3011 do_pred_flags(t); 3012 } else { 3013 fn(d, n, g, desc); 3014 } 3015 return true; 3016 } 3017 3018 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3019 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3020 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3021 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3022 3023 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3024 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3025 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3026 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3027 3028 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3029 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3030 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3031 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3032 3033 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3034 gen_helper_sve_brkn, gen_helper_sve_brkns) 3035 3036 /* 3037 *** SVE Predicate Count Group 3038 */ 3039 3040 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3041 { 3042 unsigned psz = pred_full_reg_size(s); 3043 3044 if (psz <= 8) { 3045 uint64_t psz_mask; 3046 3047 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn)); 3048 if (pn != pg) { 3049 TCGv_i64 g = tcg_temp_new_i64(); 3050 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg)); 3051 tcg_gen_and_i64(val, val, g); 3052 } 3053 3054 /* Reduce the pred_esz_masks value simply to reduce the 3055 * size of the code generated here. 
3056 */ 3057 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3058 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3059 3060 tcg_gen_ctpop_i64(val, val); 3061 } else { 3062 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3063 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3064 unsigned desc = 0; 3065 3066 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3067 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3068 3069 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn)); 3070 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3071 3072 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3073 } 3074 } 3075 3076 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3077 { 3078 if (!dc_isar_feature(aa64_sve, s)) { 3079 return false; 3080 } 3081 if (sve_access_check(s)) { 3082 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3083 } 3084 return true; 3085 } 3086 3087 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3088 { 3089 if (!dc_isar_feature(aa64_sve, s)) { 3090 return false; 3091 } 3092 if (sve_access_check(s)) { 3093 TCGv_i64 reg = cpu_reg(s, a->rd); 3094 TCGv_i64 val = tcg_temp_new_i64(); 3095 3096 do_cntp(s, val, a->esz, a->pg, a->pg); 3097 if (a->d) { 3098 tcg_gen_sub_i64(reg, reg, val); 3099 } else { 3100 tcg_gen_add_i64(reg, reg, val); 3101 } 3102 } 3103 return true; 3104 } 3105 3106 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3107 { 3108 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3109 return false; 3110 } 3111 if (sve_access_check(s)) { 3112 unsigned vsz = vec_full_reg_size(s); 3113 TCGv_i64 val = tcg_temp_new_i64(); 3114 GVecGen2sFn *gvec_fn = a->d ? 
tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3115 3116 do_cntp(s, val, a->esz, a->pg, a->pg); 3117 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3118 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3119 } 3120 return true; 3121 } 3122 3123 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3124 { 3125 if (!dc_isar_feature(aa64_sve, s)) { 3126 return false; 3127 } 3128 if (sve_access_check(s)) { 3129 TCGv_i64 reg = cpu_reg(s, a->rd); 3130 TCGv_i64 val = tcg_temp_new_i64(); 3131 3132 do_cntp(s, val, a->esz, a->pg, a->pg); 3133 do_sat_addsub_32(reg, val, a->u, a->d); 3134 } 3135 return true; 3136 } 3137 3138 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3139 { 3140 if (!dc_isar_feature(aa64_sve, s)) { 3141 return false; 3142 } 3143 if (sve_access_check(s)) { 3144 TCGv_i64 reg = cpu_reg(s, a->rd); 3145 TCGv_i64 val = tcg_temp_new_i64(); 3146 3147 do_cntp(s, val, a->esz, a->pg, a->pg); 3148 do_sat_addsub_64(reg, val, a->u, a->d); 3149 } 3150 return true; 3151 } 3152 3153 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3154 { 3155 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3156 return false; 3157 } 3158 if (sve_access_check(s)) { 3159 TCGv_i64 val = tcg_temp_new_i64(); 3160 do_cntp(s, val, a->esz, a->pg, a->pg); 3161 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3162 } 3163 return true; 3164 } 3165 3166 /* 3167 *** SVE Integer Compare Scalars Group 3168 */ 3169 3170 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3171 { 3172 if (!dc_isar_feature(aa64_sve, s)) { 3173 return false; 3174 } 3175 if (!sve_access_check(s)) { 3176 return true; 3177 } 3178 3179 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3180 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3181 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3182 TCGv_i64 cmp = tcg_temp_new_i64(); 3183 3184 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3185 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3186 3187 /* VF = !NF & !CF. 
*/ 3188 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3189 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3190 3191 /* Both NF and VF actually look at bit 31. */ 3192 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3193 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3194 return true; 3195 } 3196 3197 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3198 { 3199 TCGv_i64 op0, op1, t0, t1, tmax; 3200 TCGv_i32 t2; 3201 TCGv_ptr ptr; 3202 unsigned vsz = vec_full_reg_size(s); 3203 unsigned desc = 0; 3204 TCGCond cond; 3205 uint64_t maxval; 3206 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3207 bool eq = a->eq == a->lt; 3208 3209 /* The greater-than conditions are all SVE2. */ 3210 if (a->lt 3211 ? !dc_isar_feature(aa64_sve, s) 3212 : !dc_isar_feature(aa64_sve2, s)) { 3213 return false; 3214 } 3215 if (!sve_access_check(s)) { 3216 return true; 3217 } 3218 3219 op0 = read_cpu_reg(s, a->rn, 1); 3220 op1 = read_cpu_reg(s, a->rm, 1); 3221 3222 if (!a->sf) { 3223 if (a->u) { 3224 tcg_gen_ext32u_i64(op0, op0); 3225 tcg_gen_ext32u_i64(op1, op1); 3226 } else { 3227 tcg_gen_ext32s_i64(op0, op0); 3228 tcg_gen_ext32s_i64(op1, op1); 3229 } 3230 } 3231 3232 /* For the helper, compress the different conditions into a computation 3233 * of how many iterations for which the condition is true. 3234 */ 3235 t0 = tcg_temp_new_i64(); 3236 t1 = tcg_temp_new_i64(); 3237 3238 if (a->lt) { 3239 tcg_gen_sub_i64(t0, op1, op0); 3240 if (a->u) { 3241 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3242 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3243 } else { 3244 maxval = a->sf ? INT64_MAX : INT32_MAX; 3245 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3246 } 3247 } else { 3248 tcg_gen_sub_i64(t0, op0, op1); 3249 if (a->u) { 3250 maxval = 0; 3251 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3252 } else { 3253 maxval = a->sf ? INT64_MIN : INT32_MIN; 3254 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3255 } 3256 } 3257 3258 tmax = tcg_constant_i64(vsz >> a->esz); 3259 if (eq) { 3260 /* Equality means one more iteration. 
*/ 3261 tcg_gen_addi_i64(t0, t0, 1); 3262 3263 /* 3264 * For the less-than while, if op1 is maxval (and the only time 3265 * the addition above could overflow), then we produce an all-true 3266 * predicate by setting the count to the vector length. This is 3267 * because the pseudocode is described as an increment + compare 3268 * loop, and the maximum integer would always compare true. 3269 * Similarly, the greater-than while has the same issue with the 3270 * minimum integer due to the decrement + compare loop. 3271 */ 3272 tcg_gen_movi_i64(t1, maxval); 3273 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3274 } 3275 3276 /* Bound to the maximum. */ 3277 tcg_gen_umin_i64(t0, t0, tmax); 3278 3279 /* Set the count to zero if the condition is false. */ 3280 tcg_gen_movi_i64(t1, 0); 3281 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3282 3283 /* Since we're bounded, pass as a 32-bit type. */ 3284 t2 = tcg_temp_new_i32(); 3285 tcg_gen_extrl_i64_i32(t2, t0); 3286 3287 /* Scale elements to bits. 
*/ 3288 tcg_gen_shli_i32(t2, t2, a->esz); 3289 3290 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3291 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3292 3293 ptr = tcg_temp_new_ptr(); 3294 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3295 3296 if (a->lt) { 3297 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3298 } else { 3299 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3300 } 3301 do_pred_flags(t2); 3302 return true; 3303 } 3304 3305 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3306 { 3307 TCGv_i64 op0, op1, diff, t1, tmax; 3308 TCGv_i32 t2; 3309 TCGv_ptr ptr; 3310 unsigned vsz = vec_full_reg_size(s); 3311 unsigned desc = 0; 3312 3313 if (!dc_isar_feature(aa64_sve2, s)) { 3314 return false; 3315 } 3316 if (!sve_access_check(s)) { 3317 return true; 3318 } 3319 3320 op0 = read_cpu_reg(s, a->rn, 1); 3321 op1 = read_cpu_reg(s, a->rm, 1); 3322 3323 tmax = tcg_constant_i64(vsz); 3324 diff = tcg_temp_new_i64(); 3325 3326 if (a->rw) { 3327 /* WHILERW */ 3328 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3329 t1 = tcg_temp_new_i64(); 3330 tcg_gen_sub_i64(diff, op0, op1); 3331 tcg_gen_sub_i64(t1, op1, op0); 3332 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3333 /* Round down to a multiple of ESIZE. */ 3334 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3335 /* If op1 == op0, diff == 0, and the condition is always true. */ 3336 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3337 } else { 3338 /* WHILEWR */ 3339 tcg_gen_sub_i64(diff, op1, op0); 3340 /* Round down to a multiple of ESIZE. */ 3341 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3342 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3343 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3344 } 3345 3346 /* Bound to the maximum. */ 3347 tcg_gen_umin_i64(diff, diff, tmax); 3348 3349 /* Since we're bounded, pass as a 32-bit type. 
*/ 3350 t2 = tcg_temp_new_i32(); 3351 tcg_gen_extrl_i64_i32(t2, diff); 3352 3353 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3354 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3355 3356 ptr = tcg_temp_new_ptr(); 3357 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3358 3359 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3360 do_pred_flags(t2); 3361 return true; 3362 } 3363 3364 /* 3365 *** SVE Integer Wide Immediate - Unpredicated Group 3366 */ 3367 3368 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3369 { 3370 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3371 return false; 3372 } 3373 if (sve_access_check(s)) { 3374 unsigned vsz = vec_full_reg_size(s); 3375 int dofs = vec_full_reg_offset(s, a->rd); 3376 uint64_t imm; 3377 3378 /* Decode the VFP immediate. */ 3379 imm = vfp_expand_imm(a->esz, a->imm); 3380 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3381 } 3382 return true; 3383 } 3384 3385 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3386 { 3387 if (!dc_isar_feature(aa64_sve, s)) { 3388 return false; 3389 } 3390 if (sve_access_check(s)) { 3391 unsigned vsz = vec_full_reg_size(s); 3392 int dofs = vec_full_reg_offset(s, a->rd); 3393 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3394 } 3395 return true; 3396 } 3397 3398 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3399 3400 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3401 { 3402 a->imm = -a->imm; 3403 return trans_ADD_zzi(s, a); 3404 } 3405 3406 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3407 { 3408 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3409 static const GVecGen2s op[4] = { 3410 { .fni8 = tcg_gen_vec_sub8_i64, 3411 .fniv = tcg_gen_sub_vec, 3412 .fno = gen_helper_sve_subri_b, 3413 .opt_opc = vecop_list, 3414 .vece = MO_8, 3415 .scalar_first = true }, 3416 { .fni8 = tcg_gen_vec_sub16_i64, 3417 .fniv = tcg_gen_sub_vec, 3418 .fno = gen_helper_sve_subri_h, 3419 
.opt_opc = vecop_list, 3420 .vece = MO_16, 3421 .scalar_first = true }, 3422 { .fni4 = tcg_gen_sub_i32, 3423 .fniv = tcg_gen_sub_vec, 3424 .fno = gen_helper_sve_subri_s, 3425 .opt_opc = vecop_list, 3426 .vece = MO_32, 3427 .scalar_first = true }, 3428 { .fni8 = tcg_gen_sub_i64, 3429 .fniv = tcg_gen_sub_vec, 3430 .fno = gen_helper_sve_subri_d, 3431 .opt_opc = vecop_list, 3432 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3433 .vece = MO_64, 3434 .scalar_first = true } 3435 }; 3436 3437 if (!dc_isar_feature(aa64_sve, s)) { 3438 return false; 3439 } 3440 if (sve_access_check(s)) { 3441 unsigned vsz = vec_full_reg_size(s); 3442 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3443 vec_full_reg_offset(s, a->rn), 3444 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3445 } 3446 return true; 3447 } 3448 3449 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3450 3451 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3452 { 3453 if (sve_access_check(s)) { 3454 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3455 tcg_constant_i64(a->imm), u, d); 3456 } 3457 return true; 3458 } 3459 3460 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3461 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3462 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3463 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3464 3465 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3466 { 3467 if (sve_access_check(s)) { 3468 unsigned vsz = vec_full_reg_size(s); 3469 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3470 vec_full_reg_offset(s, a->rn), 3471 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3472 } 3473 return true; 3474 } 3475 3476 #define DO_ZZI(NAME, name) \ 3477 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3478 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3479 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3480 }; \ 3481 TRANS_FEAT(NAME##_zzi, 
aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3482 3483 DO_ZZI(SMAX, smax) 3484 DO_ZZI(UMAX, umax) 3485 DO_ZZI(SMIN, smin) 3486 DO_ZZI(UMIN, umin) 3487 3488 #undef DO_ZZI 3489 3490 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3491 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3492 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3493 }; 3494 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3495 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3496 3497 /* 3498 * SVE Multiply - Indexed 3499 */ 3500 3501 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3502 gen_helper_gvec_sdot_idx_b, a) 3503 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3504 gen_helper_gvec_sdot_idx_h, a) 3505 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3506 gen_helper_gvec_udot_idx_b, a) 3507 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3508 gen_helper_gvec_udot_idx_h, a) 3509 3510 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3511 gen_helper_gvec_sudot_idx_b, a) 3512 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3513 gen_helper_gvec_usdot_idx_b, a) 3514 3515 #define DO_SVE2_RRX(NAME, FUNC) \ 3516 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3517 a->rd, a->rn, a->rm, a->index) 3518 3519 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3520 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3521 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3522 3523 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3524 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3525 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3526 3527 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3528 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3529 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3530 3531 #undef DO_SVE2_RRX 3532 3533 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3534 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3535 a->rd, a->rn, a->rm, 
(a->index << 1) | TOP) 3536 3537 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3538 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3539 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3540 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3541 3542 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3543 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3544 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3545 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3546 3547 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3548 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3549 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3550 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3551 3552 #undef DO_SVE2_RRX_TB 3553 3554 #define DO_SVE2_RRXR(NAME, FUNC) \ 3555 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3556 3557 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3558 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s) 3559 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3560 3561 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3562 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3563 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3564 3565 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3566 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3567 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3568 3569 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3570 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3571 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3572 3573 #undef DO_SVE2_RRXR 3574 3575 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3576 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3577 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3578 
3579 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3580 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3581 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3582 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3583 3584 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3585 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3586 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3587 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3588 3589 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3590 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3591 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3592 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3593 3594 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3595 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3596 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3597 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3598 3599 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3600 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3601 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3602 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3603 3604 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3605 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3606 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3607 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3608 3609 #undef DO_SVE2_RRXR_TB 3610 3611 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3612 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3613 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3614 3615 
DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3616 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3617 3618 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3619 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3620 3621 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3622 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3623 3624 #undef DO_SVE2_RRXR_ROT 3625 3626 /* 3627 *** SVE Floating Point Multiply-Add Indexed Group 3628 */ 3629 3630 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3631 { 3632 static gen_helper_gvec_4_ptr * const fns[4] = { 3633 NULL, 3634 gen_helper_gvec_fmla_idx_h, 3635 gen_helper_gvec_fmla_idx_s, 3636 gen_helper_gvec_fmla_idx_d, 3637 }; 3638 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3639 (a->index << 1) | sub, 3640 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3641 } 3642 3643 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3644 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3645 3646 /* 3647 *** SVE Floating Point Multiply Indexed Group 3648 */ 3649 3650 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3651 NULL, gen_helper_gvec_fmul_idx_h, 3652 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3653 }; 3654 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3655 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3656 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3657 3658 /* 3659 *** SVE Floating Point Fast Reduction Group 3660 */ 3661 3662 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3663 TCGv_ptr, TCGv_i32); 3664 3665 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3666 gen_helper_fp_reduce *fn) 3667 { 3668 unsigned vsz, p2vsz; 3669 TCGv_i32 t_desc; 3670 TCGv_ptr t_zn, t_pg, status; 3671 TCGv_i64 temp; 3672 3673 if (fn == NULL) { 3674 return false; 3675 } 3676 if (!sve_access_check(s)) { 3677 return true; 3678 } 3679 3680 vsz = vec_full_reg_size(s); 3681 p2vsz = pow2ceil(vsz); 3682 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3683 temp = tcg_temp_new_i64(); 3684 t_zn = tcg_temp_new_ptr(); 3685 t_pg = tcg_temp_new_ptr(); 3686 3687 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 3688 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3689 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3690 3691 fn(temp, t_zn, t_pg, status, t_desc); 3692 3693 write_fp_dreg(s, a->rd, temp); 3694 return true; 3695 } 3696 3697 #define DO_VPZ(NAME, name) \ 3698 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3699 NULL, gen_helper_sve_##name##_h, \ 3700 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3701 }; \ 3702 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3703 3704 DO_VPZ(FADDV, faddv) 3705 DO_VPZ(FMINNMV, fminnmv) 3706 DO_VPZ(FMAXNMV, fmaxnmv) 3707 DO_VPZ(FMINV, fminv) 3708 DO_VPZ(FMAXV, fmaxv) 3709 3710 #undef DO_VPZ 3711 3712 /* 3713 *** SVE Floating Point Unary Operations - Unpredicated Group 3714 */ 3715 3716 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3717 NULL, gen_helper_gvec_frecpe_h, 3718 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3719 }; 3720 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3721 3722 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3723 NULL, gen_helper_gvec_frsqrte_h, 3724 gen_helper_gvec_frsqrte_s, 
gen_helper_gvec_frsqrte_d, 3725 }; 3726 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3727 3728 /* 3729 *** SVE Floating Point Compare with Zero Group 3730 */ 3731 3732 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3733 gen_helper_gvec_3_ptr *fn) 3734 { 3735 if (fn == NULL) { 3736 return false; 3737 } 3738 if (sve_access_check(s)) { 3739 unsigned vsz = vec_full_reg_size(s); 3740 TCGv_ptr status = 3741 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3742 3743 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 3744 vec_full_reg_offset(s, a->rn), 3745 pred_full_reg_offset(s, a->pg), 3746 status, vsz, vsz, 0, fn); 3747 } 3748 return true; 3749 } 3750 3751 #define DO_PPZ(NAME, name) \ 3752 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 3753 NULL, gen_helper_sve_##name##_h, \ 3754 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3755 }; \ 3756 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 3757 3758 DO_PPZ(FCMGE_ppz0, fcmge0) 3759 DO_PPZ(FCMGT_ppz0, fcmgt0) 3760 DO_PPZ(FCMLE_ppz0, fcmle0) 3761 DO_PPZ(FCMLT_ppz0, fcmlt0) 3762 DO_PPZ(FCMEQ_ppz0, fcmeq0) 3763 DO_PPZ(FCMNE_ppz0, fcmne0) 3764 3765 #undef DO_PPZ 3766 3767 /* 3768 *** SVE floating-point trig multiply-add coefficient 3769 */ 3770 3771 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 3772 NULL, gen_helper_sve_ftmad_h, 3773 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 3774 }; 3775 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 3776 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, 3777 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3778 3779 /* 3780 *** SVE Floating Point Accumulating Reduction Group 3781 */ 3782 3783 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 3784 { 3785 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 3786 TCGv_ptr, TCGv_ptr, TCGv_i32); 3787 static fadda_fn * const fns[3] = { 3788 gen_helper_sve_fadda_h, 3789 gen_helper_sve_fadda_s, 3790 gen_helper_sve_fadda_d, 3791 }; 3792 unsigned vsz = vec_full_reg_size(s); 3793 TCGv_ptr t_rm, t_pg, t_fpst; 3794 TCGv_i64 t_val; 3795 TCGv_i32 t_desc; 3796 3797 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3798 return false; 3799 } 3800 s->is_nonstreaming = true; 3801 if (!sve_access_check(s)) { 3802 return true; 3803 } 3804 3805 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 3806 t_rm = tcg_temp_new_ptr(); 3807 t_pg = tcg_temp_new_ptr(); 3808 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm)); 3809 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3810 t_fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3811 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3812 3813 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 3814 3815 write_fp_dreg(s, a->rd, t_val); 3816 return true; 3817 } 3818 3819 /* 3820 *** SVE Floating Point Arithmetic - Unpredicated Group 3821 */ 3822 3823 #define DO_FP3(NAME, name) \ 3824 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 3825 NULL, gen_helper_gvec_##name##_h, \ 3826 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 3827 }; \ 3828 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 3829 3830 DO_FP3(FADD_zzz, fadd) 3831 DO_FP3(FSUB_zzz, fsub) 3832 DO_FP3(FMUL_zzz, fmul) 3833 DO_FP3(FRECPS, recps) 3834 DO_FP3(FRSQRTS, rsqrts) 3835 3836 #undef DO_FP3 3837 3838 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 3839 NULL, gen_helper_gvec_ftsmul_h, 3840 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 3841 }; 3842 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 3843 ftsmul_fns[a->esz], a, 0) 3844 3845 /* 3846 *** SVE Floating Point Arithmetic - Predicated Group 3847 */ 3848 3849 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 3850 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 3851 NULL, gen_helper_##name##_h, \ 3852 gen_helper_##name##_s, gen_helper_##name##_d \ 3853 }; \ 3854 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 3855 3856 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 3857 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 3858 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 3859 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) 3860 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) 3861 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 3862 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 3863 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) 3864 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 3865 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 3866 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 3867 3868 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, 
TCGv_ptr, 3869 TCGv_i64, TCGv_ptr, TCGv_i32); 3870 3871 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 3872 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 3873 { 3874 unsigned vsz = vec_full_reg_size(s); 3875 TCGv_ptr t_zd, t_zn, t_pg, status; 3876 TCGv_i32 desc; 3877 3878 t_zd = tcg_temp_new_ptr(); 3879 t_zn = tcg_temp_new_ptr(); 3880 t_pg = tcg_temp_new_ptr(); 3881 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd)); 3882 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn)); 3883 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3884 3885 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 3886 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3887 fn(t_zd, t_zn, t_pg, scalar, status, desc); 3888 } 3889 3890 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 3891 gen_helper_sve_fp2scalar *fn) 3892 { 3893 if (fn == NULL) { 3894 return false; 3895 } 3896 if (sve_access_check(s)) { 3897 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 3898 tcg_constant_i64(imm), fn); 3899 } 3900 return true; 3901 } 3902 3903 #define DO_FP_IMM(NAME, name, const0, const1) \ 3904 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 3905 NULL, gen_helper_sve_##name##_h, \ 3906 gen_helper_sve_##name##_s, \ 3907 gen_helper_sve_##name##_d \ 3908 }; \ 3909 static uint64_t const name##_const[4][2] = { \ 3910 { -1, -1 }, \ 3911 { float16_##const0, float16_##const1 }, \ 3912 { float32_##const0, float32_##const1 }, \ 3913 { float64_##const0, float64_##const1 }, \ 3914 }; \ 3915 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 3916 name##_const[a->esz][a->imm], name##_fns[a->esz]) 3917 3918 DO_FP_IMM(FADD, fadds, half, one) 3919 DO_FP_IMM(FSUB, fsubs, half, one) 3920 DO_FP_IMM(FMUL, fmuls, half, two) 3921 DO_FP_IMM(FSUBR, fsubrs, half, one) 3922 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 3923 DO_FP_IMM(FMINNM, fminnms, zero, one) 3924 DO_FP_IMM(FMAX, fmaxs, zero, one) 3925 DO_FP_IMM(FMIN, fmins, zero, 
one) 3926 3927 #undef DO_FP_IMM 3928 3929 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 3930 gen_helper_gvec_4_ptr *fn) 3931 { 3932 if (fn == NULL) { 3933 return false; 3934 } 3935 if (sve_access_check(s)) { 3936 unsigned vsz = vec_full_reg_size(s); 3937 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3938 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 3939 vec_full_reg_offset(s, a->rn), 3940 vec_full_reg_offset(s, a->rm), 3941 pred_full_reg_offset(s, a->pg), 3942 status, vsz, vsz, 0, fn); 3943 } 3944 return true; 3945 } 3946 3947 #define DO_FPCMP(NAME, name) \ 3948 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 3949 NULL, gen_helper_sve_##name##_h, \ 3950 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3951 }; \ 3952 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 3953 3954 DO_FPCMP(FCMGE, fcmge) 3955 DO_FPCMP(FCMGT, fcmgt) 3956 DO_FPCMP(FCMEQ, fcmeq) 3957 DO_FPCMP(FCMNE, fcmne) 3958 DO_FPCMP(FCMUO, fcmuo) 3959 DO_FPCMP(FACGE, facge) 3960 DO_FPCMP(FACGT, facgt) 3961 3962 #undef DO_FPCMP 3963 3964 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 3965 NULL, gen_helper_sve_fcadd_h, 3966 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 3967 }; 3968 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 3969 a->rd, a->rn, a->rm, a->pg, a->rot, 3970 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3971 3972 #define DO_FMLA(NAME, name) \ 3973 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 3974 NULL, gen_helper_sve_##name##_h, \ 3975 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3976 }; \ 3977 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 3978 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 3979 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3980 3981 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 3982 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 3983 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 3984 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 3985 3986 #undef DO_FMLA 3987 3988 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 3989 NULL, gen_helper_sve_fcmla_zpzzz_h, 3990 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 3991 }; 3992 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 3993 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 3994 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3995 3996 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 3997 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 3998 }; 3999 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 4000 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 4001 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4002 4003 /* 4004 *** SVE Floating Point Unary Operations Predicated Group 4005 */ 4006 4007 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4008 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 4009 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4010 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 4011 4012 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 4013 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 4014 4015 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4016 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 4017 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4018 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 4019 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4020 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 4021 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4022 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 4023 4024 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4025 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 4026 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4027 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 4028 TRANS_FEAT(FCVTZS_hs, aa64_sve, 
gen_gvec_fpst_arg_zpz, 4029 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 4030 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4031 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 4032 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4033 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 4034 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4035 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 4036 4037 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4038 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 4039 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4040 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 4041 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4042 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 4043 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4044 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 4045 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4046 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 4047 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4048 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 4049 4050 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4051 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 4052 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4053 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 4054 4055 static gen_helper_gvec_3_ptr * const frint_fns[] = { 4056 NULL, 4057 gen_helper_sve_frint_h, 4058 gen_helper_sve_frint_s, 4059 gen_helper_sve_frint_d 4060 }; 4061 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 4062 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4063 4064 static gen_helper_gvec_3_ptr * const frintx_fns[] = { 4065 NULL, 4066 gen_helper_sve_frintx_h, 4067 gen_helper_sve_frintx_s, 4068 gen_helper_sve_frintx_d 4069 }; 4070 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], 4071 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4072 4073 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 4074 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) 4075 { 4076 unsigned vsz; 4077 TCGv_i32 tmode; 4078 TCGv_ptr status; 4079 4080 if (fn == NULL) { 4081 return false; 4082 } 4083 if (!sve_access_check(s)) { 4084 return true; 4085 } 4086 4087 vsz = vec_full_reg_size(s); 4088 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4089 tmode = gen_set_rmode(mode, status); 4090 4091 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4092 vec_full_reg_offset(s, a->rn), 4093 pred_full_reg_offset(s, a->pg), 4094 status, vsz, vsz, 0, fn); 4095 4096 gen_restore_rmode(tmode, status); 4097 return true; 4098 } 4099 4100 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, 4101 FPROUNDING_TIEEVEN, frint_fns[a->esz]) 4102 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, 4103 FPROUNDING_POSINF, frint_fns[a->esz]) 4104 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, 4105 FPROUNDING_NEGINF, frint_fns[a->esz]) 4106 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, 4107 FPROUNDING_ZERO, frint_fns[a->esz]) 4108 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, 4109 FPROUNDING_TIEAWAY, frint_fns[a->esz]) 4110 4111 static gen_helper_gvec_3_ptr * const frecpx_fns[] = { 4112 NULL, gen_helper_sve_frecpx_h, 4113 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, 4114 }; 4115 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], 4116 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4117 4118 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { 4119 NULL, gen_helper_sve_fsqrt_h, 4120 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, 4121 }; 4122 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], 4123 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 4124 4125 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4126 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 4127 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4128 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 4129 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4130 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 4131 4132 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4133 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 4134 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4135 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 4136 4137 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4138 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 4139 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4140 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4141 4142 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4143 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4144 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4145 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4146 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4147 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4148 4149 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4150 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4151 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4152 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4153 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4154 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4155 4156 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4157 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4158 4159 /* 4160 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4161 */ 4162 4163 /* Subroutine loading a vector register at VOFS of LEN bytes. 4164 * The load should begin at the address Rn + IMM. 
4165 */ 4166 4167 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4168 int len, int rn, int imm) 4169 { 4170 int len_align = QEMU_ALIGN_DOWN(len, 8); 4171 int len_remain = len % 8; 4172 int nparts = len / 8 + ctpop8(len_remain); 4173 int midx = get_mem_index(s); 4174 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4175 4176 dirty_addr = tcg_temp_new_i64(); 4177 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4178 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); 4179 4180 /* 4181 * Note that unpredicated load/store of vector/predicate registers 4182 * are defined as a stream of bytes, which equates to little-endian 4183 * operations on larger quantities. 4184 * Attempt to keep code expansion to a minimum by limiting the 4185 * amount of unrolling done. 4186 */ 4187 if (nparts <= 4) { 4188 int i; 4189 4190 t0 = tcg_temp_new_i64(); 4191 for (i = 0; i < len_align; i += 8) { 4192 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); 4193 tcg_gen_st_i64(t0, base, vofs + i); 4194 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4195 } 4196 } else { 4197 TCGLabel *loop = gen_new_label(); 4198 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4199 4200 tcg_gen_movi_ptr(i, 0); 4201 gen_set_label(loop); 4202 4203 t0 = tcg_temp_new_i64(); 4204 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); 4205 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4206 4207 tp = tcg_temp_new_ptr(); 4208 tcg_gen_add_ptr(tp, base, i); 4209 tcg_gen_addi_ptr(i, i, 8); 4210 tcg_gen_st_i64(t0, tp, vofs); 4211 4212 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4213 } 4214 4215 /* 4216 * Predicate register loads can be any multiple of 2. 4217 * Note that we still store the entire 64-bit unit into cpu_env. 
4218 */ 4219 if (len_remain) { 4220 t0 = tcg_temp_new_i64(); 4221 switch (len_remain) { 4222 case 2: 4223 case 4: 4224 case 8: 4225 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4226 MO_LE | ctz32(len_remain)); 4227 break; 4228 4229 case 6: 4230 t1 = tcg_temp_new_i64(); 4231 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL); 4232 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4233 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW); 4234 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4235 break; 4236 4237 default: 4238 g_assert_not_reached(); 4239 } 4240 tcg_gen_st_i64(t0, base, vofs + len_align); 4241 } 4242 } 4243 4244 /* Similarly for stores. */ 4245 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4246 int len, int rn, int imm) 4247 { 4248 int len_align = QEMU_ALIGN_DOWN(len, 8); 4249 int len_remain = len % 8; 4250 int nparts = len / 8 + ctpop8(len_remain); 4251 int midx = get_mem_index(s); 4252 TCGv_i64 dirty_addr, clean_addr, t0; 4253 4254 dirty_addr = tcg_temp_new_i64(); 4255 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4256 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); 4257 4258 /* Note that unpredicated load/store of vector/predicate registers 4259 * are defined as a stream of bytes, which equates to little-endian 4260 * operations on larger quantities. There is no nice way to force 4261 * a little-endian store for aarch64_be-linux-user out of line. 4262 * 4263 * Attempt to keep code expansion to a minimum by limiting the 4264 * amount of unrolling done. 
4265 */ 4266 if (nparts <= 4) { 4267 int i; 4268 4269 t0 = tcg_temp_new_i64(); 4270 for (i = 0; i < len_align; i += 8) { 4271 tcg_gen_ld_i64(t0, base, vofs + i); 4272 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); 4273 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4274 } 4275 } else { 4276 TCGLabel *loop = gen_new_label(); 4277 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4278 4279 tcg_gen_movi_ptr(i, 0); 4280 gen_set_label(loop); 4281 4282 t0 = tcg_temp_new_i64(); 4283 tp = tcg_temp_new_ptr(); 4284 tcg_gen_add_ptr(tp, base, i); 4285 tcg_gen_ld_i64(t0, tp, vofs); 4286 tcg_gen_addi_ptr(i, i, 8); 4287 4288 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); 4289 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4290 4291 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4292 } 4293 4294 /* Predicate register stores can be any multiple of 2. */ 4295 if (len_remain) { 4296 t0 = tcg_temp_new_i64(); 4297 tcg_gen_ld_i64(t0, base, vofs + len_align); 4298 4299 switch (len_remain) { 4300 case 2: 4301 case 4: 4302 case 8: 4303 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4304 MO_LE | ctz32(len_remain)); 4305 break; 4306 4307 case 6: 4308 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL); 4309 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4310 tcg_gen_shri_i64(t0, t0, 32); 4311 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW); 4312 break; 4313 4314 default: 4315 g_assert_not_reached(); 4316 } 4317 } 4318 } 4319 4320 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4321 { 4322 if (!dc_isar_feature(aa64_sve, s)) { 4323 return false; 4324 } 4325 if (sve_access_check(s)) { 4326 int size = vec_full_reg_size(s); 4327 int off = vec_full_reg_offset(s, a->rd); 4328 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4329 } 4330 return true; 4331 } 4332 4333 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4334 { 4335 if (!dc_isar_feature(aa64_sve, s)) { 4336 return false; 4337 } 4338 if (sve_access_check(s)) { 4339 int size = pred_full_reg_size(s); 4340 int off = 
pred_full_reg_offset(s, a->rd); 4341 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4342 } 4343 return true; 4344 } 4345 4346 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4347 { 4348 if (!dc_isar_feature(aa64_sve, s)) { 4349 return false; 4350 } 4351 if (sve_access_check(s)) { 4352 int size = vec_full_reg_size(s); 4353 int off = vec_full_reg_offset(s, a->rd); 4354 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4355 } 4356 return true; 4357 } 4358 4359 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4360 { 4361 if (!dc_isar_feature(aa64_sve, s)) { 4362 return false; 4363 } 4364 if (sve_access_check(s)) { 4365 int size = pred_full_reg_size(s); 4366 int off = pred_full_reg_offset(s, a->rd); 4367 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4368 } 4369 return true; 4370 } 4371 4372 /* 4373 *** SVE Memory - Contiguous Load Group 4374 */ 4375 4376 /* The memory mode of the dtype. */ 4377 static const MemOp dtype_mop[16] = { 4378 MO_UB, MO_UB, MO_UB, MO_UB, 4379 MO_SL, MO_UW, MO_UW, MO_UW, 4380 MO_SW, MO_SW, MO_UL, MO_UL, 4381 MO_SB, MO_SB, MO_SB, MO_UQ 4382 }; 4383 4384 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4385 4386 /* The vector element size of dtype. */ 4387 static const uint8_t dtype_esz[16] = { 4388 0, 1, 2, 3, 4389 3, 1, 2, 3, 4390 3, 2, 2, 3, 4391 3, 2, 1, 3 4392 }; 4393 4394 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4395 int dtype, uint32_t mte_n, bool is_write, 4396 gen_helper_gvec_mem *fn) 4397 { 4398 unsigned vsz = vec_full_reg_size(s); 4399 TCGv_ptr t_pg; 4400 int desc = 0; 4401 4402 /* 4403 * For e.g. LD4, there are not enough arguments to pass all 4 4404 * registers as pointers, so encode the regno into the data field. 4405 * For consistency, do this even for LD1. 
4406 */ 4407 if (s->mte_active[0]) { 4408 int msz = dtype_msz(dtype); 4409 4410 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4411 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4412 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4413 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4414 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); 4415 desc <<= SVE_MTEDESC_SHIFT; 4416 } else { 4417 addr = clean_data_tbi(s, addr); 4418 } 4419 4420 desc = simd_desc(vsz, vsz, zt | desc); 4421 t_pg = tcg_temp_new_ptr(); 4422 4423 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 4424 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc)); 4425 } 4426 4427 /* Indexed by [mte][be][dtype][nreg] */ 4428 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4429 { /* mte inactive, little-endian */ 4430 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4431 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4432 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4433 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4434 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4435 4436 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4437 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4438 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4439 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4440 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4441 4442 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4443 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4444 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4445 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4446 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4447 4448 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4449 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4450 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4451 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4452 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4453 4454 /* mte inactive, 
big-endian */ 4455 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4456 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4457 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4458 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4459 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4460 4461 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4462 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4463 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4464 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4465 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4466 4467 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4468 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4469 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4470 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4471 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4472 4473 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4474 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4475 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4476 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4477 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4478 4479 { /* mte active, little-endian */ 4480 { { gen_helper_sve_ld1bb_r_mte, 4481 gen_helper_sve_ld2bb_r_mte, 4482 gen_helper_sve_ld3bb_r_mte, 4483 gen_helper_sve_ld4bb_r_mte }, 4484 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4485 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4486 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4487 4488 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4489 { gen_helper_sve_ld1hh_le_r_mte, 4490 gen_helper_sve_ld2hh_le_r_mte, 4491 gen_helper_sve_ld3hh_le_r_mte, 4492 gen_helper_sve_ld4hh_le_r_mte }, 4493 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4494 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4495 4496 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4497 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4498 { gen_helper_sve_ld1ss_le_r_mte, 4499 
gen_helper_sve_ld2ss_le_r_mte, 4500 gen_helper_sve_ld3ss_le_r_mte, 4501 gen_helper_sve_ld4ss_le_r_mte }, 4502 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4503 4504 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4505 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4506 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4507 { gen_helper_sve_ld1dd_le_r_mte, 4508 gen_helper_sve_ld2dd_le_r_mte, 4509 gen_helper_sve_ld3dd_le_r_mte, 4510 gen_helper_sve_ld4dd_le_r_mte } }, 4511 4512 /* mte active, big-endian */ 4513 { { gen_helper_sve_ld1bb_r_mte, 4514 gen_helper_sve_ld2bb_r_mte, 4515 gen_helper_sve_ld3bb_r_mte, 4516 gen_helper_sve_ld4bb_r_mte }, 4517 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4518 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4519 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4520 4521 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4522 { gen_helper_sve_ld1hh_be_r_mte, 4523 gen_helper_sve_ld2hh_be_r_mte, 4524 gen_helper_sve_ld3hh_be_r_mte, 4525 gen_helper_sve_ld4hh_be_r_mte }, 4526 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4527 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4528 4529 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4530 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4531 { gen_helper_sve_ld1ss_be_r_mte, 4532 gen_helper_sve_ld2ss_be_r_mte, 4533 gen_helper_sve_ld3ss_be_r_mte, 4534 gen_helper_sve_ld4ss_be_r_mte }, 4535 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4536 4537 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4538 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4539 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4540 { gen_helper_sve_ld1dd_be_r_mte, 4541 gen_helper_sve_ld2dd_be_r_mte, 4542 gen_helper_sve_ld3dd_be_r_mte, 4543 gen_helper_sve_ld4dd_be_r_mte } } }, 4544 }; 4545 4546 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4547 TCGv_i64 addr, int dtype, int nreg) 4548 { 4549 gen_helper_gvec_mem *fn 4550 = 
ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4551 4552 /* 4553 * While there are holes in the table, they are not 4554 * accessible via the instruction encoding. 4555 */ 4556 assert(fn != NULL); 4557 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); 4558 } 4559 4560 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4561 { 4562 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4563 return false; 4564 } 4565 if (sve_access_check(s)) { 4566 TCGv_i64 addr = tcg_temp_new_i64(); 4567 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4568 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4569 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4570 } 4571 return true; 4572 } 4573 4574 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4575 { 4576 if (!dc_isar_feature(aa64_sve, s)) { 4577 return false; 4578 } 4579 if (sve_access_check(s)) { 4580 int vsz = vec_full_reg_size(s); 4581 int elements = vsz >> dtype_esz[a->dtype]; 4582 TCGv_i64 addr = tcg_temp_new_i64(); 4583 4584 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4585 (a->imm * elements * (a->nreg + 1)) 4586 << dtype_msz(a->dtype)); 4587 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4588 } 4589 return true; 4590 } 4591 4592 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4593 { 4594 static gen_helper_gvec_mem * const fns[2][2][16] = { 4595 { /* mte inactive, little-endian */ 4596 { gen_helper_sve_ldff1bb_r, 4597 gen_helper_sve_ldff1bhu_r, 4598 gen_helper_sve_ldff1bsu_r, 4599 gen_helper_sve_ldff1bdu_r, 4600 4601 gen_helper_sve_ldff1sds_le_r, 4602 gen_helper_sve_ldff1hh_le_r, 4603 gen_helper_sve_ldff1hsu_le_r, 4604 gen_helper_sve_ldff1hdu_le_r, 4605 4606 gen_helper_sve_ldff1hds_le_r, 4607 gen_helper_sve_ldff1hss_le_r, 4608 gen_helper_sve_ldff1ss_le_r, 4609 gen_helper_sve_ldff1sdu_le_r, 4610 4611 gen_helper_sve_ldff1bds_r, 4612 gen_helper_sve_ldff1bss_r, 4613 gen_helper_sve_ldff1bhs_r, 4614 gen_helper_sve_ldff1dd_le_r }, 4615 4616 /* mte 
inactive, big-endian */ 4617 { gen_helper_sve_ldff1bb_r, 4618 gen_helper_sve_ldff1bhu_r, 4619 gen_helper_sve_ldff1bsu_r, 4620 gen_helper_sve_ldff1bdu_r, 4621 4622 gen_helper_sve_ldff1sds_be_r, 4623 gen_helper_sve_ldff1hh_be_r, 4624 gen_helper_sve_ldff1hsu_be_r, 4625 gen_helper_sve_ldff1hdu_be_r, 4626 4627 gen_helper_sve_ldff1hds_be_r, 4628 gen_helper_sve_ldff1hss_be_r, 4629 gen_helper_sve_ldff1ss_be_r, 4630 gen_helper_sve_ldff1sdu_be_r, 4631 4632 gen_helper_sve_ldff1bds_r, 4633 gen_helper_sve_ldff1bss_r, 4634 gen_helper_sve_ldff1bhs_r, 4635 gen_helper_sve_ldff1dd_be_r } }, 4636 4637 { /* mte active, little-endian */ 4638 { gen_helper_sve_ldff1bb_r_mte, 4639 gen_helper_sve_ldff1bhu_r_mte, 4640 gen_helper_sve_ldff1bsu_r_mte, 4641 gen_helper_sve_ldff1bdu_r_mte, 4642 4643 gen_helper_sve_ldff1sds_le_r_mte, 4644 gen_helper_sve_ldff1hh_le_r_mte, 4645 gen_helper_sve_ldff1hsu_le_r_mte, 4646 gen_helper_sve_ldff1hdu_le_r_mte, 4647 4648 gen_helper_sve_ldff1hds_le_r_mte, 4649 gen_helper_sve_ldff1hss_le_r_mte, 4650 gen_helper_sve_ldff1ss_le_r_mte, 4651 gen_helper_sve_ldff1sdu_le_r_mte, 4652 4653 gen_helper_sve_ldff1bds_r_mte, 4654 gen_helper_sve_ldff1bss_r_mte, 4655 gen_helper_sve_ldff1bhs_r_mte, 4656 gen_helper_sve_ldff1dd_le_r_mte }, 4657 4658 /* mte active, big-endian */ 4659 { gen_helper_sve_ldff1bb_r_mte, 4660 gen_helper_sve_ldff1bhu_r_mte, 4661 gen_helper_sve_ldff1bsu_r_mte, 4662 gen_helper_sve_ldff1bdu_r_mte, 4663 4664 gen_helper_sve_ldff1sds_be_r_mte, 4665 gen_helper_sve_ldff1hh_be_r_mte, 4666 gen_helper_sve_ldff1hsu_be_r_mte, 4667 gen_helper_sve_ldff1hdu_be_r_mte, 4668 4669 gen_helper_sve_ldff1hds_be_r_mte, 4670 gen_helper_sve_ldff1hss_be_r_mte, 4671 gen_helper_sve_ldff1ss_be_r_mte, 4672 gen_helper_sve_ldff1sdu_be_r_mte, 4673 4674 gen_helper_sve_ldff1bds_r_mte, 4675 gen_helper_sve_ldff1bss_r_mte, 4676 gen_helper_sve_ldff1bhs_r_mte, 4677 gen_helper_sve_ldff1dd_be_r_mte } }, 4678 }; 4679 4680 if (!dc_isar_feature(aa64_sve, s)) { 4681 return false; 4682 } 4683 
s->is_nonstreaming = true; 4684 if (sve_access_check(s)) { 4685 TCGv_i64 addr = tcg_temp_new_i64(); 4686 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4687 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4688 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4689 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4690 } 4691 return true; 4692 } 4693 4694 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 4695 { 4696 static gen_helper_gvec_mem * const fns[2][2][16] = { 4697 { /* mte inactive, little-endian */ 4698 { gen_helper_sve_ldnf1bb_r, 4699 gen_helper_sve_ldnf1bhu_r, 4700 gen_helper_sve_ldnf1bsu_r, 4701 gen_helper_sve_ldnf1bdu_r, 4702 4703 gen_helper_sve_ldnf1sds_le_r, 4704 gen_helper_sve_ldnf1hh_le_r, 4705 gen_helper_sve_ldnf1hsu_le_r, 4706 gen_helper_sve_ldnf1hdu_le_r, 4707 4708 gen_helper_sve_ldnf1hds_le_r, 4709 gen_helper_sve_ldnf1hss_le_r, 4710 gen_helper_sve_ldnf1ss_le_r, 4711 gen_helper_sve_ldnf1sdu_le_r, 4712 4713 gen_helper_sve_ldnf1bds_r, 4714 gen_helper_sve_ldnf1bss_r, 4715 gen_helper_sve_ldnf1bhs_r, 4716 gen_helper_sve_ldnf1dd_le_r }, 4717 4718 /* mte inactive, big-endian */ 4719 { gen_helper_sve_ldnf1bb_r, 4720 gen_helper_sve_ldnf1bhu_r, 4721 gen_helper_sve_ldnf1bsu_r, 4722 gen_helper_sve_ldnf1bdu_r, 4723 4724 gen_helper_sve_ldnf1sds_be_r, 4725 gen_helper_sve_ldnf1hh_be_r, 4726 gen_helper_sve_ldnf1hsu_be_r, 4727 gen_helper_sve_ldnf1hdu_be_r, 4728 4729 gen_helper_sve_ldnf1hds_be_r, 4730 gen_helper_sve_ldnf1hss_be_r, 4731 gen_helper_sve_ldnf1ss_be_r, 4732 gen_helper_sve_ldnf1sdu_be_r, 4733 4734 gen_helper_sve_ldnf1bds_r, 4735 gen_helper_sve_ldnf1bss_r, 4736 gen_helper_sve_ldnf1bhs_r, 4737 gen_helper_sve_ldnf1dd_be_r } }, 4738 4739 { /* mte inactive, little-endian */ 4740 { gen_helper_sve_ldnf1bb_r_mte, 4741 gen_helper_sve_ldnf1bhu_r_mte, 4742 gen_helper_sve_ldnf1bsu_r_mte, 4743 gen_helper_sve_ldnf1bdu_r_mte, 4744 4745 gen_helper_sve_ldnf1sds_le_r_mte, 4746 gen_helper_sve_ldnf1hh_le_r_mte, 4747 
gen_helper_sve_ldnf1hsu_le_r_mte, 4748 gen_helper_sve_ldnf1hdu_le_r_mte, 4749 4750 gen_helper_sve_ldnf1hds_le_r_mte, 4751 gen_helper_sve_ldnf1hss_le_r_mte, 4752 gen_helper_sve_ldnf1ss_le_r_mte, 4753 gen_helper_sve_ldnf1sdu_le_r_mte, 4754 4755 gen_helper_sve_ldnf1bds_r_mte, 4756 gen_helper_sve_ldnf1bss_r_mte, 4757 gen_helper_sve_ldnf1bhs_r_mte, 4758 gen_helper_sve_ldnf1dd_le_r_mte }, 4759 4760 /* mte inactive, big-endian */ 4761 { gen_helper_sve_ldnf1bb_r_mte, 4762 gen_helper_sve_ldnf1bhu_r_mte, 4763 gen_helper_sve_ldnf1bsu_r_mte, 4764 gen_helper_sve_ldnf1bdu_r_mte, 4765 4766 gen_helper_sve_ldnf1sds_be_r_mte, 4767 gen_helper_sve_ldnf1hh_be_r_mte, 4768 gen_helper_sve_ldnf1hsu_be_r_mte, 4769 gen_helper_sve_ldnf1hdu_be_r_mte, 4770 4771 gen_helper_sve_ldnf1hds_be_r_mte, 4772 gen_helper_sve_ldnf1hss_be_r_mte, 4773 gen_helper_sve_ldnf1ss_be_r_mte, 4774 gen_helper_sve_ldnf1sdu_be_r_mte, 4775 4776 gen_helper_sve_ldnf1bds_r_mte, 4777 gen_helper_sve_ldnf1bss_r_mte, 4778 gen_helper_sve_ldnf1bhs_r_mte, 4779 gen_helper_sve_ldnf1dd_be_r_mte } }, 4780 }; 4781 4782 if (!dc_isar_feature(aa64_sve, s)) { 4783 return false; 4784 } 4785 s->is_nonstreaming = true; 4786 if (sve_access_check(s)) { 4787 int vsz = vec_full_reg_size(s); 4788 int elements = vsz >> dtype_esz[a->dtype]; 4789 int off = (a->imm * elements) << dtype_msz(a->dtype); 4790 TCGv_i64 addr = tcg_temp_new_i64(); 4791 4792 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4793 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4794 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4795 } 4796 return true; 4797 } 4798 4799 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4800 { 4801 unsigned vsz = vec_full_reg_size(s); 4802 TCGv_ptr t_pg; 4803 int poff; 4804 4805 /* Load the first quadword using the normal predicated load helpers. */ 4806 poff = pred_full_reg_offset(s, pg); 4807 if (vsz > 16) { 4808 /* 4809 * Zero-extend the first 16 bits of the predicate into a temporary. 
4810 * This avoids triggering an assert making sure we don't have bits 4811 * set within a predicate beyond VQ, but we have lowered VQ to 1 4812 * for this load operation. 4813 */ 4814 TCGv_i64 tmp = tcg_temp_new_i64(); 4815 #if HOST_BIG_ENDIAN 4816 poff += 6; 4817 #endif 4818 tcg_gen_ld16u_i64(tmp, cpu_env, poff); 4819 4820 poff = offsetof(CPUARMState, vfp.preg_tmp); 4821 tcg_gen_st_i64(tmp, cpu_env, poff); 4822 } 4823 4824 t_pg = tcg_temp_new_ptr(); 4825 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 4826 4827 gen_helper_gvec_mem *fn 4828 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4829 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt))); 4830 4831 /* Replicate that first quadword. */ 4832 if (vsz > 16) { 4833 int doff = vec_full_reg_offset(s, zt); 4834 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4835 } 4836 } 4837 4838 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 4839 { 4840 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4841 return false; 4842 } 4843 if (sve_access_check(s)) { 4844 int msz = dtype_msz(a->dtype); 4845 TCGv_i64 addr = tcg_temp_new_i64(); 4846 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 4847 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4848 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4849 } 4850 return true; 4851 } 4852 4853 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 4854 { 4855 if (!dc_isar_feature(aa64_sve, s)) { 4856 return false; 4857 } 4858 if (sve_access_check(s)) { 4859 TCGv_i64 addr = tcg_temp_new_i64(); 4860 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 4861 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4862 } 4863 return true; 4864 } 4865 4866 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4867 { 4868 unsigned vsz = vec_full_reg_size(s); 4869 unsigned vsz_r32; 4870 TCGv_ptr t_pg; 4871 int poff, doff; 4872 4873 if (vsz < 32) { 4874 /* 4875 * Note that this UNDEFINED check comes after CheckSVEEnabled() 4876 * 
in the ARM pseudocode, which is the sve_access_check() done 4877 * in our caller. We should not now return false from the caller. 4878 */ 4879 unallocated_encoding(s); 4880 return; 4881 } 4882 4883 /* Load the first octaword using the normal predicated load helpers. */ 4884 4885 poff = pred_full_reg_offset(s, pg); 4886 if (vsz > 32) { 4887 /* 4888 * Zero-extend the first 32 bits of the predicate into a temporary. 4889 * This avoids triggering an assert making sure we don't have bits 4890 * set within a predicate beyond VQ, but we have lowered VQ to 2 4891 * for this load operation. 4892 */ 4893 TCGv_i64 tmp = tcg_temp_new_i64(); 4894 #if HOST_BIG_ENDIAN 4895 poff += 4; 4896 #endif 4897 tcg_gen_ld32u_i64(tmp, cpu_env, poff); 4898 4899 poff = offsetof(CPUARMState, vfp.preg_tmp); 4900 tcg_gen_st_i64(tmp, cpu_env, poff); 4901 } 4902 4903 t_pg = tcg_temp_new_ptr(); 4904 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 4905 4906 gen_helper_gvec_mem *fn 4907 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4908 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt))); 4909 4910 /* 4911 * Replicate that first octaword. 4912 * The replication happens in units of 32; if the full vector size 4913 * is not a multiple of 32, the final bits are zeroed. 
4914 */ 4915 doff = vec_full_reg_offset(s, zt); 4916 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 4917 if (vsz >= 64) { 4918 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 4919 } 4920 vsz -= vsz_r32; 4921 if (vsz) { 4922 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 4923 } 4924 } 4925 4926 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 4927 { 4928 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4929 return false; 4930 } 4931 if (a->rm == 31) { 4932 return false; 4933 } 4934 s->is_nonstreaming = true; 4935 if (sve_access_check(s)) { 4936 TCGv_i64 addr = tcg_temp_new_i64(); 4937 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4938 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4939 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4940 } 4941 return true; 4942 } 4943 4944 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 4945 { 4946 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4947 return false; 4948 } 4949 s->is_nonstreaming = true; 4950 if (sve_access_check(s)) { 4951 TCGv_i64 addr = tcg_temp_new_i64(); 4952 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 4953 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4954 } 4955 return true; 4956 } 4957 4958 /* Load and broadcast element. */ 4959 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 4960 { 4961 unsigned vsz = vec_full_reg_size(s); 4962 unsigned psz = pred_full_reg_size(s); 4963 unsigned esz = dtype_esz[a->dtype]; 4964 unsigned msz = dtype_msz(a->dtype); 4965 TCGLabel *over; 4966 TCGv_i64 temp, clean_addr; 4967 4968 if (!dc_isar_feature(aa64_sve, s)) { 4969 return false; 4970 } 4971 if (!sve_access_check(s)) { 4972 return true; 4973 } 4974 4975 over = gen_new_label(); 4976 4977 /* If the guarding predicate has no bits set, no load occurs. */ 4978 if (psz <= 8) { 4979 /* Reduce the pred_esz_masks value simply to reduce the 4980 * size of the code generated here. 
4981 */ 4982 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 4983 temp = tcg_temp_new_i64(); 4984 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg)); 4985 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 4986 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 4987 } else { 4988 TCGv_i32 t32 = tcg_temp_new_i32(); 4989 find_last_active(s, t32, esz, a->pg); 4990 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 4991 } 4992 4993 /* Load the data. */ 4994 temp = tcg_temp_new_i64(); 4995 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 4996 clean_addr = gen_mte_check1(s, temp, false, true, msz); 4997 4998 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), 4999 finalize_memop(s, dtype_mop[a->dtype])); 5000 5001 /* Broadcast to *all* elements. */ 5002 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5003 vsz, vsz, temp); 5004 5005 /* Zero the inactive elements. */ 5006 gen_set_label(over); 5007 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5008 } 5009 5010 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5011 int msz, int esz, int nreg) 5012 { 5013 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 5014 { { { gen_helper_sve_st1bb_r, 5015 gen_helper_sve_st1bh_r, 5016 gen_helper_sve_st1bs_r, 5017 gen_helper_sve_st1bd_r }, 5018 { NULL, 5019 gen_helper_sve_st1hh_le_r, 5020 gen_helper_sve_st1hs_le_r, 5021 gen_helper_sve_st1hd_le_r }, 5022 { NULL, NULL, 5023 gen_helper_sve_st1ss_le_r, 5024 gen_helper_sve_st1sd_le_r }, 5025 { NULL, NULL, NULL, 5026 gen_helper_sve_st1dd_le_r } }, 5027 { { gen_helper_sve_st1bb_r, 5028 gen_helper_sve_st1bh_r, 5029 gen_helper_sve_st1bs_r, 5030 gen_helper_sve_st1bd_r }, 5031 { NULL, 5032 gen_helper_sve_st1hh_be_r, 5033 gen_helper_sve_st1hs_be_r, 5034 gen_helper_sve_st1hd_be_r }, 5035 { NULL, NULL, 5036 gen_helper_sve_st1ss_be_r, 5037 gen_helper_sve_st1sd_be_r }, 5038 { NULL, NULL, NULL, 5039 gen_helper_sve_st1dd_be_r } } }, 5040 5041 { { { 
gen_helper_sve_st1bb_r_mte, 5042 gen_helper_sve_st1bh_r_mte, 5043 gen_helper_sve_st1bs_r_mte, 5044 gen_helper_sve_st1bd_r_mte }, 5045 { NULL, 5046 gen_helper_sve_st1hh_le_r_mte, 5047 gen_helper_sve_st1hs_le_r_mte, 5048 gen_helper_sve_st1hd_le_r_mte }, 5049 { NULL, NULL, 5050 gen_helper_sve_st1ss_le_r_mte, 5051 gen_helper_sve_st1sd_le_r_mte }, 5052 { NULL, NULL, NULL, 5053 gen_helper_sve_st1dd_le_r_mte } }, 5054 { { gen_helper_sve_st1bb_r_mte, 5055 gen_helper_sve_st1bh_r_mte, 5056 gen_helper_sve_st1bs_r_mte, 5057 gen_helper_sve_st1bd_r_mte }, 5058 { NULL, 5059 gen_helper_sve_st1hh_be_r_mte, 5060 gen_helper_sve_st1hs_be_r_mte, 5061 gen_helper_sve_st1hd_be_r_mte }, 5062 { NULL, NULL, 5063 gen_helper_sve_st1ss_be_r_mte, 5064 gen_helper_sve_st1sd_be_r_mte }, 5065 { NULL, NULL, NULL, 5066 gen_helper_sve_st1dd_be_r_mte } } }, 5067 }; 5068 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5069 { { { gen_helper_sve_st2bb_r, 5070 gen_helper_sve_st2hh_le_r, 5071 gen_helper_sve_st2ss_le_r, 5072 gen_helper_sve_st2dd_le_r }, 5073 { gen_helper_sve_st3bb_r, 5074 gen_helper_sve_st3hh_le_r, 5075 gen_helper_sve_st3ss_le_r, 5076 gen_helper_sve_st3dd_le_r }, 5077 { gen_helper_sve_st4bb_r, 5078 gen_helper_sve_st4hh_le_r, 5079 gen_helper_sve_st4ss_le_r, 5080 gen_helper_sve_st4dd_le_r } }, 5081 { { gen_helper_sve_st2bb_r, 5082 gen_helper_sve_st2hh_be_r, 5083 gen_helper_sve_st2ss_be_r, 5084 gen_helper_sve_st2dd_be_r }, 5085 { gen_helper_sve_st3bb_r, 5086 gen_helper_sve_st3hh_be_r, 5087 gen_helper_sve_st3ss_be_r, 5088 gen_helper_sve_st3dd_be_r }, 5089 { gen_helper_sve_st4bb_r, 5090 gen_helper_sve_st4hh_be_r, 5091 gen_helper_sve_st4ss_be_r, 5092 gen_helper_sve_st4dd_be_r } } }, 5093 { { { gen_helper_sve_st2bb_r_mte, 5094 gen_helper_sve_st2hh_le_r_mte, 5095 gen_helper_sve_st2ss_le_r_mte, 5096 gen_helper_sve_st2dd_le_r_mte }, 5097 { gen_helper_sve_st3bb_r_mte, 5098 gen_helper_sve_st3hh_le_r_mte, 5099 gen_helper_sve_st3ss_le_r_mte, 5100 gen_helper_sve_st3dd_le_r_mte }, 5101 { 
gen_helper_sve_st4bb_r_mte, 5102 gen_helper_sve_st4hh_le_r_mte, 5103 gen_helper_sve_st4ss_le_r_mte, 5104 gen_helper_sve_st4dd_le_r_mte } }, 5105 { { gen_helper_sve_st2bb_r_mte, 5106 gen_helper_sve_st2hh_be_r_mte, 5107 gen_helper_sve_st2ss_be_r_mte, 5108 gen_helper_sve_st2dd_be_r_mte }, 5109 { gen_helper_sve_st3bb_r_mte, 5110 gen_helper_sve_st3hh_be_r_mte, 5111 gen_helper_sve_st3ss_be_r_mte, 5112 gen_helper_sve_st3dd_be_r_mte }, 5113 { gen_helper_sve_st4bb_r_mte, 5114 gen_helper_sve_st4hh_be_r_mte, 5115 gen_helper_sve_st4ss_be_r_mte, 5116 gen_helper_sve_st4dd_be_r_mte } } }, 5117 }; 5118 gen_helper_gvec_mem *fn; 5119 int be = s->be_data == MO_BE; 5120 5121 if (nreg == 0) { 5122 /* ST1 */ 5123 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5124 nreg = 1; 5125 } else { 5126 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5127 assert(msz == esz); 5128 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5129 } 5130 assert(fn != NULL); 5131 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); 5132 } 5133 5134 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5135 { 5136 if (!dc_isar_feature(aa64_sve, s)) { 5137 return false; 5138 } 5139 if (a->rm == 31 || a->msz > a->esz) { 5140 return false; 5141 } 5142 if (sve_access_check(s)) { 5143 TCGv_i64 addr = tcg_temp_new_i64(); 5144 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5145 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5146 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5147 } 5148 return true; 5149 } 5150 5151 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5152 { 5153 if (!dc_isar_feature(aa64_sve, s)) { 5154 return false; 5155 } 5156 if (a->msz > a->esz) { 5157 return false; 5158 } 5159 if (sve_access_check(s)) { 5160 int vsz = vec_full_reg_size(s); 5161 int elements = vsz >> a->esz; 5162 TCGv_i64 addr = tcg_temp_new_i64(); 5163 5164 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5165 (a->imm * elements * (a->nreg + 1)) << a->msz); 5166 
do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5167 } 5168 return true; 5169 } 5170 5171 /* 5172 *** SVE gather loads / scatter stores 5173 */ 5174 5175 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5176 int scale, TCGv_i64 scalar, int msz, bool is_write, 5177 gen_helper_gvec_mem_scatter *fn) 5178 { 5179 unsigned vsz = vec_full_reg_size(s); 5180 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5181 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5182 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5183 int desc = 0; 5184 5185 if (s->mte_active[0]) { 5186 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 5187 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 5188 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 5189 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 5190 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); 5191 desc <<= SVE_MTEDESC_SHIFT; 5192 } 5193 desc = simd_desc(vsz, vsz, desc | scale); 5194 5195 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 5196 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); 5197 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); 5198 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5199 } 5200 5201 /* Indexed by [mte][be][ff][xs][u][msz]. 
*/
/*
 * Helper selection table for gather loads, in the index order given
 * above.  The slots at [u == 0][msz == 2] are NULL.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.
*/ 5319 static gen_helper_gvec_mem_scatter * const 5320 gather_load_fn64[2][2][2][3][2][4] = { 5321 { /* MTE Inactive */ 5322 { /* Little-endian */ 5323 { { { gen_helper_sve_ldbds_zsu, 5324 gen_helper_sve_ldhds_le_zsu, 5325 gen_helper_sve_ldsds_le_zsu, 5326 NULL, }, 5327 { gen_helper_sve_ldbdu_zsu, 5328 gen_helper_sve_ldhdu_le_zsu, 5329 gen_helper_sve_ldsdu_le_zsu, 5330 gen_helper_sve_lddd_le_zsu, } }, 5331 { { gen_helper_sve_ldbds_zss, 5332 gen_helper_sve_ldhds_le_zss, 5333 gen_helper_sve_ldsds_le_zss, 5334 NULL, }, 5335 { gen_helper_sve_ldbdu_zss, 5336 gen_helper_sve_ldhdu_le_zss, 5337 gen_helper_sve_ldsdu_le_zss, 5338 gen_helper_sve_lddd_le_zss, } }, 5339 { { gen_helper_sve_ldbds_zd, 5340 gen_helper_sve_ldhds_le_zd, 5341 gen_helper_sve_ldsds_le_zd, 5342 NULL, }, 5343 { gen_helper_sve_ldbdu_zd, 5344 gen_helper_sve_ldhdu_le_zd, 5345 gen_helper_sve_ldsdu_le_zd, 5346 gen_helper_sve_lddd_le_zd, } } }, 5347 5348 /* First-fault */ 5349 { { { gen_helper_sve_ldffbds_zsu, 5350 gen_helper_sve_ldffhds_le_zsu, 5351 gen_helper_sve_ldffsds_le_zsu, 5352 NULL, }, 5353 { gen_helper_sve_ldffbdu_zsu, 5354 gen_helper_sve_ldffhdu_le_zsu, 5355 gen_helper_sve_ldffsdu_le_zsu, 5356 gen_helper_sve_ldffdd_le_zsu, } }, 5357 { { gen_helper_sve_ldffbds_zss, 5358 gen_helper_sve_ldffhds_le_zss, 5359 gen_helper_sve_ldffsds_le_zss, 5360 NULL, }, 5361 { gen_helper_sve_ldffbdu_zss, 5362 gen_helper_sve_ldffhdu_le_zss, 5363 gen_helper_sve_ldffsdu_le_zss, 5364 gen_helper_sve_ldffdd_le_zss, } }, 5365 { { gen_helper_sve_ldffbds_zd, 5366 gen_helper_sve_ldffhds_le_zd, 5367 gen_helper_sve_ldffsds_le_zd, 5368 NULL, }, 5369 { gen_helper_sve_ldffbdu_zd, 5370 gen_helper_sve_ldffhdu_le_zd, 5371 gen_helper_sve_ldffsdu_le_zd, 5372 gen_helper_sve_ldffdd_le_zd, } } } }, 5373 { /* Big-endian */ 5374 { { { gen_helper_sve_ldbds_zsu, 5375 gen_helper_sve_ldhds_be_zsu, 5376 gen_helper_sve_ldsds_be_zsu, 5377 NULL, }, 5378 { gen_helper_sve_ldbdu_zsu, 5379 gen_helper_sve_ldhdu_be_zsu, 5380 gen_helper_sve_ldsdu_be_zsu, 5381 
gen_helper_sve_lddd_be_zsu, } }, 5382 { { gen_helper_sve_ldbds_zss, 5383 gen_helper_sve_ldhds_be_zss, 5384 gen_helper_sve_ldsds_be_zss, 5385 NULL, }, 5386 { gen_helper_sve_ldbdu_zss, 5387 gen_helper_sve_ldhdu_be_zss, 5388 gen_helper_sve_ldsdu_be_zss, 5389 gen_helper_sve_lddd_be_zss, } }, 5390 { { gen_helper_sve_ldbds_zd, 5391 gen_helper_sve_ldhds_be_zd, 5392 gen_helper_sve_ldsds_be_zd, 5393 NULL, }, 5394 { gen_helper_sve_ldbdu_zd, 5395 gen_helper_sve_ldhdu_be_zd, 5396 gen_helper_sve_ldsdu_be_zd, 5397 gen_helper_sve_lddd_be_zd, } } }, 5398 5399 /* First-fault */ 5400 { { { gen_helper_sve_ldffbds_zsu, 5401 gen_helper_sve_ldffhds_be_zsu, 5402 gen_helper_sve_ldffsds_be_zsu, 5403 NULL, }, 5404 { gen_helper_sve_ldffbdu_zsu, 5405 gen_helper_sve_ldffhdu_be_zsu, 5406 gen_helper_sve_ldffsdu_be_zsu, 5407 gen_helper_sve_ldffdd_be_zsu, } }, 5408 { { gen_helper_sve_ldffbds_zss, 5409 gen_helper_sve_ldffhds_be_zss, 5410 gen_helper_sve_ldffsds_be_zss, 5411 NULL, }, 5412 { gen_helper_sve_ldffbdu_zss, 5413 gen_helper_sve_ldffhdu_be_zss, 5414 gen_helper_sve_ldffsdu_be_zss, 5415 gen_helper_sve_ldffdd_be_zss, } }, 5416 { { gen_helper_sve_ldffbds_zd, 5417 gen_helper_sve_ldffhds_be_zd, 5418 gen_helper_sve_ldffsds_be_zd, 5419 NULL, }, 5420 { gen_helper_sve_ldffbdu_zd, 5421 gen_helper_sve_ldffhdu_be_zd, 5422 gen_helper_sve_ldffsdu_be_zd, 5423 gen_helper_sve_ldffdd_be_zd, } } } } }, 5424 { /* MTE Active */ 5425 { /* Little-endian */ 5426 { { { gen_helper_sve_ldbds_zsu_mte, 5427 gen_helper_sve_ldhds_le_zsu_mte, 5428 gen_helper_sve_ldsds_le_zsu_mte, 5429 NULL, }, 5430 { gen_helper_sve_ldbdu_zsu_mte, 5431 gen_helper_sve_ldhdu_le_zsu_mte, 5432 gen_helper_sve_ldsdu_le_zsu_mte, 5433 gen_helper_sve_lddd_le_zsu_mte, } }, 5434 { { gen_helper_sve_ldbds_zss_mte, 5435 gen_helper_sve_ldhds_le_zss_mte, 5436 gen_helper_sve_ldsds_le_zss_mte, 5437 NULL, }, 5438 { gen_helper_sve_ldbdu_zss_mte, 5439 gen_helper_sve_ldhdu_le_zss_mte, 5440 gen_helper_sve_ldsdu_le_zss_mte, 5441 gen_helper_sve_lddd_le_zss_mte, } }, 
5442 { { gen_helper_sve_ldbds_zd_mte, 5443 gen_helper_sve_ldhds_le_zd_mte, 5444 gen_helper_sve_ldsds_le_zd_mte, 5445 NULL, }, 5446 { gen_helper_sve_ldbdu_zd_mte, 5447 gen_helper_sve_ldhdu_le_zd_mte, 5448 gen_helper_sve_ldsdu_le_zd_mte, 5449 gen_helper_sve_lddd_le_zd_mte, } } }, 5450 5451 /* First-fault */ 5452 { { { gen_helper_sve_ldffbds_zsu_mte, 5453 gen_helper_sve_ldffhds_le_zsu_mte, 5454 gen_helper_sve_ldffsds_le_zsu_mte, 5455 NULL, }, 5456 { gen_helper_sve_ldffbdu_zsu_mte, 5457 gen_helper_sve_ldffhdu_le_zsu_mte, 5458 gen_helper_sve_ldffsdu_le_zsu_mte, 5459 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5460 { { gen_helper_sve_ldffbds_zss_mte, 5461 gen_helper_sve_ldffhds_le_zss_mte, 5462 gen_helper_sve_ldffsds_le_zss_mte, 5463 NULL, }, 5464 { gen_helper_sve_ldffbdu_zss_mte, 5465 gen_helper_sve_ldffhdu_le_zss_mte, 5466 gen_helper_sve_ldffsdu_le_zss_mte, 5467 gen_helper_sve_ldffdd_le_zss_mte, } }, 5468 { { gen_helper_sve_ldffbds_zd_mte, 5469 gen_helper_sve_ldffhds_le_zd_mte, 5470 gen_helper_sve_ldffsds_le_zd_mte, 5471 NULL, }, 5472 { gen_helper_sve_ldffbdu_zd_mte, 5473 gen_helper_sve_ldffhdu_le_zd_mte, 5474 gen_helper_sve_ldffsdu_le_zd_mte, 5475 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5476 { /* Big-endian */ 5477 { { { gen_helper_sve_ldbds_zsu_mte, 5478 gen_helper_sve_ldhds_be_zsu_mte, 5479 gen_helper_sve_ldsds_be_zsu_mte, 5480 NULL, }, 5481 { gen_helper_sve_ldbdu_zsu_mte, 5482 gen_helper_sve_ldhdu_be_zsu_mte, 5483 gen_helper_sve_ldsdu_be_zsu_mte, 5484 gen_helper_sve_lddd_be_zsu_mte, } }, 5485 { { gen_helper_sve_ldbds_zss_mte, 5486 gen_helper_sve_ldhds_be_zss_mte, 5487 gen_helper_sve_ldsds_be_zss_mte, 5488 NULL, }, 5489 { gen_helper_sve_ldbdu_zss_mte, 5490 gen_helper_sve_ldhdu_be_zss_mte, 5491 gen_helper_sve_ldsdu_be_zss_mte, 5492 gen_helper_sve_lddd_be_zss_mte, } }, 5493 { { gen_helper_sve_ldbds_zd_mte, 5494 gen_helper_sve_ldhds_be_zd_mte, 5495 gen_helper_sve_ldsds_be_zd_mte, 5496 NULL, }, 5497 { gen_helper_sve_ldbdu_zd_mte, 5498 gen_helper_sve_ldhdu_be_zd_mte, 5499 
gen_helper_sve_ldsdu_be_zd_mte, 5500 gen_helper_sve_lddd_be_zd_mte, } } }, 5501 5502 /* First-fault */ 5503 { { { gen_helper_sve_ldffbds_zsu_mte, 5504 gen_helper_sve_ldffhds_be_zsu_mte, 5505 gen_helper_sve_ldffsds_be_zsu_mte, 5506 NULL, }, 5507 { gen_helper_sve_ldffbdu_zsu_mte, 5508 gen_helper_sve_ldffhdu_be_zsu_mte, 5509 gen_helper_sve_ldffsdu_be_zsu_mte, 5510 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5511 { { gen_helper_sve_ldffbds_zss_mte, 5512 gen_helper_sve_ldffhds_be_zss_mte, 5513 gen_helper_sve_ldffsds_be_zss_mte, 5514 NULL, }, 5515 { gen_helper_sve_ldffbdu_zss_mte, 5516 gen_helper_sve_ldffhdu_be_zss_mte, 5517 gen_helper_sve_ldffsdu_be_zss_mte, 5518 gen_helper_sve_ldffdd_be_zss_mte, } }, 5519 { { gen_helper_sve_ldffbds_zd_mte, 5520 gen_helper_sve_ldffhds_be_zd_mte, 5521 gen_helper_sve_ldffsds_be_zd_mte, 5522 NULL, }, 5523 { gen_helper_sve_ldffbdu_zd_mte, 5524 gen_helper_sve_ldffhdu_be_zd_mte, 5525 gen_helper_sve_ldffsdu_be_zd_mte, 5526 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 5527 }; 5528 5529 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 5530 { 5531 gen_helper_gvec_mem_scatter *fn = NULL; 5532 bool be = s->be_data == MO_BE; 5533 bool mte = s->mte_active[0]; 5534 5535 if (!dc_isar_feature(aa64_sve, s)) { 5536 return false; 5537 } 5538 s->is_nonstreaming = true; 5539 if (!sve_access_check(s)) { 5540 return true; 5541 } 5542 5543 switch (a->esz) { 5544 case MO_32: 5545 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 5546 break; 5547 case MO_64: 5548 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 5549 break; 5550 } 5551 assert(fn != NULL); 5552 5553 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5554 cpu_reg_sp(s, a->rn), a->msz, false, fn); 5555 return true; 5556 } 5557 5558 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 5559 { 5560 gen_helper_gvec_mem_scatter *fn = NULL; 5561 bool be = s->be_data == MO_BE; 5562 bool mte = s->mte_active[0]; 5563 5564 if (a->esz < a->msz || (a->esz == a->msz && 
!a->u)) { 5565 return false; 5566 } 5567 if (!dc_isar_feature(aa64_sve, s)) { 5568 return false; 5569 } 5570 s->is_nonstreaming = true; 5571 if (!sve_access_check(s)) { 5572 return true; 5573 } 5574 5575 switch (a->esz) { 5576 case MO_32: 5577 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 5578 break; 5579 case MO_64: 5580 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 5581 break; 5582 } 5583 assert(fn != NULL); 5584 5585 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 5586 * by loading the immediate into the scalar parameter. 5587 */ 5588 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5589 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 5590 return true; 5591 } 5592 5593 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 5594 { 5595 gen_helper_gvec_mem_scatter *fn = NULL; 5596 bool be = s->be_data == MO_BE; 5597 bool mte = s->mte_active[0]; 5598 5599 if (a->esz < a->msz + !a->u) { 5600 return false; 5601 } 5602 if (!dc_isar_feature(aa64_sve2, s)) { 5603 return false; 5604 } 5605 s->is_nonstreaming = true; 5606 if (!sve_access_check(s)) { 5607 return true; 5608 } 5609 5610 switch (a->esz) { 5611 case MO_32: 5612 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 5613 break; 5614 case MO_64: 5615 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 5616 break; 5617 } 5618 assert(fn != NULL); 5619 5620 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5621 cpu_reg(s, a->rm), a->msz, false, fn); 5622 return true; 5623 } 5624 5625 /* Indexed by [mte][be][xs][msz]. 
*/
/*
 * Scatter-store helpers for 32-bit elements, in [mte][be][xs][msz]
 * order.  Every slot is populated.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { /* xs=0 */
                gen_helper_sve_stbs_zsu,
                gen_helper_sve_sths_le_zsu,
                gen_helper_sve_stss_le_zsu, },
            { /* xs=1 */
                gen_helper_sve_stbs_zss,
                gen_helper_sve_sths_le_zss,
                gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { /* xs=0 */
                gen_helper_sve_stbs_zsu,
                gen_helper_sve_sths_be_zsu,
                gen_helper_sve_stss_be_zsu, },
            { /* xs=1 */
                gen_helper_sve_stbs_zss,
                gen_helper_sve_sths_be_zss,
                gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { /* xs=0 */
                gen_helper_sve_stbs_zsu_mte,
                gen_helper_sve_sths_le_zsu_mte,
                gen_helper_sve_stss_le_zsu_mte, },
            { /* xs=1 */
                gen_helper_sve_stbs_zss_mte,
                gen_helper_sve_sths_le_zss_mte,
                gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { /* xs=0 */
                gen_helper_sve_stbs_zsu_mte,
                gen_helper_sve_sths_be_zsu_mte,
                gen_helper_sve_stss_be_zsu_mte, },
            { /* xs=1 */
                gen_helper_sve_stbs_zss_mte,
                gen_helper_sve_sths_be_zss_mte,
                gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.
*/ 5660 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { 5661 { /* MTE Inactive */ 5662 { /* Little-endian */ 5663 { gen_helper_sve_stbd_zsu, 5664 gen_helper_sve_sthd_le_zsu, 5665 gen_helper_sve_stsd_le_zsu, 5666 gen_helper_sve_stdd_le_zsu, }, 5667 { gen_helper_sve_stbd_zss, 5668 gen_helper_sve_sthd_le_zss, 5669 gen_helper_sve_stsd_le_zss, 5670 gen_helper_sve_stdd_le_zss, }, 5671 { gen_helper_sve_stbd_zd, 5672 gen_helper_sve_sthd_le_zd, 5673 gen_helper_sve_stsd_le_zd, 5674 gen_helper_sve_stdd_le_zd, } }, 5675 { /* Big-endian */ 5676 { gen_helper_sve_stbd_zsu, 5677 gen_helper_sve_sthd_be_zsu, 5678 gen_helper_sve_stsd_be_zsu, 5679 gen_helper_sve_stdd_be_zsu, }, 5680 { gen_helper_sve_stbd_zss, 5681 gen_helper_sve_sthd_be_zss, 5682 gen_helper_sve_stsd_be_zss, 5683 gen_helper_sve_stdd_be_zss, }, 5684 { gen_helper_sve_stbd_zd, 5685 gen_helper_sve_sthd_be_zd, 5686 gen_helper_sve_stsd_be_zd, 5687 gen_helper_sve_stdd_be_zd, } } }, 5688 { /* MTE Inactive */ 5689 { /* Little-endian */ 5690 { gen_helper_sve_stbd_zsu_mte, 5691 gen_helper_sve_sthd_le_zsu_mte, 5692 gen_helper_sve_stsd_le_zsu_mte, 5693 gen_helper_sve_stdd_le_zsu_mte, }, 5694 { gen_helper_sve_stbd_zss_mte, 5695 gen_helper_sve_sthd_le_zss_mte, 5696 gen_helper_sve_stsd_le_zss_mte, 5697 gen_helper_sve_stdd_le_zss_mte, }, 5698 { gen_helper_sve_stbd_zd_mte, 5699 gen_helper_sve_sthd_le_zd_mte, 5700 gen_helper_sve_stsd_le_zd_mte, 5701 gen_helper_sve_stdd_le_zd_mte, } }, 5702 { /* Big-endian */ 5703 { gen_helper_sve_stbd_zsu_mte, 5704 gen_helper_sve_sthd_be_zsu_mte, 5705 gen_helper_sve_stsd_be_zsu_mte, 5706 gen_helper_sve_stdd_be_zsu_mte, }, 5707 { gen_helper_sve_stbd_zss_mte, 5708 gen_helper_sve_sthd_be_zss_mte, 5709 gen_helper_sve_stsd_be_zss_mte, 5710 gen_helper_sve_stdd_be_zss_mte, }, 5711 { gen_helper_sve_stbd_zd_mte, 5712 gen_helper_sve_sthd_be_zd_mte, 5713 gen_helper_sve_stsd_be_zd_mte, 5714 gen_helper_sve_stdd_be_zd_mte, } } }, 5715 }; 5716 5717 static bool trans_ST1_zprz(DisasContext 
*s, arg_ST1_zprz *a) 5718 { 5719 gen_helper_gvec_mem_scatter *fn; 5720 bool be = s->be_data == MO_BE; 5721 bool mte = s->mte_active[0]; 5722 5723 if (a->esz < a->msz || (a->msz == 0 && a->scale)) { 5724 return false; 5725 } 5726 if (!dc_isar_feature(aa64_sve, s)) { 5727 return false; 5728 } 5729 s->is_nonstreaming = true; 5730 if (!sve_access_check(s)) { 5731 return true; 5732 } 5733 switch (a->esz) { 5734 case MO_32: 5735 fn = scatter_store_fn32[mte][be][a->xs][a->msz]; 5736 break; 5737 case MO_64: 5738 fn = scatter_store_fn64[mte][be][a->xs][a->msz]; 5739 break; 5740 default: 5741 g_assert_not_reached(); 5742 } 5743 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5744 cpu_reg_sp(s, a->rn), a->msz, true, fn); 5745 return true; 5746 } 5747 5748 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) 5749 { 5750 gen_helper_gvec_mem_scatter *fn = NULL; 5751 bool be = s->be_data == MO_BE; 5752 bool mte = s->mte_active[0]; 5753 5754 if (a->esz < a->msz) { 5755 return false; 5756 } 5757 if (!dc_isar_feature(aa64_sve, s)) { 5758 return false; 5759 } 5760 s->is_nonstreaming = true; 5761 if (!sve_access_check(s)) { 5762 return true; 5763 } 5764 5765 switch (a->esz) { 5766 case MO_32: 5767 fn = scatter_store_fn32[mte][be][0][a->msz]; 5768 break; 5769 case MO_64: 5770 fn = scatter_store_fn64[mte][be][2][a->msz]; 5771 break; 5772 } 5773 assert(fn != NULL); 5774 5775 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) 5776 * by loading the immediate into the scalar parameter. 
5777 */ 5778 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5779 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn); 5780 return true; 5781 } 5782 5783 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) 5784 { 5785 gen_helper_gvec_mem_scatter *fn; 5786 bool be = s->be_data == MO_BE; 5787 bool mte = s->mte_active[0]; 5788 5789 if (a->esz < a->msz) { 5790 return false; 5791 } 5792 if (!dc_isar_feature(aa64_sve2, s)) { 5793 return false; 5794 } 5795 s->is_nonstreaming = true; 5796 if (!sve_access_check(s)) { 5797 return true; 5798 } 5799 5800 switch (a->esz) { 5801 case MO_32: 5802 fn = scatter_store_fn32[mte][be][0][a->msz]; 5803 break; 5804 case MO_64: 5805 fn = scatter_store_fn64[mte][be][2][a->msz]; 5806 break; 5807 default: 5808 g_assert_not_reached(); 5809 } 5810 5811 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5812 cpu_reg(s, a->rm), a->msz, true, fn); 5813 return true; 5814 } 5815 5816 /* 5817 * Prefetches 5818 */ 5819 5820 static bool trans_PRF(DisasContext *s, arg_PRF *a) 5821 { 5822 if (!dc_isar_feature(aa64_sve, s)) { 5823 return false; 5824 } 5825 /* Prefetch is a nop within QEMU. */ 5826 (void)sve_access_check(s); 5827 return true; 5828 } 5829 5830 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) 5831 { 5832 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 5833 return false; 5834 } 5835 /* Prefetch is a nop within QEMU. */ 5836 (void)sve_access_check(s); 5837 return true; 5838 } 5839 5840 static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) 5841 { 5842 if (!dc_isar_feature(aa64_sve, s)) { 5843 return false; 5844 } 5845 /* Prefetch is a nop within QEMU. */ 5846 s->is_nonstreaming = true; 5847 (void)sve_access_check(s); 5848 return true; 5849 } 5850 5851 /* 5852 * Move Prefix 5853 * 5854 * TODO: The implementation so far could handle predicated merging movprfx. 5855 * The helper functions as written take an extra source register to 5856 * use in the operation, but the result is only written when predication 5857 * succeeds. 
For unpredicated movprfx, we need to rearrange the helpers 5858 * to allow the final write back to the destination to be unconditional. 5859 * For predicated zeroing movprfx, we need to rearrange the helpers to 5860 * allow the final write back to zero inactives. 5861 * 5862 * In the meantime, just emit the moves. 5863 */ 5864 5865 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 5866 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 5867 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 5868 5869 /* 5870 * SVE2 Integer Multiply - Unpredicated 5871 */ 5872 5873 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 5874 5875 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 5876 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 5877 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 5878 }; 5879 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5880 smulh_zzz_fns[a->esz], a, 0) 5881 5882 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 5883 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 5884 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 5885 }; 5886 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5887 umulh_zzz_fns[a->esz], a, 0) 5888 5889 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5890 gen_helper_gvec_pmul_b, a, 0) 5891 5892 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { 5893 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 5894 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 5895 }; 5896 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5897 sqdmulh_zzz_fns[a->esz], a, 0) 5898 5899 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 5900 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 5901 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 5902 }; 5903 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5904 sqrdmulh_zzz_fns[a->esz], a, 0) 5905 5906 /* 5907 * SVE2 Integer - Predicated 5908 */ 
5909 5910 static gen_helper_gvec_4 * const sadlp_fns[4] = { 5911 NULL, gen_helper_sve2_sadalp_zpzz_h, 5912 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 5913 }; 5914 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5915 sadlp_fns[a->esz], a, 0) 5916 5917 static gen_helper_gvec_4 * const uadlp_fns[4] = { 5918 NULL, gen_helper_sve2_uadalp_zpzz_h, 5919 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 5920 }; 5921 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5922 uadlp_fns[a->esz], a, 0) 5923 5924 /* 5925 * SVE2 integer unary operations (predicated) 5926 */ 5927 5928 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 5929 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 5930 5931 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 5932 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0) 5933 5934 static gen_helper_gvec_3 * const sqabs_fns[4] = { 5935 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 5936 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 5937 }; 5938 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) 5939 5940 static gen_helper_gvec_3 * const sqneg_fns[4] = { 5941 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 5942 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 5943 }; 5944 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) 5945 5946 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) 5947 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) 5948 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) 5949 5950 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) 5951 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) 5952 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) 5953 5954 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) 5955 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) 5956 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) 5957 5958 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) 5959 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) 5960 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) 5961 5962 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) 5963 DO_ZPZZ(SMAXP, aa64_sve2, 
sve2_smaxp) 5964 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) 5965 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) 5966 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) 5967 5968 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) 5969 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) 5970 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) 5971 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) 5972 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) 5973 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) 5974 5975 /* 5976 * SVE2 Widening Integer Arithmetic 5977 */ 5978 5979 static gen_helper_gvec_3 * const saddl_fns[4] = { 5980 NULL, gen_helper_sve2_saddl_h, 5981 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d, 5982 }; 5983 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5984 saddl_fns[a->esz], a, 0) 5985 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5986 saddl_fns[a->esz], a, 3) 5987 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 5988 saddl_fns[a->esz], a, 2) 5989 5990 static gen_helper_gvec_3 * const ssubl_fns[4] = { 5991 NULL, gen_helper_sve2_ssubl_h, 5992 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d, 5993 }; 5994 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5995 ssubl_fns[a->esz], a, 0) 5996 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5997 ssubl_fns[a->esz], a, 3) 5998 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 5999 ssubl_fns[a->esz], a, 2) 6000 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz, 6001 ssubl_fns[a->esz], a, 1) 6002 6003 static gen_helper_gvec_3 * const sabdl_fns[4] = { 6004 NULL, gen_helper_sve2_sabdl_h, 6005 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d, 6006 }; 6007 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6008 sabdl_fns[a->esz], a, 0) 6009 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6010 sabdl_fns[a->esz], a, 3) 6011 6012 static gen_helper_gvec_3 * const uaddl_fns[4] = { 6013 NULL, gen_helper_sve2_uaddl_h, 6014 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d, 6015 }; 6016 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6017 uaddl_fns[a->esz], a, 0) 
6018 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6019 uaddl_fns[a->esz], a, 3) 6020 6021 static gen_helper_gvec_3 * const usubl_fns[4] = { 6022 NULL, gen_helper_sve2_usubl_h, 6023 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d, 6024 }; 6025 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6026 usubl_fns[a->esz], a, 0) 6027 TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6028 usubl_fns[a->esz], a, 3) 6029 6030 static gen_helper_gvec_3 * const uabdl_fns[4] = { 6031 NULL, gen_helper_sve2_uabdl_h, 6032 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d, 6033 }; 6034 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6035 uabdl_fns[a->esz], a, 0) 6036 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6037 uabdl_fns[a->esz], a, 3) 6038 6039 static gen_helper_gvec_3 * const sqdmull_fns[4] = { 6040 NULL, gen_helper_sve2_sqdmull_zzz_h, 6041 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d, 6042 }; 6043 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6044 sqdmull_fns[a->esz], a, 0) 6045 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6046 sqdmull_fns[a->esz], a, 3) 6047 6048 static gen_helper_gvec_3 * const smull_fns[4] = { 6049 NULL, gen_helper_sve2_smull_zzz_h, 6050 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d, 6051 }; 6052 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6053 smull_fns[a->esz], a, 0) 6054 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6055 smull_fns[a->esz], a, 3) 6056 6057 static gen_helper_gvec_3 * const umull_fns[4] = { 6058 NULL, gen_helper_sve2_umull_zzz_h, 6059 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d, 6060 }; 6061 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6062 umull_fns[a->esz], a, 0) 6063 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6064 umull_fns[a->esz], a, 3) 6065 6066 static gen_helper_gvec_3 * const eoril_fns[4] = { 6067 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6068 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 
6069 }; 6070 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2) 6071 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1) 6072 6073 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6074 { 6075 static gen_helper_gvec_3 * const fns[4] = { 6076 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6077 NULL, gen_helper_sve2_pmull_d, 6078 }; 6079 6080 if (a->esz == 0) { 6081 if (!dc_isar_feature(aa64_sve2_pmull128, s)) { 6082 return false; 6083 } 6084 s->is_nonstreaming = true; 6085 } else if (!dc_isar_feature(aa64_sve, s)) { 6086 return false; 6087 } 6088 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); 6089 } 6090 6091 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false) 6092 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true) 6093 6094 static gen_helper_gvec_3 * const saddw_fns[4] = { 6095 NULL, gen_helper_sve2_saddw_h, 6096 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d, 6097 }; 6098 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0) 6099 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1) 6100 6101 static gen_helper_gvec_3 * const ssubw_fns[4] = { 6102 NULL, gen_helper_sve2_ssubw_h, 6103 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d, 6104 }; 6105 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0) 6106 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1) 6107 6108 static gen_helper_gvec_3 * const uaddw_fns[4] = { 6109 NULL, gen_helper_sve2_uaddw_h, 6110 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d, 6111 }; 6112 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0) 6113 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1) 6114 6115 static gen_helper_gvec_3 * const usubw_fns[4] = { 6116 NULL, gen_helper_sve2_usubw_h, 6117 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d, 6118 }; 6119 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, 
usubw_fns[a->esz], a, 0) 6120 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1) 6121 6122 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6123 { 6124 int top = imm & 1; 6125 int shl = imm >> 1; 6126 int halfbits = 4 << vece; 6127 6128 if (top) { 6129 if (shl == halfbits) { 6130 TCGv_vec t = tcg_temp_new_vec_matching(d); 6131 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6132 tcg_gen_and_vec(vece, d, n, t); 6133 } else { 6134 tcg_gen_sari_vec(vece, d, n, halfbits); 6135 tcg_gen_shli_vec(vece, d, d, shl); 6136 } 6137 } else { 6138 tcg_gen_shli_vec(vece, d, n, halfbits); 6139 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 6140 } 6141 } 6142 6143 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 6144 { 6145 int halfbits = 4 << vece; 6146 int top = imm & 1; 6147 int shl = (imm >> 1); 6148 int shift; 6149 uint64_t mask; 6150 6151 mask = MAKE_64BIT_MASK(0, halfbits); 6152 mask <<= shl; 6153 mask = dup_const(vece, mask); 6154 6155 shift = shl - top * halfbits; 6156 if (shift < 0) { 6157 tcg_gen_shri_i64(d, n, -shift); 6158 } else { 6159 tcg_gen_shli_i64(d, n, shift); 6160 } 6161 tcg_gen_andi_i64(d, d, mask); 6162 } 6163 6164 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6165 { 6166 gen_ushll_i64(MO_16, d, n, imm); 6167 } 6168 6169 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6170 { 6171 gen_ushll_i64(MO_32, d, n, imm); 6172 } 6173 6174 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6175 { 6176 gen_ushll_i64(MO_64, d, n, imm); 6177 } 6178 6179 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6180 { 6181 int halfbits = 4 << vece; 6182 int top = imm & 1; 6183 int shl = imm >> 1; 6184 6185 if (top) { 6186 if (shl == halfbits) { 6187 TCGv_vec t = tcg_temp_new_vec_matching(d); 6188 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6189 tcg_gen_and_vec(vece, d, n, t); 6190 } else { 
6191 tcg_gen_shri_vec(vece, d, n, halfbits); 6192 tcg_gen_shli_vec(vece, d, d, shl); 6193 } 6194 } else { 6195 if (shl == 0) { 6196 TCGv_vec t = tcg_temp_new_vec_matching(d); 6197 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6198 tcg_gen_and_vec(vece, d, n, t); 6199 } else { 6200 tcg_gen_shli_vec(vece, d, n, halfbits); 6201 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 6202 } 6203 } 6204 } 6205 6206 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a, 6207 const GVecGen2i ops[3], bool sel) 6208 { 6209 6210 if (a->esz < 0 || a->esz > 2) { 6211 return false; 6212 } 6213 if (sve_access_check(s)) { 6214 unsigned vsz = vec_full_reg_size(s); 6215 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6216 vec_full_reg_offset(s, a->rn), 6217 vsz, vsz, (a->imm << 1) | sel, 6218 &ops[a->esz]); 6219 } 6220 return true; 6221 } 6222 6223 static const TCGOpcode sshll_list[] = { 6224 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 6225 }; 6226 static const GVecGen2i sshll_ops[3] = { 6227 { .fniv = gen_sshll_vec, 6228 .opt_opc = sshll_list, 6229 .fno = gen_helper_sve2_sshll_h, 6230 .vece = MO_16 }, 6231 { .fniv = gen_sshll_vec, 6232 .opt_opc = sshll_list, 6233 .fno = gen_helper_sve2_sshll_s, 6234 .vece = MO_32 }, 6235 { .fniv = gen_sshll_vec, 6236 .opt_opc = sshll_list, 6237 .fno = gen_helper_sve2_sshll_d, 6238 .vece = MO_64 } 6239 }; 6240 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false) 6241 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true) 6242 6243 static const TCGOpcode ushll_list[] = { 6244 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 6245 }; 6246 static const GVecGen2i ushll_ops[3] = { 6247 { .fni8 = gen_ushll16_i64, 6248 .fniv = gen_ushll_vec, 6249 .opt_opc = ushll_list, 6250 .fno = gen_helper_sve2_ushll_h, 6251 .vece = MO_16 }, 6252 { .fni8 = gen_ushll32_i64, 6253 .fniv = gen_ushll_vec, 6254 .opt_opc = ushll_list, 6255 .fno = gen_helper_sve2_ushll_s, 6256 .vece = MO_32 }, 6257 { .fni8 = gen_ushll64_i64, 6258 .fniv = gen_ushll_vec, 6259 .opt_opc = 
ushll_list, 6260 .fno = gen_helper_sve2_ushll_d, 6261 .vece = MO_64 }, 6262 }; 6263 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false) 6264 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true) 6265 6266 static gen_helper_gvec_3 * const bext_fns[4] = { 6267 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 6268 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 6269 }; 6270 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6271 bext_fns[a->esz], a, 0) 6272 6273 static gen_helper_gvec_3 * const bdep_fns[4] = { 6274 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 6275 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 6276 }; 6277 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6278 bdep_fns[a->esz], a, 0) 6279 6280 static gen_helper_gvec_3 * const bgrp_fns[4] = { 6281 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 6282 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 6283 }; 6284 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6285 bgrp_fns[a->esz], a, 0) 6286 6287 static gen_helper_gvec_3 * const cadd_fns[4] = { 6288 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 6289 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d, 6290 }; 6291 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6292 cadd_fns[a->esz], a, 0) 6293 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6294 cadd_fns[a->esz], a, 1) 6295 6296 static gen_helper_gvec_3 * const sqcadd_fns[4] = { 6297 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 6298 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d, 6299 }; 6300 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6301 sqcadd_fns[a->esz], a, 0) 6302 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6303 sqcadd_fns[a->esz], a, 1) 6304 6305 static gen_helper_gvec_4 * const sabal_fns[4] = { 6306 NULL, gen_helper_sve2_sabal_h, 6307 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d, 6308 }; 6309 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, 
sabal_fns[a->esz], a, 0) 6310 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1) 6311 6312 static gen_helper_gvec_4 * const uabal_fns[4] = { 6313 NULL, gen_helper_sve2_uabal_h, 6314 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d, 6315 }; 6316 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0) 6317 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1) 6318 6319 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 6320 { 6321 static gen_helper_gvec_4 * const fns[2] = { 6322 gen_helper_sve2_adcl_s, 6323 gen_helper_sve2_adcl_d, 6324 }; 6325 /* 6326 * Note that in this case the ESZ field encodes both size and sign. 6327 * Split out 'subtract' into bit 1 of the data field for the helper. 6328 */ 6329 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 6330 } 6331 6332 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 6333 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 6334 6335 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 6336 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 6337 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 6338 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 6339 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 6340 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 6341 6342 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 6343 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 6344 6345 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 6346 const GVecGen2 ops[3]) 6347 { 6348 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 6349 return false; 6350 } 6351 if (sve_access_check(s)) { 6352 unsigned vsz = vec_full_reg_size(s); 6353 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 6354 vec_full_reg_offset(s, a->rn), 6355 vsz, vsz, &ops[a->esz]); 6356 } 6357 return true; 6358 } 6359 6360 
static const TCGOpcode sqxtn_list[] = { 6361 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 6362 }; 6363 6364 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6365 { 6366 TCGv_vec t = tcg_temp_new_vec_matching(d); 6367 int halfbits = 4 << vece; 6368 int64_t mask = (1ull << halfbits) - 1; 6369 int64_t min = -1ull << (halfbits - 1); 6370 int64_t max = -min - 1; 6371 6372 tcg_gen_dupi_vec(vece, t, min); 6373 tcg_gen_smax_vec(vece, d, n, t); 6374 tcg_gen_dupi_vec(vece, t, max); 6375 tcg_gen_smin_vec(vece, d, d, t); 6376 tcg_gen_dupi_vec(vece, t, mask); 6377 tcg_gen_and_vec(vece, d, d, t); 6378 } 6379 6380 static const GVecGen2 sqxtnb_ops[3] = { 6381 { .fniv = gen_sqxtnb_vec, 6382 .opt_opc = sqxtn_list, 6383 .fno = gen_helper_sve2_sqxtnb_h, 6384 .vece = MO_16 }, 6385 { .fniv = gen_sqxtnb_vec, 6386 .opt_opc = sqxtn_list, 6387 .fno = gen_helper_sve2_sqxtnb_s, 6388 .vece = MO_32 }, 6389 { .fniv = gen_sqxtnb_vec, 6390 .opt_opc = sqxtn_list, 6391 .fno = gen_helper_sve2_sqxtnb_d, 6392 .vece = MO_64 }, 6393 }; 6394 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 6395 6396 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6397 { 6398 TCGv_vec t = tcg_temp_new_vec_matching(d); 6399 int halfbits = 4 << vece; 6400 int64_t mask = (1ull << halfbits) - 1; 6401 int64_t min = -1ull << (halfbits - 1); 6402 int64_t max = -min - 1; 6403 6404 tcg_gen_dupi_vec(vece, t, min); 6405 tcg_gen_smax_vec(vece, n, n, t); 6406 tcg_gen_dupi_vec(vece, t, max); 6407 tcg_gen_smin_vec(vece, n, n, t); 6408 tcg_gen_shli_vec(vece, n, n, halfbits); 6409 tcg_gen_dupi_vec(vece, t, mask); 6410 tcg_gen_bitsel_vec(vece, d, t, d, n); 6411 } 6412 6413 static const GVecGen2 sqxtnt_ops[3] = { 6414 { .fniv = gen_sqxtnt_vec, 6415 .opt_opc = sqxtn_list, 6416 .load_dest = true, 6417 .fno = gen_helper_sve2_sqxtnt_h, 6418 .vece = MO_16 }, 6419 { .fniv = gen_sqxtnt_vec, 6420 .opt_opc = sqxtn_list, 6421 .load_dest = true, 6422 .fno = gen_helper_sve2_sqxtnt_s, 6423 
.vece = MO_32 }, 6424 { .fniv = gen_sqxtnt_vec, 6425 .opt_opc = sqxtn_list, 6426 .load_dest = true, 6427 .fno = gen_helper_sve2_sqxtnt_d, 6428 .vece = MO_64 }, 6429 }; 6430 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 6431 6432 static const TCGOpcode uqxtn_list[] = { 6433 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 6434 }; 6435 6436 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6437 { 6438 TCGv_vec t = tcg_temp_new_vec_matching(d); 6439 int halfbits = 4 << vece; 6440 int64_t max = (1ull << halfbits) - 1; 6441 6442 tcg_gen_dupi_vec(vece, t, max); 6443 tcg_gen_umin_vec(vece, d, n, t); 6444 } 6445 6446 static const GVecGen2 uqxtnb_ops[3] = { 6447 { .fniv = gen_uqxtnb_vec, 6448 .opt_opc = uqxtn_list, 6449 .fno = gen_helper_sve2_uqxtnb_h, 6450 .vece = MO_16 }, 6451 { .fniv = gen_uqxtnb_vec, 6452 .opt_opc = uqxtn_list, 6453 .fno = gen_helper_sve2_uqxtnb_s, 6454 .vece = MO_32 }, 6455 { .fniv = gen_uqxtnb_vec, 6456 .opt_opc = uqxtn_list, 6457 .fno = gen_helper_sve2_uqxtnb_d, 6458 .vece = MO_64 }, 6459 }; 6460 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) 6461 6462 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6463 { 6464 TCGv_vec t = tcg_temp_new_vec_matching(d); 6465 int halfbits = 4 << vece; 6466 int64_t max = (1ull << halfbits) - 1; 6467 6468 tcg_gen_dupi_vec(vece, t, max); 6469 tcg_gen_umin_vec(vece, n, n, t); 6470 tcg_gen_shli_vec(vece, n, n, halfbits); 6471 tcg_gen_bitsel_vec(vece, d, t, d, n); 6472 } 6473 6474 static const GVecGen2 uqxtnt_ops[3] = { 6475 { .fniv = gen_uqxtnt_vec, 6476 .opt_opc = uqxtn_list, 6477 .load_dest = true, 6478 .fno = gen_helper_sve2_uqxtnt_h, 6479 .vece = MO_16 }, 6480 { .fniv = gen_uqxtnt_vec, 6481 .opt_opc = uqxtn_list, 6482 .load_dest = true, 6483 .fno = gen_helper_sve2_uqxtnt_s, 6484 .vece = MO_32 }, 6485 { .fniv = gen_uqxtnt_vec, 6486 .opt_opc = uqxtn_list, 6487 .load_dest = true, 6488 .fno = gen_helper_sve2_uqxtnt_d, 6489 .vece = MO_64 }, 6490 }; 6491 
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops) 6492 6493 static const TCGOpcode sqxtun_list[] = { 6494 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 6495 }; 6496 6497 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6498 { 6499 TCGv_vec t = tcg_temp_new_vec_matching(d); 6500 int halfbits = 4 << vece; 6501 int64_t max = (1ull << halfbits) - 1; 6502 6503 tcg_gen_dupi_vec(vece, t, 0); 6504 tcg_gen_smax_vec(vece, d, n, t); 6505 tcg_gen_dupi_vec(vece, t, max); 6506 tcg_gen_umin_vec(vece, d, d, t); 6507 } 6508 6509 static const GVecGen2 sqxtunb_ops[3] = { 6510 { .fniv = gen_sqxtunb_vec, 6511 .opt_opc = sqxtun_list, 6512 .fno = gen_helper_sve2_sqxtunb_h, 6513 .vece = MO_16 }, 6514 { .fniv = gen_sqxtunb_vec, 6515 .opt_opc = sqxtun_list, 6516 .fno = gen_helper_sve2_sqxtunb_s, 6517 .vece = MO_32 }, 6518 { .fniv = gen_sqxtunb_vec, 6519 .opt_opc = sqxtun_list, 6520 .fno = gen_helper_sve2_sqxtunb_d, 6521 .vece = MO_64 }, 6522 }; 6523 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) 6524 6525 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6526 { 6527 TCGv_vec t = tcg_temp_new_vec_matching(d); 6528 int halfbits = 4 << vece; 6529 int64_t max = (1ull << halfbits) - 1; 6530 6531 tcg_gen_dupi_vec(vece, t, 0); 6532 tcg_gen_smax_vec(vece, n, n, t); 6533 tcg_gen_dupi_vec(vece, t, max); 6534 tcg_gen_umin_vec(vece, n, n, t); 6535 tcg_gen_shli_vec(vece, n, n, halfbits); 6536 tcg_gen_bitsel_vec(vece, d, t, d, n); 6537 } 6538 6539 static const GVecGen2 sqxtunt_ops[3] = { 6540 { .fniv = gen_sqxtunt_vec, 6541 .opt_opc = sqxtun_list, 6542 .load_dest = true, 6543 .fno = gen_helper_sve2_sqxtunt_h, 6544 .vece = MO_16 }, 6545 { .fniv = gen_sqxtunt_vec, 6546 .opt_opc = sqxtun_list, 6547 .load_dest = true, 6548 .fno = gen_helper_sve2_sqxtunt_s, 6549 .vece = MO_32 }, 6550 { .fniv = gen_sqxtunt_vec, 6551 .opt_opc = sqxtun_list, 6552 .load_dest = true, 6553 .fno = gen_helper_sve2_sqxtunt_d, 6554 .vece = MO_64 }, 6555 }; 
6556 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops) 6557 6558 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a, 6559 const GVecGen2i ops[3]) 6560 { 6561 if (a->esz < 0 || a->esz > MO_32) { 6562 return false; 6563 } 6564 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 6565 if (sve_access_check(s)) { 6566 unsigned vsz = vec_full_reg_size(s); 6567 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6568 vec_full_reg_offset(s, a->rn), 6569 vsz, vsz, a->imm, &ops[a->esz]); 6570 } 6571 return true; 6572 } 6573 6574 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6575 { 6576 int halfbits = 4 << vece; 6577 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6578 6579 tcg_gen_shri_i64(d, n, shr); 6580 tcg_gen_andi_i64(d, d, mask); 6581 } 6582 6583 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6584 { 6585 gen_shrnb_i64(MO_16, d, n, shr); 6586 } 6587 6588 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6589 { 6590 gen_shrnb_i64(MO_32, d, n, shr); 6591 } 6592 6593 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6594 { 6595 gen_shrnb_i64(MO_64, d, n, shr); 6596 } 6597 6598 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6599 { 6600 TCGv_vec t = tcg_temp_new_vec_matching(d); 6601 int halfbits = 4 << vece; 6602 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6603 6604 tcg_gen_shri_vec(vece, n, n, shr); 6605 tcg_gen_dupi_vec(vece, t, mask); 6606 tcg_gen_and_vec(vece, d, n, t); 6607 } 6608 6609 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; 6610 static const GVecGen2i shrnb_ops[3] = { 6611 { .fni8 = gen_shrnb16_i64, 6612 .fniv = gen_shrnb_vec, 6613 .opt_opc = shrnb_vec_list, 6614 .fno = gen_helper_sve2_shrnb_h, 6615 .vece = MO_16 }, 6616 { .fni8 = gen_shrnb32_i64, 6617 .fniv = gen_shrnb_vec, 6618 .opt_opc = shrnb_vec_list, 6619 .fno = gen_helper_sve2_shrnb_s, 6620 .vece = MO_32 }, 6621 { .fni8 = gen_shrnb64_i64, 6622 .fniv 
= gen_shrnb_vec, 6623 .opt_opc = shrnb_vec_list, 6624 .fno = gen_helper_sve2_shrnb_d, 6625 .vece = MO_64 }, 6626 }; 6627 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops) 6628 6629 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6630 { 6631 int halfbits = 4 << vece; 6632 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6633 6634 tcg_gen_shli_i64(n, n, halfbits - shr); 6635 tcg_gen_andi_i64(n, n, ~mask); 6636 tcg_gen_andi_i64(d, d, mask); 6637 tcg_gen_or_i64(d, d, n); 6638 } 6639 6640 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6641 { 6642 gen_shrnt_i64(MO_16, d, n, shr); 6643 } 6644 6645 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6646 { 6647 gen_shrnt_i64(MO_32, d, n, shr); 6648 } 6649 6650 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6651 { 6652 tcg_gen_shri_i64(n, n, shr); 6653 tcg_gen_deposit_i64(d, d, n, 32, 32); 6654 } 6655 6656 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6657 { 6658 TCGv_vec t = tcg_temp_new_vec_matching(d); 6659 int halfbits = 4 << vece; 6660 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6661 6662 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 6663 tcg_gen_dupi_vec(vece, t, mask); 6664 tcg_gen_bitsel_vec(vece, d, t, d, n); 6665 } 6666 6667 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; 6668 static const GVecGen2i shrnt_ops[3] = { 6669 { .fni8 = gen_shrnt16_i64, 6670 .fniv = gen_shrnt_vec, 6671 .opt_opc = shrnt_vec_list, 6672 .load_dest = true, 6673 .fno = gen_helper_sve2_shrnt_h, 6674 .vece = MO_16 }, 6675 { .fni8 = gen_shrnt32_i64, 6676 .fniv = gen_shrnt_vec, 6677 .opt_opc = shrnt_vec_list, 6678 .load_dest = true, 6679 .fno = gen_helper_sve2_shrnt_s, 6680 .vece = MO_32 }, 6681 { .fni8 = gen_shrnt64_i64, 6682 .fniv = gen_shrnt_vec, 6683 .opt_opc = shrnt_vec_list, 6684 .load_dest = true, 6685 .fno = gen_helper_sve2_shrnt_d, 6686 .vece = MO_64 }, 6687 }; 6688 TRANS_FEAT(SHRNT, 
aa64_sve2, do_shr_narrow, a, shrnt_ops) 6689 6690 static const GVecGen2i rshrnb_ops[3] = { 6691 { .fno = gen_helper_sve2_rshrnb_h }, 6692 { .fno = gen_helper_sve2_rshrnb_s }, 6693 { .fno = gen_helper_sve2_rshrnb_d }, 6694 }; 6695 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops) 6696 6697 static const GVecGen2i rshrnt_ops[3] = { 6698 { .fno = gen_helper_sve2_rshrnt_h }, 6699 { .fno = gen_helper_sve2_rshrnt_s }, 6700 { .fno = gen_helper_sve2_rshrnt_d }, 6701 }; 6702 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) 6703 6704 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 6705 TCGv_vec n, int64_t shr) 6706 { 6707 TCGv_vec t = tcg_temp_new_vec_matching(d); 6708 int halfbits = 4 << vece; 6709 6710 tcg_gen_sari_vec(vece, n, n, shr); 6711 tcg_gen_dupi_vec(vece, t, 0); 6712 tcg_gen_smax_vec(vece, n, n, t); 6713 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6714 tcg_gen_umin_vec(vece, d, n, t); 6715 } 6716 6717 static const TCGOpcode sqshrunb_vec_list[] = { 6718 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6719 }; 6720 static const GVecGen2i sqshrunb_ops[3] = { 6721 { .fniv = gen_sqshrunb_vec, 6722 .opt_opc = sqshrunb_vec_list, 6723 .fno = gen_helper_sve2_sqshrunb_h, 6724 .vece = MO_16 }, 6725 { .fniv = gen_sqshrunb_vec, 6726 .opt_opc = sqshrunb_vec_list, 6727 .fno = gen_helper_sve2_sqshrunb_s, 6728 .vece = MO_32 }, 6729 { .fniv = gen_sqshrunb_vec, 6730 .opt_opc = sqshrunb_vec_list, 6731 .fno = gen_helper_sve2_sqshrunb_d, 6732 .vece = MO_64 }, 6733 }; 6734 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) 6735 6736 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 6737 TCGv_vec n, int64_t shr) 6738 { 6739 TCGv_vec t = tcg_temp_new_vec_matching(d); 6740 int halfbits = 4 << vece; 6741 6742 tcg_gen_sari_vec(vece, n, n, shr); 6743 tcg_gen_dupi_vec(vece, t, 0); 6744 tcg_gen_smax_vec(vece, n, n, t); 6745 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6746 tcg_gen_umin_vec(vece, n, n, t); 6747 
tcg_gen_shli_vec(vece, n, n, halfbits); 6748 tcg_gen_bitsel_vec(vece, d, t, d, n); 6749 } 6750 6751 static const TCGOpcode sqshrunt_vec_list[] = { 6752 INDEX_op_shli_vec, INDEX_op_sari_vec, 6753 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6754 }; 6755 static const GVecGen2i sqshrunt_ops[3] = { 6756 { .fniv = gen_sqshrunt_vec, 6757 .opt_opc = sqshrunt_vec_list, 6758 .load_dest = true, 6759 .fno = gen_helper_sve2_sqshrunt_h, 6760 .vece = MO_16 }, 6761 { .fniv = gen_sqshrunt_vec, 6762 .opt_opc = sqshrunt_vec_list, 6763 .load_dest = true, 6764 .fno = gen_helper_sve2_sqshrunt_s, 6765 .vece = MO_32 }, 6766 { .fniv = gen_sqshrunt_vec, 6767 .opt_opc = sqshrunt_vec_list, 6768 .load_dest = true, 6769 .fno = gen_helper_sve2_sqshrunt_d, 6770 .vece = MO_64 }, 6771 }; 6772 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops) 6773 6774 static const GVecGen2i sqrshrunb_ops[3] = { 6775 { .fno = gen_helper_sve2_sqrshrunb_h }, 6776 { .fno = gen_helper_sve2_sqrshrunb_s }, 6777 { .fno = gen_helper_sve2_sqrshrunb_d }, 6778 }; 6779 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops) 6780 6781 static const GVecGen2i sqrshrunt_ops[3] = { 6782 { .fno = gen_helper_sve2_sqrshrunt_h }, 6783 { .fno = gen_helper_sve2_sqrshrunt_s }, 6784 { .fno = gen_helper_sve2_sqrshrunt_d }, 6785 }; 6786 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops) 6787 6788 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 6789 TCGv_vec n, int64_t shr) 6790 { 6791 TCGv_vec t = tcg_temp_new_vec_matching(d); 6792 int halfbits = 4 << vece; 6793 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6794 int64_t min = -max - 1; 6795 6796 tcg_gen_sari_vec(vece, n, n, shr); 6797 tcg_gen_dupi_vec(vece, t, min); 6798 tcg_gen_smax_vec(vece, n, n, t); 6799 tcg_gen_dupi_vec(vece, t, max); 6800 tcg_gen_smin_vec(vece, n, n, t); 6801 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6802 tcg_gen_and_vec(vece, d, n, t); 6803 } 6804 6805 static const TCGOpcode sqshrnb_vec_list[] = { 6806 
INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6807 }; 6808 static const GVecGen2i sqshrnb_ops[3] = { 6809 { .fniv = gen_sqshrnb_vec, 6810 .opt_opc = sqshrnb_vec_list, 6811 .fno = gen_helper_sve2_sqshrnb_h, 6812 .vece = MO_16 }, 6813 { .fniv = gen_sqshrnb_vec, 6814 .opt_opc = sqshrnb_vec_list, 6815 .fno = gen_helper_sve2_sqshrnb_s, 6816 .vece = MO_32 }, 6817 { .fniv = gen_sqshrnb_vec, 6818 .opt_opc = sqshrnb_vec_list, 6819 .fno = gen_helper_sve2_sqshrnb_d, 6820 .vece = MO_64 }, 6821 }; 6822 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) 6823 6824 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 6825 TCGv_vec n, int64_t shr) 6826 { 6827 TCGv_vec t = tcg_temp_new_vec_matching(d); 6828 int halfbits = 4 << vece; 6829 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6830 int64_t min = -max - 1; 6831 6832 tcg_gen_sari_vec(vece, n, n, shr); 6833 tcg_gen_dupi_vec(vece, t, min); 6834 tcg_gen_smax_vec(vece, n, n, t); 6835 tcg_gen_dupi_vec(vece, t, max); 6836 tcg_gen_smin_vec(vece, n, n, t); 6837 tcg_gen_shli_vec(vece, n, n, halfbits); 6838 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6839 tcg_gen_bitsel_vec(vece, d, t, d, n); 6840 } 6841 6842 static const TCGOpcode sqshrnt_vec_list[] = { 6843 INDEX_op_shli_vec, INDEX_op_sari_vec, 6844 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6845 }; 6846 static const GVecGen2i sqshrnt_ops[3] = { 6847 { .fniv = gen_sqshrnt_vec, 6848 .opt_opc = sqshrnt_vec_list, 6849 .load_dest = true, 6850 .fno = gen_helper_sve2_sqshrnt_h, 6851 .vece = MO_16 }, 6852 { .fniv = gen_sqshrnt_vec, 6853 .opt_opc = sqshrnt_vec_list, 6854 .load_dest = true, 6855 .fno = gen_helper_sve2_sqshrnt_s, 6856 .vece = MO_32 }, 6857 { .fniv = gen_sqshrnt_vec, 6858 .opt_opc = sqshrnt_vec_list, 6859 .load_dest = true, 6860 .fno = gen_helper_sve2_sqshrnt_d, 6861 .vece = MO_64 }, 6862 }; 6863 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops) 6864 6865 static const GVecGen2i sqrshrnb_ops[3] = { 6866 { .fno = 
gen_helper_sve2_sqrshrnb_h }, 6867 { .fno = gen_helper_sve2_sqrshrnb_s }, 6868 { .fno = gen_helper_sve2_sqrshrnb_d }, 6869 }; 6870 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops) 6871 6872 static const GVecGen2i sqrshrnt_ops[3] = { 6873 { .fno = gen_helper_sve2_sqrshrnt_h }, 6874 { .fno = gen_helper_sve2_sqrshrnt_s }, 6875 { .fno = gen_helper_sve2_sqrshrnt_d }, 6876 }; 6877 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) 6878 6879 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 6880 TCGv_vec n, int64_t shr) 6881 { 6882 TCGv_vec t = tcg_temp_new_vec_matching(d); 6883 int halfbits = 4 << vece; 6884 6885 tcg_gen_shri_vec(vece, n, n, shr); 6886 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6887 tcg_gen_umin_vec(vece, d, n, t); 6888 } 6889 6890 static const TCGOpcode uqshrnb_vec_list[] = { 6891 INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6892 }; 6893 static const GVecGen2i uqshrnb_ops[3] = { 6894 { .fniv = gen_uqshrnb_vec, 6895 .opt_opc = uqshrnb_vec_list, 6896 .fno = gen_helper_sve2_uqshrnb_h, 6897 .vece = MO_16 }, 6898 { .fniv = gen_uqshrnb_vec, 6899 .opt_opc = uqshrnb_vec_list, 6900 .fno = gen_helper_sve2_uqshrnb_s, 6901 .vece = MO_32 }, 6902 { .fniv = gen_uqshrnb_vec, 6903 .opt_opc = uqshrnb_vec_list, 6904 .fno = gen_helper_sve2_uqshrnb_d, 6905 .vece = MO_64 }, 6906 }; 6907 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) 6908 6909 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, 6910 TCGv_vec n, int64_t shr) 6911 { 6912 TCGv_vec t = tcg_temp_new_vec_matching(d); 6913 int halfbits = 4 << vece; 6914 6915 tcg_gen_shri_vec(vece, n, n, shr); 6916 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6917 tcg_gen_umin_vec(vece, n, n, t); 6918 tcg_gen_shli_vec(vece, n, n, halfbits); 6919 tcg_gen_bitsel_vec(vece, d, t, d, n); 6920 } 6921 6922 static const TCGOpcode uqshrnt_vec_list[] = { 6923 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6924 }; 6925 static const GVecGen2i uqshrnt_ops[3] = { 
6926 { .fniv = gen_uqshrnt_vec, 6927 .opt_opc = uqshrnt_vec_list, 6928 .load_dest = true, 6929 .fno = gen_helper_sve2_uqshrnt_h, 6930 .vece = MO_16 }, 6931 { .fniv = gen_uqshrnt_vec, 6932 .opt_opc = uqshrnt_vec_list, 6933 .load_dest = true, 6934 .fno = gen_helper_sve2_uqshrnt_s, 6935 .vece = MO_32 }, 6936 { .fniv = gen_uqshrnt_vec, 6937 .opt_opc = uqshrnt_vec_list, 6938 .load_dest = true, 6939 .fno = gen_helper_sve2_uqshrnt_d, 6940 .vece = MO_64 }, 6941 }; 6942 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops) 6943 6944 static const GVecGen2i uqrshrnb_ops[3] = { 6945 { .fno = gen_helper_sve2_uqrshrnb_h }, 6946 { .fno = gen_helper_sve2_uqrshrnb_s }, 6947 { .fno = gen_helper_sve2_uqrshrnb_d }, 6948 }; 6949 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops) 6950 6951 static const GVecGen2i uqrshrnt_ops[3] = { 6952 { .fno = gen_helper_sve2_uqrshrnt_h }, 6953 { .fno = gen_helper_sve2_uqrshrnt_s }, 6954 { .fno = gen_helper_sve2_uqrshrnt_d }, 6955 }; 6956 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops) 6957 6958 #define DO_SVE2_ZZZ_NARROW(NAME, name) \ 6959 static gen_helper_gvec_3 * const name##_fns[4] = { \ 6960 NULL, gen_helper_sve2_##name##_h, \ 6961 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 6962 }; \ 6963 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \ 6964 name##_fns[a->esz], a, 0) 6965 6966 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) 6967 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt) 6968 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb) 6969 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt) 6970 6971 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb) 6972 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt) 6973 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb) 6974 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) 6975 6976 static gen_helper_gvec_flags_4 * const match_fns[4] = { 6977 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL 6978 }; 6979 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) 6980 6981 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = 
{ 6982 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL 6983 }; 6984 TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) 6985 6986 static gen_helper_gvec_4 * const histcnt_fns[4] = { 6987 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d 6988 }; 6989 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, 6990 histcnt_fns[a->esz], a, 0) 6991 6992 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, 6993 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) 6994 6995 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) 6996 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) 6997 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) 6998 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) 6999 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) 7000 7001 /* 7002 * SVE Integer Multiply-Add (unpredicated) 7003 */ 7004 7005 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, 7006 gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, 7007 0, FPST_FPCR) 7008 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, 7009 gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, 7010 0, FPST_FPCR) 7011 7012 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { 7013 NULL, gen_helper_sve2_sqdmlal_zzzw_h, 7014 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, 7015 }; 7016 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7017 sqdmlal_zzzw_fns[a->esz], a, 0) 7018 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7019 sqdmlal_zzzw_fns[a->esz], a, 3) 7020 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7021 sqdmlal_zzzw_fns[a->esz], a, 2) 7022 7023 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = { 7024 NULL, gen_helper_sve2_sqdmlsl_zzzw_h, 7025 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d, 7026 }; 7027 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7028 sqdmlsl_zzzw_fns[a->esz], a, 0) 7029 TRANS_FEAT(SQDMLSLT_zzzw, 
aa64_sve2, gen_gvec_ool_arg_zzzz, 7030 sqdmlsl_zzzw_fns[a->esz], a, 3) 7031 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7032 sqdmlsl_zzzw_fns[a->esz], a, 2) 7033 7034 static gen_helper_gvec_4 * const sqrdmlah_fns[] = { 7035 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h, 7036 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d, 7037 }; 7038 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7039 sqrdmlah_fns[a->esz], a, 0) 7040 7041 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = { 7042 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h, 7043 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d, 7044 }; 7045 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7046 sqrdmlsh_fns[a->esz], a, 0) 7047 7048 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = { 7049 NULL, gen_helper_sve2_smlal_zzzw_h, 7050 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d, 7051 }; 7052 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7053 smlal_zzzw_fns[a->esz], a, 0) 7054 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7055 smlal_zzzw_fns[a->esz], a, 1) 7056 7057 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = { 7058 NULL, gen_helper_sve2_umlal_zzzw_h, 7059 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d, 7060 }; 7061 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7062 umlal_zzzw_fns[a->esz], a, 0) 7063 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7064 umlal_zzzw_fns[a->esz], a, 1) 7065 7066 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = { 7067 NULL, gen_helper_sve2_smlsl_zzzw_h, 7068 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d, 7069 }; 7070 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7071 smlsl_zzzw_fns[a->esz], a, 0) 7072 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7073 smlsl_zzzw_fns[a->esz], a, 1) 7074 7075 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = { 7076 NULL, gen_helper_sve2_umlsl_zzzw_h, 7077 
gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, 7078 }; 7079 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7080 umlsl_zzzw_fns[a->esz], a, 0) 7081 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7082 umlsl_zzzw_fns[a->esz], a, 1) 7083 7084 static gen_helper_gvec_4 * const cmla_fns[] = { 7085 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, 7086 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, 7087 }; 7088 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7089 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7090 7091 static gen_helper_gvec_4 * const cdot_fns[] = { 7092 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d 7093 }; 7094 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7095 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7096 7097 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { 7098 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, 7099 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, 7100 }; 7101 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7102 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7103 7104 TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7105 a->esz == 2 ? 
gen_helper_gvec_usdot_b : NULL, a, 0) 7106 7107 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, 7108 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt) 7109 7110 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7111 gen_helper_crypto_aese, a, false) 7112 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7113 gen_helper_crypto_aese, a, true) 7114 7115 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7116 gen_helper_crypto_sm4e, a, 0) 7117 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7118 gen_helper_crypto_sm4ekey, a, 0) 7119 7120 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, 7121 gen_gvec_rax1, a) 7122 7123 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, 7124 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR) 7125 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, 7126 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR) 7127 7128 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 7129 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR) 7130 7131 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, 7132 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR) 7133 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, 7134 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR) 7135 7136 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, 7137 FPROUNDING_ODD, gen_helper_sve_fcvt_ds) 7138 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a, 7139 FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds) 7140 7141 static gen_helper_gvec_3_ptr * const flogb_fns[] = { 7142 NULL, gen_helper_flogb_h, 7143 gen_helper_flogb_s, gen_helper_flogb_d 7144 }; 7145 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], 7146 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 7147 7148 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) 7149 { 7150 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s, 7151 a->rd, a->rn, a->rm, a->ra, 7152 (sel << 1) | sub, cpu_env); 7153 } 7154 7155 TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false) 7156 TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true) 7157 TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false) 7158 TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true) 7159 7160 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) 7161 { 7162 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s, 7163 a->rd, a->rn, a->rm, a->ra, 7164 (a->index << 2) | (sel << 1) | sub, cpu_env); 7165 } 7166 7167 TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false) 7168 TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true) 7169 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false) 7170 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true) 7171 7172 TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7173 gen_helper_gvec_smmla_b, a, 0) 7174 TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7175 gen_helper_gvec_usmmla_b, a, 0) 7176 TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7177 gen_helper_gvec_ummla_b, a, 0) 7178 7179 TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, 7180 gen_helper_gvec_bfdot, a, 0) 7181 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz, 7182 gen_helper_gvec_bfdot_idx, a) 7183 7184 TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, 7185 gen_helper_gvec_bfmmla, a, 0) 7186 7187 static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) 7188 { 7189 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, 7190 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR); 7191 } 7192 7193 TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, 
do_BFMLAL_zzzw, a, false) 7194 TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true) 7195 7196 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) 7197 { 7198 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, 7199 a->rd, a->rn, a->rm, a->ra, 7200 (a->index << 1) | sel, FPST_FPCR); 7201 } 7202 7203 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) 7204 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) 7205 7206 static bool trans_PSEL(DisasContext *s, arg_psel *a) 7207 { 7208 int vl = vec_full_reg_size(s); 7209 int pl = pred_gvec_reg_size(s); 7210 int elements = vl >> a->esz; 7211 TCGv_i64 tmp, didx, dbit; 7212 TCGv_ptr ptr; 7213 7214 if (!dc_isar_feature(aa64_sme, s)) { 7215 return false; 7216 } 7217 if (!sve_access_check(s)) { 7218 return true; 7219 } 7220 7221 tmp = tcg_temp_new_i64(); 7222 dbit = tcg_temp_new_i64(); 7223 didx = tcg_temp_new_i64(); 7224 ptr = tcg_temp_new_ptr(); 7225 7226 /* Compute the predicate element. */ 7227 tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm); 7228 if (is_power_of_2(elements)) { 7229 tcg_gen_andi_i64(tmp, tmp, elements - 1); 7230 } else { 7231 tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements)); 7232 } 7233 7234 /* Extract the predicate byte and bit indices. */ 7235 tcg_gen_shli_i64(tmp, tmp, a->esz); 7236 tcg_gen_andi_i64(dbit, tmp, 7); 7237 tcg_gen_shri_i64(didx, tmp, 3); 7238 if (HOST_BIG_ENDIAN) { 7239 tcg_gen_xori_i64(didx, didx, 7); 7240 } 7241 7242 /* Load the predicate word. */ 7243 tcg_gen_trunc_i64_ptr(ptr, didx); 7244 tcg_gen_add_ptr(ptr, ptr, cpu_env); 7245 tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm)); 7246 7247 /* Extract the predicate bit and replicate to MO_64. */ 7248 tcg_gen_shr_i64(tmp, tmp, dbit); 7249 tcg_gen_andi_i64(tmp, tmp, 1); 7250 tcg_gen_neg_i64(tmp, tmp); 7251 7252 /* Apply to either copy the source, or write zeros. 
*/ 7253 tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), 7254 pred_full_reg_offset(s, a->pn), tmp, pl, pl); 7255 return true; 7256 } 7257 7258 static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7259 { 7260 tcg_gen_smax_i32(d, a, n); 7261 tcg_gen_smin_i32(d, d, m); 7262 } 7263 7264 static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7265 { 7266 tcg_gen_smax_i64(d, a, n); 7267 tcg_gen_smin_i64(d, d, m); 7268 } 7269 7270 static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7271 TCGv_vec m, TCGv_vec a) 7272 { 7273 tcg_gen_smax_vec(vece, d, a, n); 7274 tcg_gen_smin_vec(vece, d, d, m); 7275 } 7276 7277 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7278 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7279 { 7280 static const TCGOpcode vecop[] = { 7281 INDEX_op_smin_vec, INDEX_op_smax_vec, 0 7282 }; 7283 static const GVecGen4 ops[4] = { 7284 { .fniv = gen_sclamp_vec, 7285 .fno = gen_helper_gvec_sclamp_b, 7286 .opt_opc = vecop, 7287 .vece = MO_8 }, 7288 { .fniv = gen_sclamp_vec, 7289 .fno = gen_helper_gvec_sclamp_h, 7290 .opt_opc = vecop, 7291 .vece = MO_16 }, 7292 { .fni4 = gen_sclamp_i32, 7293 .fniv = gen_sclamp_vec, 7294 .fno = gen_helper_gvec_sclamp_s, 7295 .opt_opc = vecop, 7296 .vece = MO_32 }, 7297 { .fni8 = gen_sclamp_i64, 7298 .fniv = gen_sclamp_vec, 7299 .fno = gen_helper_gvec_sclamp_d, 7300 .opt_opc = vecop, 7301 .vece = MO_64, 7302 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7303 }; 7304 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7305 } 7306 7307 TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) 7308 7309 static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7310 { 7311 tcg_gen_umax_i32(d, a, n); 7312 tcg_gen_umin_i32(d, d, m); 7313 } 7314 7315 static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7316 { 7317 tcg_gen_umax_i64(d, a, n); 7318 tcg_gen_umin_i64(d, d, m); 7319 } 7320 7321 static void 
gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7322 TCGv_vec m, TCGv_vec a) 7323 { 7324 tcg_gen_umax_vec(vece, d, a, n); 7325 tcg_gen_umin_vec(vece, d, d, m); 7326 } 7327 7328 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7329 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7330 { 7331 static const TCGOpcode vecop[] = { 7332 INDEX_op_umin_vec, INDEX_op_umax_vec, 0 7333 }; 7334 static const GVecGen4 ops[4] = { 7335 { .fniv = gen_uclamp_vec, 7336 .fno = gen_helper_gvec_uclamp_b, 7337 .opt_opc = vecop, 7338 .vece = MO_8 }, 7339 { .fniv = gen_uclamp_vec, 7340 .fno = gen_helper_gvec_uclamp_h, 7341 .opt_opc = vecop, 7342 .vece = MO_16 }, 7343 { .fni4 = gen_uclamp_i32, 7344 .fniv = gen_uclamp_vec, 7345 .fno = gen_helper_gvec_uclamp_s, 7346 .opt_opc = vecop, 7347 .vece = MO_32 }, 7348 { .fni8 = gen_uclamp_i64, 7349 .fniv = gen_uclamp_vec, 7350 .fno = gen_helper_gvec_uclamp_d, 7351 .opt_opc = vecop, 7352 .vece = MO_64, 7353 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7354 }; 7355 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7356 } 7357 7358 TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) 7359