/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  Cf. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Invoke an out-of-line helper on 2 Zregs.
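 * Returns false for an unallocated encoding (fn == NULL); otherwise
 * returns true, with the SVE access exception already raised if the
 * access check failed.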
 */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}

/* Invoke an out-of-line helper on 4 Zregs, plus a pointer.
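 * The caller-supplied pointer (e.g. a float_status block) is passed
 * through to the helper unchanged.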
 */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate.
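 * The predicate register is passed to the helper as a fourth operand.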
 */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.
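 * MO_8 suffices, since a whole-register copy is element-size agnostic.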
 */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, tcg_env, dofs);
    tcg_gen_addi_ptr(gptr, tcg_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));

    do_pred_flags(t);
}

/* For each element size, the bits within a predicate word that are active.
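 * One bit per byte of the element: MO_8 sets every bit, MO_16 every
 * second bit, and so on; the fifth entry covers 16-byte elements.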
 */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};

static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor.
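         * (a shift of esize was reduced to zero by the mask above)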
         */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering.
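     * Here the select mask is the last operand: d = (n & a) | (m & ~a).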
     */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)

static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve,
           gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {               \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,           \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                         \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {  \
        gen_helper_##name##_b, gen_helper_##name##_h,   \
        gen_helper_##name##_s, gen_helper_##name##_d,   \
    };                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL, gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL, gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL, gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB,
           aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL, gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);

    write_fp_dreg(s, a->rd, temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {                \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
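     * (do_movz_zpz below with invert set zeros just the active elements)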
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s, NULL                              \
    };                                                                    \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
               a->esz < 0 ? \
               NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])

/*
 *** SVE Index Generation Group
 */

static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32,
                              incr);
        fns[esz](t_zd, s32, i32, desc);
    }
    return true;
}

TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, tcg_env, nofs);
        tcg_gen_ld_i64(pm, tcg_env, mofs);
        tcg_gen_ld_i64(pg, tcg_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, tcg_env, dofs);

        do_predtest1(pd, pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 =
                       TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece,
                   pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, tcg_env, nofs);
            tcg_gen_ld_i64(pg, tcg_env, gofs);
            do_predtest1(pn, pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
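 * For PTRUES, setflag is true and the NZCV flags are set from the
 * stored predicate word.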
1709 */ 1710 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) 1711 { 1712 if (!sve_access_check(s)) { 1713 return true; 1714 } 1715 1716 unsigned fullsz = vec_full_reg_size(s); 1717 unsigned ofs = pred_full_reg_offset(s, rd); 1718 unsigned numelem, setsz, i; 1719 uint64_t word, lastword; 1720 TCGv_i64 t; 1721 1722 numelem = decode_pred_count(fullsz, pat, esz); 1723 1724 /* Determine what we must store into each bit, and how many. */ 1725 if (numelem == 0) { 1726 lastword = word = 0; 1727 setsz = fullsz; 1728 } else { 1729 setsz = numelem << esz; 1730 lastword = word = pred_esz_masks[esz]; 1731 if (setsz % 64) { 1732 lastword &= MAKE_64BIT_MASK(0, setsz % 64); 1733 } 1734 } 1735 1736 t = tcg_temp_new_i64(); 1737 if (fullsz <= 64) { 1738 tcg_gen_movi_i64(t, lastword); 1739 tcg_gen_st_i64(t, tcg_env, ofs); 1740 goto done; 1741 } 1742 1743 if (word == lastword) { 1744 unsigned maxsz = size_for_gvec(fullsz / 8); 1745 unsigned oprsz = size_for_gvec(setsz / 8); 1746 1747 if (oprsz * 8 == setsz) { 1748 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word); 1749 goto done; 1750 } 1751 } 1752 1753 setsz /= 8; 1754 fullsz /= 8; 1755 1756 tcg_gen_movi_i64(t, word); 1757 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) { 1758 tcg_gen_st_i64(t, tcg_env, ofs + i); 1759 } 1760 if (lastword != word) { 1761 tcg_gen_movi_i64(t, lastword); 1762 tcg_gen_st_i64(t, tcg_env, ofs + i); 1763 i += 8; 1764 } 1765 if (i < fullsz) { 1766 tcg_gen_movi_i64(t, 0); 1767 for (; i < fullsz; i += 8) { 1768 tcg_gen_st_i64(t, tcg_env, ofs + i); 1769 } 1770 } 1771 1772 done: 1773 /* PTRUES */ 1774 if (setflag) { 1775 tcg_gen_movi_i32(cpu_NF, -(word != 0)); 1776 tcg_gen_movi_i32(cpu_CF, word == 0); 1777 tcg_gen_movi_i32(cpu_VF, 0); 1778 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1779 } 1780 return true; 1781 } 1782 1783 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) 1784 1785 /* Note pat == 31 is #all, to set all elements. */ 1786 TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, 1787 do_predset, 0, FFR_PRED_NUM, 31, false) 1788 1789 /* Note pat == 32 is #unimp, to set no elements. */ 1790 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) 1791 1792 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) 1793 { 1794 /* The path through do_pppp_flags is complicated enough to want to avoid 1795 * duplication. Frob the arguments into the form of a predicated AND. 
1796 */ 1797 arg_rprr_s alt_a = { 1798 .rd = a->rd, .pg = a->pg, .s = a->s, 1799 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM, 1800 }; 1801 1802 s->is_nonstreaming = true; 1803 return trans_AND_pppp(s, &alt_a); 1804 } 1805 1806 TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) 1807 TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) 1808 1809 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, 1810 void (*gen_fn)(TCGv_i32, TCGv_ptr, 1811 TCGv_ptr, TCGv_i32)) 1812 { 1813 if (!sve_access_check(s)) { 1814 return true; 1815 } 1816 1817 TCGv_ptr t_pd = tcg_temp_new_ptr(); 1818 TCGv_ptr t_pg = tcg_temp_new_ptr(); 1819 TCGv_i32 t; 1820 unsigned desc = 0; 1821 1822 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 1823 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 1824 1825 tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd)); 1826 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn)); 1827 t = tcg_temp_new_i32(); 1828 1829 gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc)); 1830 1831 do_pred_flags(t); 1832 return true; 1833 } 1834 1835 TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst) 1836 TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext) 1837 1838 /* 1839 *** SVE Element Count Group 1840 */ 1841 1842 /* Perform an inline saturating addition of a 32-bit value within 1843 * a 64-bit register. The second operand is known to be positive, 1844 * which halves the comparisons we must perform to bound the result. 1845 */ 1846 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1847 { 1848 int64_t ibound; 1849 1850 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 1851 if (u) { 1852 tcg_gen_ext32u_i64(reg, reg); 1853 } else { 1854 tcg_gen_ext32s_i64(reg, reg); 1855 } 1856 if (d) { 1857 tcg_gen_sub_i64(reg, reg, val); 1858 ibound = (u ? 0 : INT32_MIN); 1859 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound)); 1860 } else { 1861 tcg_gen_add_i64(reg, reg, val); 1862 ibound = (u ? UINT32_MAX : INT32_MAX); 1863 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound)); 1864 } 1865 } 1866 1867 /* Similarly with 64-bit values. */ 1868 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1869 { 1870 TCGv_i64 t0 = tcg_temp_new_i64(); 1871 TCGv_i64 t2; 1872 1873 if (u) { 1874 if (d) { 1875 tcg_gen_sub_i64(t0, reg, val); 1876 t2 = tcg_constant_i64(0); 1877 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0); 1878 } else { 1879 tcg_gen_add_i64(t0, reg, val); 1880 t2 = tcg_constant_i64(-1); 1881 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0); 1882 } 1883 } else { 1884 TCGv_i64 t1 = tcg_temp_new_i64(); 1885 if (d) { 1886 /* Detect signed overflow for subtraction. */ 1887 tcg_gen_xor_i64(t0, reg, val); 1888 tcg_gen_sub_i64(t1, reg, val); 1889 tcg_gen_xor_i64(reg, reg, t1); 1890 tcg_gen_and_i64(t0, t0, reg); 1891 1892 /* Bound the result. */ 1893 tcg_gen_movi_i64(reg, INT64_MIN); 1894 t2 = tcg_constant_i64(0); 1895 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1); 1896 } else { 1897 /* Detect signed overflow for addition. */ 1898 tcg_gen_xor_i64(t0, reg, val); 1899 tcg_gen_add_i64(reg, reg, val); 1900 tcg_gen_xor_i64(t1, reg, val); 1901 tcg_gen_andc_i64(t0, t1, t0); 1902 1903 /* Bound the result. */ 1904 tcg_gen_movi_i64(t1, INT64_MAX); 1905 t2 = tcg_constant_i64(0); 1906 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); 1907 } 1908 } 1909 } 1910 1911 /* Similarly with a vector and a scalar operand. 
*/ 1912 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1913 TCGv_i64 val, bool u, bool d) 1914 { 1915 unsigned vsz = vec_full_reg_size(s); 1916 TCGv_ptr dptr, nptr; 1917 TCGv_i32 t32, desc; 1918 TCGv_i64 t64; 1919 1920 dptr = tcg_temp_new_ptr(); 1921 nptr = tcg_temp_new_ptr(); 1922 tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd)); 1923 tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn)); 1924 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1925 1926 switch (esz) { 1927 case MO_8: 1928 t32 = tcg_temp_new_i32(); 1929 tcg_gen_extrl_i64_i32(t32, val); 1930 if (d) { 1931 tcg_gen_neg_i32(t32, t32); 1932 } 1933 if (u) { 1934 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1935 } else { 1936 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1937 } 1938 break; 1939 1940 case MO_16: 1941 t32 = tcg_temp_new_i32(); 1942 tcg_gen_extrl_i64_i32(t32, val); 1943 if (d) { 1944 tcg_gen_neg_i32(t32, t32); 1945 } 1946 if (u) { 1947 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1948 } else { 1949 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1950 } 1951 break; 1952 1953 case MO_32: 1954 t64 = tcg_temp_new_i64(); 1955 if (d) { 1956 tcg_gen_neg_i64(t64, val); 1957 } else { 1958 tcg_gen_mov_i64(t64, val); 1959 } 1960 if (u) { 1961 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1962 } else { 1963 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1964 } 1965 break; 1966 1967 case MO_64: 1968 if (u) { 1969 if (d) { 1970 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1971 } else { 1972 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1973 } 1974 } else if (d) { 1975 t64 = tcg_temp_new_i64(); 1976 tcg_gen_neg_i64(t64, val); 1977 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1978 } else { 1979 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1980 } 1981 break; 1982 1983 default: 1984 g_assert_not_reached(); 1985 } 1986 } 1987 1988 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1989 { 1990 if (!dc_isar_feature(aa64_sve, s)) { 1991 return false; 1992 } 1993 if (sve_access_check(s)) { 1994 unsigned fullsz = vec_full_reg_size(s); 1995 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1996 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 1997 } 1998 return true; 1999 } 2000 2001 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 2002 { 2003 if (!dc_isar_feature(aa64_sve, s)) { 2004 return false; 2005 } 2006 if (sve_access_check(s)) { 2007 unsigned fullsz = vec_full_reg_size(s); 2008 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2009 int inc = numelem * a->imm * (a->d ? -1 : 1); 2010 TCGv_i64 reg = cpu_reg(s, a->rd); 2011 2012 tcg_gen_addi_i64(reg, reg, inc); 2013 } 2014 return true; 2015 } 2016 2017 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 2018 { 2019 if (!dc_isar_feature(aa64_sve, s)) { 2020 return false; 2021 } 2022 if (!sve_access_check(s)) { 2023 return true; 2024 } 2025 2026 unsigned fullsz = vec_full_reg_size(s); 2027 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2028 int inc = numelem * a->imm; 2029 TCGv_i64 reg = cpu_reg(s, a->rd); 2030 2031 /* Use normal 64-bit arithmetic to detect 32-bit overflow. 
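 * For example (illustrative values): with a 32-byte vector, SQINCW Xd,
 * ALL, MUL #2 yields numelem = 8 and inc = 16; the sign-extended register
 * and the increment are added in 64 bits and clamped by do_sat_addsub_32.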
*/ 2032 if (inc == 0) { 2033 if (a->u) { 2034 tcg_gen_ext32u_i64(reg, reg); 2035 } else { 2036 tcg_gen_ext32s_i64(reg, reg); 2037 } 2038 } else { 2039 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 2040 } 2041 return true; 2042 } 2043 2044 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2045 { 2046 if (!dc_isar_feature(aa64_sve, s)) { 2047 return false; 2048 } 2049 if (!sve_access_check(s)) { 2050 return true; 2051 } 2052 2053 unsigned fullsz = vec_full_reg_size(s); 2054 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2055 int inc = numelem * a->imm; 2056 TCGv_i64 reg = cpu_reg(s, a->rd); 2057 2058 if (inc != 0) { 2059 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2060 } 2061 return true; 2062 } 2063 2064 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2065 { 2066 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2067 return false; 2068 } 2069 2070 unsigned fullsz = vec_full_reg_size(s); 2071 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2072 int inc = numelem * a->imm; 2073 2074 if (inc != 0) { 2075 if (sve_access_check(s)) { 2076 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2077 vec_full_reg_offset(s, a->rn), 2078 tcg_constant_i64(a->d ? -inc : inc), 2079 fullsz, fullsz); 2080 } 2081 } else { 2082 do_mov_z(s, a->rd, a->rn); 2083 } 2084 return true; 2085 } 2086 2087 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2088 { 2089 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2090 return false; 2091 } 2092 2093 unsigned fullsz = vec_full_reg_size(s); 2094 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2095 int inc = numelem * a->imm; 2096 2097 if (inc != 0) { 2098 if (sve_access_check(s)) { 2099 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2100 tcg_constant_i64(inc), a->u, a->d); 2101 } 2102 } else { 2103 do_mov_z(s, a->rd, a->rn); 2104 } 2105 return true; 2106 } 2107 2108 /* 2109 *** SVE Bitwise Immediate Group 2110 */ 2111 2112 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2113 { 2114 uint64_t imm; 2115 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2116 extract32(a->dbm, 0, 6), 2117 extract32(a->dbm, 6, 6))) { 2118 return false; 2119 } 2120 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2121 } 2122 2123 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2124 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2125 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2126 2127 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2128 { 2129 uint64_t imm; 2130 2131 if (!dc_isar_feature(aa64_sve, s)) { 2132 return false; 2133 } 2134 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2135 extract32(a->dbm, 0, 6), 2136 extract32(a->dbm, 6, 6))) { 2137 return false; 2138 } 2139 if (sve_access_check(s)) { 2140 do_dupi_z(s, a->rd, imm); 2141 } 2142 return true; 2143 } 2144 2145 /* 2146 *** SVE Integer Wide Immediate - Predicated Group 2147 */ 2148 2149 /* Implement all merging copies. This is used for CPY (immediate), 2150 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 
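 * Active elements receive VAL; inactive elements are merged through from
 * Zn, which is why the helper takes t_zn as a second vector operand.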
2151 */ 2152 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2153 TCGv_i64 val) 2154 { 2155 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2156 static gen_cpy * const fns[4] = { 2157 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2158 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2159 }; 2160 unsigned vsz = vec_full_reg_size(s); 2161 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2162 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2163 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2164 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2165 2166 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); 2167 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn)); 2168 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 2169 2170 fns[esz](t_zd, t_zn, t_pg, val, desc); 2171 } 2172 2173 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2174 { 2175 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2176 return false; 2177 } 2178 if (sve_access_check(s)) { 2179 /* Decode the VFP immediate. */ 2180 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2181 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2182 } 2183 return true; 2184 } 2185 2186 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2187 { 2188 if (!dc_isar_feature(aa64_sve, s)) { 2189 return false; 2190 } 2191 if (sve_access_check(s)) { 2192 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2193 } 2194 return true; 2195 } 2196 2197 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2198 { 2199 static gen_helper_gvec_2i * const fns[4] = { 2200 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2201 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2202 }; 2203 2204 if (!dc_isar_feature(aa64_sve, s)) { 2205 return false; 2206 } 2207 if (sve_access_check(s)) { 2208 unsigned vsz = vec_full_reg_size(s); 2209 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2210 pred_full_reg_offset(s, a->pg), 2211 tcg_constant_i64(a->imm), 2212 vsz, vsz, 0, fns[a->esz]); 2213 } 2214 return true; 2215 } 2216 2217 /* 2218 *** SVE Permute Extract Group 2219 */ 2220 2221 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2222 { 2223 if (!sve_access_check(s)) { 2224 return true; 2225 } 2226 2227 unsigned vsz = vec_full_reg_size(s); 2228 unsigned n_ofs = imm >= vsz ? 0 : imm; 2229 unsigned n_siz = vsz - n_ofs; 2230 unsigned d = vec_full_reg_offset(s, rd); 2231 unsigned n = vec_full_reg_offset(s, rn); 2232 unsigned m = vec_full_reg_offset(s, rm); 2233 2234 /* Use host vector move insns if we have appropriate sizes 2235 * and no unfortunate overlap. 
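 * For example (illustrative): EXT Z0.B, Z0.B, Z1.B, #16 with vsz = 32
 * gives n_ofs = 16 and n_siz = 16, so the first move copies Z0 bytes
 * 16..31 down to offset 0 and the second copies Z1 bytes 0..15 into
 * the upper half.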
2236 */ 2237 if (m != d 2238 && n_ofs == size_for_gvec(n_ofs) 2239 && n_siz == size_for_gvec(n_siz) 2240 && (d != n || n_siz <= n_ofs)) { 2241 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2242 if (n_ofs != 0) { 2243 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2244 } 2245 } else { 2246 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2247 } 2248 return true; 2249 } 2250 2251 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2252 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2253 2254 /* 2255 *** SVE Permute - Unpredicated Group 2256 */ 2257 2258 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2259 { 2260 if (!dc_isar_feature(aa64_sve, s)) { 2261 return false; 2262 } 2263 if (sve_access_check(s)) { 2264 unsigned vsz = vec_full_reg_size(s); 2265 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2266 vsz, vsz, cpu_reg_sp(s, a->rn)); 2267 } 2268 return true; 2269 } 2270 2271 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2272 { 2273 if (!dc_isar_feature(aa64_sve, s)) { 2274 return false; 2275 } 2276 if ((a->imm & 0x1f) == 0) { 2277 return false; 2278 } 2279 if (sve_access_check(s)) { 2280 unsigned vsz = vec_full_reg_size(s); 2281 unsigned dofs = vec_full_reg_offset(s, a->rd); 2282 unsigned esz, index; 2283 2284 esz = ctz32(a->imm); 2285 index = a->imm >> (esz + 1); 2286 2287 if ((index << esz) < vsz) { 2288 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2289 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2290 } else { 2291 /* 2292 * While dup_mem handles 128-bit elements, dup_imm does not. 2293 * Thankfully element size doesn't matter for splatting zero. 2294 */ 2295 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2296 } 2297 } 2298 return true; 2299 } 2300 2301 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2302 { 2303 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2304 static gen_insr * const fns[4] = { 2305 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2306 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2307 }; 2308 unsigned vsz = vec_full_reg_size(s); 2309 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2310 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2311 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2312 2313 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd)); 2314 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2315 2316 fns[a->esz](t_zd, t_zn, val, desc); 2317 } 2318 2319 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2320 { 2321 if (!dc_isar_feature(aa64_sve, s)) { 2322 return false; 2323 } 2324 if (sve_access_check(s)) { 2325 TCGv_i64 t = tcg_temp_new_i64(); 2326 tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2327 do_insr_i64(s, a, t); 2328 } 2329 return true; 2330 } 2331 2332 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2333 { 2334 if (!dc_isar_feature(aa64_sve, s)) { 2335 return false; 2336 } 2337 if (sve_access_check(s)) { 2338 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2339 } 2340 return true; 2341 } 2342 2343 static gen_helper_gvec_2 * const rev_fns[4] = { 2344 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2345 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2346 }; 2347 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2348 2349 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2350 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2351 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2352 }; 2353 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 
0) 2354 2355 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2356 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2357 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2358 }; 2359 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2360 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2361 2362 static gen_helper_gvec_3 * const tbx_fns[4] = { 2363 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2364 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2365 }; 2366 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2367 2368 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2369 { 2370 static gen_helper_gvec_2 * const fns[4][2] = { 2371 { NULL, NULL }, 2372 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2373 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2374 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2375 }; 2376 2377 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2378 return false; 2379 } 2380 if (sve_access_check(s)) { 2381 unsigned vsz = vec_full_reg_size(s); 2382 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2383 vec_full_reg_offset(s, a->rn) 2384 + (a->h ? vsz / 2 : 0), 2385 vsz, vsz, 0, fns[a->esz][a->u]); 2386 } 2387 return true; 2388 } 2389 2390 /* 2391 *** SVE Permute - Predicates Group 2392 */ 2393 2394 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2395 gen_helper_gvec_3 *fn) 2396 { 2397 if (!sve_access_check(s)) { 2398 return true; 2399 } 2400 2401 unsigned vsz = pred_full_reg_size(s); 2402 2403 TCGv_ptr t_d = tcg_temp_new_ptr(); 2404 TCGv_ptr t_n = tcg_temp_new_ptr(); 2405 TCGv_ptr t_m = tcg_temp_new_ptr(); 2406 uint32_t desc = 0; 2407 2408 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2409 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2410 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2411 2412 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2413 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2414 tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm)); 2415 2416 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2417 return true; 2418 } 2419 2420 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2421 gen_helper_gvec_2 *fn) 2422 { 2423 if (!sve_access_check(s)) { 2424 return true; 2425 } 2426 2427 unsigned vsz = pred_full_reg_size(s); 2428 TCGv_ptr t_d = tcg_temp_new_ptr(); 2429 TCGv_ptr t_n = tcg_temp_new_ptr(); 2430 uint32_t desc = 0; 2431 2432 tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); 2433 tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); 2434 2435 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2436 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2437 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2438 2439 fn(t_d, t_n, tcg_constant_i32(desc)); 2440 return true; 2441 } 2442 2443 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2444 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2445 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2446 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2447 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2448 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2449 2450 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2451 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2452 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2453 2454 /* 2455 *** SVE 
Permute - Interleaving Group 2456 */ 2457 2458 static gen_helper_gvec_3 * const zip_fns[4] = { 2459 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2460 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2461 }; 2462 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2463 zip_fns[a->esz], a, 0) 2464 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2465 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2466 2467 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2468 gen_helper_sve2_zip_q, a, 0) 2469 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2470 gen_helper_sve2_zip_q, a, 2471 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2472 2473 static gen_helper_gvec_3 * const uzp_fns[4] = { 2474 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2475 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2476 }; 2477 2478 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2479 uzp_fns[a->esz], a, 0) 2480 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2481 uzp_fns[a->esz], a, 1 << a->esz) 2482 2483 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2484 gen_helper_sve2_uzp_q, a, 0) 2485 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2486 gen_helper_sve2_uzp_q, a, 16) 2487 2488 static gen_helper_gvec_3 * const trn_fns[4] = { 2489 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2490 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2491 }; 2492 2493 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2494 trn_fns[a->esz], a, 0) 2495 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2496 trn_fns[a->esz], a, 1 << a->esz) 2497 2498 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2499 gen_helper_sve2_trn_q, a, 0) 2500 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2501 gen_helper_sve2_trn_q, a, 16) 2502 2503 /* 2504 *** SVE Permute Vector - Predicated Group 2505 */ 2506 2507 static gen_helper_gvec_3 * const compact_fns[4] = { 2508 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2509 }; 2510 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2511 compact_fns[a->esz], a, 0) 2512 2513 /* Call the helper that computes the ARM LastActiveElement pseudocode 2514 * function, scaled by the element size. This includes the not found 2515 * indication; e.g. not found for esz=3 is -8. 2516 */ 2517 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2518 { 2519 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2520 * round up, as we do elsewhere, because we need the exact size. 2521 */ 2522 TCGv_ptr t_p = tcg_temp_new_ptr(); 2523 unsigned desc = 0; 2524 2525 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2526 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2527 2528 tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg)); 2529 2530 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2531 } 2532 2533 /* Increment LAST to the offset of the next element in the vector, 2534 * wrapping around to 0. 2535 */ 2536 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2537 { 2538 unsigned vsz = vec_full_reg_size(s); 2539 2540 tcg_gen_addi_i32(last, last, 1 << esz); 2541 if (is_power_of_2(vsz)) { 2542 tcg_gen_andi_i32(last, last, vsz - 1); 2543 } else { 2544 TCGv_i32 max = tcg_constant_i32(vsz); 2545 TCGv_i32 zero = tcg_constant_i32(0); 2546 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2547 } 2548 } 2549 2550 /* If LAST < 0, set LAST to the offset of the last element in the vector. 
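 * e.g. (illustrative) with vsz = 48 and esz = MO_32, a not-found LAST of
 * -4 wraps to 44, the offset of the final 4-byte element; power-of-2
 * sizes get the same result from the mask alone.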
*/ 2551 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2552 { 2553 unsigned vsz = vec_full_reg_size(s); 2554 2555 if (is_power_of_2(vsz)) { 2556 tcg_gen_andi_i32(last, last, vsz - 1); 2557 } else { 2558 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2559 TCGv_i32 zero = tcg_constant_i32(0); 2560 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2561 } 2562 } 2563 2564 /* Load an unsigned element of ESZ from BASE+OFS. */ 2565 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2566 { 2567 TCGv_i64 r = tcg_temp_new_i64(); 2568 2569 switch (esz) { 2570 case 0: 2571 tcg_gen_ld8u_i64(r, base, ofs); 2572 break; 2573 case 1: 2574 tcg_gen_ld16u_i64(r, base, ofs); 2575 break; 2576 case 2: 2577 tcg_gen_ld32u_i64(r, base, ofs); 2578 break; 2579 case 3: 2580 tcg_gen_ld_i64(r, base, ofs); 2581 break; 2582 default: 2583 g_assert_not_reached(); 2584 } 2585 return r; 2586 } 2587 2588 /* Load an unsigned element of ESZ from RM[LAST]. */ 2589 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2590 int rm, int esz) 2591 { 2592 TCGv_ptr p = tcg_temp_new_ptr(); 2593 2594 /* Convert offset into vector into offset into ENV. 2595 * The final adjustment for the vector register base 2596 * is added via constant offset to the load. 2597 */ 2598 #if HOST_BIG_ENDIAN 2599 /* Adjust for element ordering. See vec_reg_offset. */ 2600 if (esz < 3) { 2601 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2602 } 2603 #endif 2604 tcg_gen_ext_i32_ptr(p, last); 2605 tcg_gen_add_ptr(p, p, tcg_env); 2606 2607 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2608 } 2609 2610 /* Compute CLAST for a Zreg. */ 2611 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2612 { 2613 TCGv_i32 last; 2614 TCGLabel *over; 2615 TCGv_i64 ele; 2616 unsigned vsz, esz = a->esz; 2617 2618 if (!sve_access_check(s)) { 2619 return true; 2620 } 2621 2622 last = tcg_temp_new_i32(); 2623 over = gen_new_label(); 2624 2625 find_last_active(s, last, esz, a->pg); 2626 2627 /* There is of course no movcond for a 2048-bit vector, 2628 * so we must branch over the actual store. 2629 */ 2630 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2631 2632 if (!before) { 2633 incr_last_active(s, last, esz); 2634 } 2635 2636 ele = load_last_active(s, last, a->rm, esz); 2637 2638 vsz = vec_full_reg_size(s); 2639 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2640 2641 /* If this insn used MOVPRFX, we may need a second move. */ 2642 if (a->rd != a->rn) { 2643 TCGLabel *done = gen_new_label(); 2644 tcg_gen_br(done); 2645 2646 gen_set_label(over); 2647 do_mov_z(s, a->rd, a->rn); 2648 2649 gen_set_label(done); 2650 } else { 2651 gen_set_label(over); 2652 } 2653 return true; 2654 } 2655 2656 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2657 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2658 2659 /* Compute CLAST for a scalar. */ 2660 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2661 bool before, TCGv_i64 reg_val) 2662 { 2663 TCGv_i32 last = tcg_temp_new_i32(); 2664 TCGv_i64 ele, cmp; 2665 2666 find_last_active(s, last, esz, pg); 2667 2668 /* Extend the original value of last prior to incrementing. 
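 * The movcond below tests this unincremented copy, so a negative
 * "not found" value survives even after incr_last_active wraps LAST
 * to a loadable offset.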
*/ 2669 cmp = tcg_temp_new_i64(); 2670 tcg_gen_ext_i32_i64(cmp, last); 2671 2672 if (!before) { 2673 incr_last_active(s, last, esz); 2674 } 2675 2676 /* The conceit here is that while last < 0 indicates not found, after 2677 * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address 2678 * from which we can load garbage. We then discard the garbage with 2679 * a conditional move. 2680 */ 2681 ele = load_last_active(s, last, rm, esz); 2682 2683 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2684 ele, reg_val); 2685 } 2686 2687 /* Compute CLAST for a Vreg. */ 2688 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2689 { 2690 if (sve_access_check(s)) { 2691 int esz = a->esz; 2692 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2693 TCGv_i64 reg = load_esz(tcg_env, ofs, esz); 2694 2695 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2696 write_fp_dreg(s, a->rd, reg); 2697 } 2698 return true; 2699 } 2700 2701 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2702 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2703 2704 /* Compute CLAST for a Xreg. */ 2705 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2706 { 2707 TCGv_i64 reg; 2708 2709 if (!sve_access_check(s)) { 2710 return true; 2711 } 2712 2713 reg = cpu_reg(s, a->rd); 2714 switch (a->esz) { 2715 case 0: 2716 tcg_gen_ext8u_i64(reg, reg); 2717 break; 2718 case 1: 2719 tcg_gen_ext16u_i64(reg, reg); 2720 break; 2721 case 2: 2722 tcg_gen_ext32u_i64(reg, reg); 2723 break; 2724 case 3: 2725 break; 2726 default: 2727 g_assert_not_reached(); 2728 } 2729 2730 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2731 return true; 2732 } 2733 2734 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2735 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2736 2737 /* Compute LAST for a scalar. */ 2738 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2739 int pg, int rm, bool before) 2740 { 2741 TCGv_i32 last = tcg_temp_new_i32(); 2742 2743 find_last_active(s, last, esz, pg); 2744 if (before) { 2745 wrap_last_active(s, last, esz); 2746 } else { 2747 incr_last_active(s, last, esz); 2748 } 2749 2750 return load_last_active(s, last, rm, esz); 2751 } 2752 2753 /* Compute LAST for a Vreg. */ 2754 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2755 { 2756 if (sve_access_check(s)) { 2757 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2758 write_fp_dreg(s, a->rd, val); 2759 } 2760 return true; 2761 } 2762 2763 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2764 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2765 2766 /* Compute LAST for a Xreg. 
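 * Unlike the Vreg form there is no merge with the previous value: the
 * selected element is moved directly into Xd.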
*/ 2767 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2768 { 2769 if (sve_access_check(s)) { 2770 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2771 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2772 } 2773 return true; 2774 } 2775 2776 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2777 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2778 2779 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2780 { 2781 if (!dc_isar_feature(aa64_sve, s)) { 2782 return false; 2783 } 2784 if (sve_access_check(s)) { 2785 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2786 } 2787 return true; 2788 } 2789 2790 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2791 { 2792 if (!dc_isar_feature(aa64_sve, s)) { 2793 return false; 2794 } 2795 if (sve_access_check(s)) { 2796 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2797 TCGv_i64 t = load_esz(tcg_env, ofs, a->esz); 2798 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2799 } 2800 return true; 2801 } 2802 2803 static gen_helper_gvec_3 * const revb_fns[4] = { 2804 NULL, gen_helper_sve_revb_h, 2805 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2806 }; 2807 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2808 2809 static gen_helper_gvec_3 * const revh_fns[4] = { 2810 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2811 }; 2812 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2813 2814 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2815 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2816 2817 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2818 2819 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2820 gen_helper_sve_splice, a, a->esz) 2821 2822 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2823 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2824 2825 /* 2826 *** SVE Integer Compare - Vectors Group 2827 */ 2828 2829 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2830 gen_helper_gvec_flags_4 *gen_fn) 2831 { 2832 TCGv_ptr pd, zn, zm, pg; 2833 unsigned vsz; 2834 TCGv_i32 t; 2835 2836 if (gen_fn == NULL) { 2837 return false; 2838 } 2839 if (!sve_access_check(s)) { 2840 return true; 2841 } 2842 2843 vsz = vec_full_reg_size(s); 2844 t = tcg_temp_new_i32(); 2845 pd = tcg_temp_new_ptr(); 2846 zn = tcg_temp_new_ptr(); 2847 zm = tcg_temp_new_ptr(); 2848 pg = tcg_temp_new_ptr(); 2849 2850 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2851 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2852 tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm)); 2853 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2854 2855 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2856 2857 do_pred_flags(t); 2858 return true; 2859 } 2860 2861 #define DO_PPZZ(NAME, name) \ 2862 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2863 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2864 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2865 }; \ 2866 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2867 a, name##_ppzz_fns[a->esz]) 2868 2869 DO_PPZZ(CMPEQ, cmpeq) 2870 DO_PPZZ(CMPNE, cmpne) 2871 DO_PPZZ(CMPGT, cmpgt) 2872 DO_PPZZ(CMPGE, cmpge) 2873 DO_PPZZ(CMPHI, cmphi) 2874 DO_PPZZ(CMPHS, cmphs) 2875 2876 #undef DO_PPZZ 2877 2878 #define DO_PPZW(NAME, name) \ 2879 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2880 
gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2881 gen_helper_sve_##name##_ppzw_s, NULL \ 2882 }; \ 2883 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2884 a, name##_ppzw_fns[a->esz]) 2885 2886 DO_PPZW(CMPEQ, cmpeq) 2887 DO_PPZW(CMPNE, cmpne) 2888 DO_PPZW(CMPGT, cmpgt) 2889 DO_PPZW(CMPGE, cmpge) 2890 DO_PPZW(CMPHI, cmphi) 2891 DO_PPZW(CMPHS, cmphs) 2892 DO_PPZW(CMPLT, cmplt) 2893 DO_PPZW(CMPLE, cmple) 2894 DO_PPZW(CMPLO, cmplo) 2895 DO_PPZW(CMPLS, cmpls) 2896 2897 #undef DO_PPZW 2898 2899 /* 2900 *** SVE Integer Compare - Immediate Groups 2901 */ 2902 2903 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2904 gen_helper_gvec_flags_3 *gen_fn) 2905 { 2906 TCGv_ptr pd, zn, pg; 2907 unsigned vsz; 2908 TCGv_i32 t; 2909 2910 if (gen_fn == NULL) { 2911 return false; 2912 } 2913 if (!sve_access_check(s)) { 2914 return true; 2915 } 2916 2917 vsz = vec_full_reg_size(s); 2918 t = tcg_temp_new_i32(); 2919 pd = tcg_temp_new_ptr(); 2920 zn = tcg_temp_new_ptr(); 2921 pg = tcg_temp_new_ptr(); 2922 2923 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd)); 2924 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn)); 2925 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg)); 2926 2927 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 2928 2929 do_pred_flags(t); 2930 return true; 2931 } 2932 2933 #define DO_PPZI(NAME, name) \ 2934 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 2935 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 2936 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 2937 }; \ 2938 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 2939 name##_ppzi_fns[a->esz]) 2940 2941 DO_PPZI(CMPEQ, cmpeq) 2942 DO_PPZI(CMPNE, cmpne) 2943 DO_PPZI(CMPGT, cmpgt) 2944 DO_PPZI(CMPGE, cmpge) 2945 DO_PPZI(CMPHI, cmphi) 2946 DO_PPZI(CMPHS, cmphs) 2947 DO_PPZI(CMPLT, cmplt) 2948 DO_PPZI(CMPLE, cmple) 2949 DO_PPZI(CMPLO, cmplo) 2950 DO_PPZI(CMPLS, cmpls) 2951 2952 #undef DO_PPZI 2953 2954 /* 2955 *** SVE Partition Break Group 2956 */ 2957 2958 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2959 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2960 { 2961 if (!sve_access_check(s)) { 2962 return true; 2963 } 2964 2965 unsigned vsz = pred_full_reg_size(s); 2966 2967 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2968 TCGv_ptr d = tcg_temp_new_ptr(); 2969 TCGv_ptr n = tcg_temp_new_ptr(); 2970 TCGv_ptr m = tcg_temp_new_ptr(); 2971 TCGv_ptr g = tcg_temp_new_ptr(); 2972 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2973 2974 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 2975 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 2976 tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm)); 2977 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 2978 2979 if (a->s) { 2980 TCGv_i32 t = tcg_temp_new_i32(); 2981 fn_s(t, d, n, m, g, desc); 2982 do_pred_flags(t); 2983 } else { 2984 fn(d, n, m, g, desc); 2985 } 2986 return true; 2987 } 2988 2989 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2990 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 2991 { 2992 if (!sve_access_check(s)) { 2993 return true; 2994 } 2995 2996 unsigned vsz = pred_full_reg_size(s); 2997 2998 /* Predicate sizes may be smaller and cannot use simd_desc. 
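 * As in do_brk3 above, the exact predicate size is therefore packed
 * into PREDDESC by hand; e.g. (illustrative) a 128-bit VL has 2-byte
 * predicates, below the granule that simd_desc can encode.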
*/ 2999 TCGv_ptr d = tcg_temp_new_ptr(); 3000 TCGv_ptr n = tcg_temp_new_ptr(); 3001 TCGv_ptr g = tcg_temp_new_ptr(); 3002 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3003 3004 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd)); 3005 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn)); 3006 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg)); 3007 3008 if (a->s) { 3009 TCGv_i32 t = tcg_temp_new_i32(); 3010 fn_s(t, d, n, g, desc); 3011 do_pred_flags(t); 3012 } else { 3013 fn(d, n, g, desc); 3014 } 3015 return true; 3016 } 3017 3018 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3019 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3020 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3021 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3022 3023 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3024 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3025 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3026 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3027 3028 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3029 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3030 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3031 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3032 3033 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3034 gen_helper_sve_brkn, gen_helper_sve_brkns) 3035 3036 /* 3037 *** SVE Predicate Count Group 3038 */ 3039 3040 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3041 { 3042 unsigned psz = pred_full_reg_size(s); 3043 3044 if (psz <= 8) { 3045 uint64_t psz_mask; 3046 3047 tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn)); 3048 if (pn != pg) { 3049 TCGv_i64 g = tcg_temp_new_i64(); 3050 tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg)); 3051 tcg_gen_and_i64(val, val, g); 3052 } 3053 3054 /* Reduce the pred_esz_masks value simply to reduce the 3055 * size of the code generated here. 3056 */ 3057 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3058 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3059 3060 tcg_gen_ctpop_i64(val, val); 3061 } else { 3062 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3063 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3064 unsigned desc = 0; 3065 3066 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3067 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3068 3069 tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn)); 3070 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3071 3072 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3073 } 3074 } 3075 3076 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3077 { 3078 if (!dc_isar_feature(aa64_sve, s)) { 3079 return false; 3080 } 3081 if (sve_access_check(s)) { 3082 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3083 } 3084 return true; 3085 } 3086 3087 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3088 { 3089 if (!dc_isar_feature(aa64_sve, s)) { 3090 return false; 3091 } 3092 if (sve_access_check(s)) { 3093 TCGv_i64 reg = cpu_reg(s, a->rd); 3094 TCGv_i64 val = tcg_temp_new_i64(); 3095 3096 do_cntp(s, val, a->esz, a->pg, a->pg); 3097 if (a->d) { 3098 tcg_gen_sub_i64(reg, reg, val); 3099 } else { 3100 tcg_gen_add_i64(reg, reg, val); 3101 } 3102 } 3103 return true; 3104 } 3105 3106 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3107 { 3108 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3109 return false; 3110 } 3111 if (sve_access_check(s)) { 3112 unsigned vsz = vec_full_reg_size(s); 3113 TCGv_i64 val = tcg_temp_new_i64(); 3114 GVecGen2sFn *gvec_fn = a->d ? 
tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3115 3116 do_cntp(s, val, a->esz, a->pg, a->pg); 3117 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3118 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3119 } 3120 return true; 3121 } 3122 3123 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3124 { 3125 if (!dc_isar_feature(aa64_sve, s)) { 3126 return false; 3127 } 3128 if (sve_access_check(s)) { 3129 TCGv_i64 reg = cpu_reg(s, a->rd); 3130 TCGv_i64 val = tcg_temp_new_i64(); 3131 3132 do_cntp(s, val, a->esz, a->pg, a->pg); 3133 do_sat_addsub_32(reg, val, a->u, a->d); 3134 } 3135 return true; 3136 } 3137 3138 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3139 { 3140 if (!dc_isar_feature(aa64_sve, s)) { 3141 return false; 3142 } 3143 if (sve_access_check(s)) { 3144 TCGv_i64 reg = cpu_reg(s, a->rd); 3145 TCGv_i64 val = tcg_temp_new_i64(); 3146 3147 do_cntp(s, val, a->esz, a->pg, a->pg); 3148 do_sat_addsub_64(reg, val, a->u, a->d); 3149 } 3150 return true; 3151 } 3152 3153 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3154 { 3155 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3156 return false; 3157 } 3158 if (sve_access_check(s)) { 3159 TCGv_i64 val = tcg_temp_new_i64(); 3160 do_cntp(s, val, a->esz, a->pg, a->pg); 3161 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3162 } 3163 return true; 3164 } 3165 3166 /* 3167 *** SVE Integer Compare Scalars Group 3168 */ 3169 3170 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3171 { 3172 if (!dc_isar_feature(aa64_sve, s)) { 3173 return false; 3174 } 3175 if (!sve_access_check(s)) { 3176 return true; 3177 } 3178 3179 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3180 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3181 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3182 TCGv_i64 cmp = tcg_temp_new_i64(); 3183 3184 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3185 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3186 3187 /* VF = !NF & !CF. */ 3188 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3189 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3190 3191 /* Both NF and VF actually look at bit 31. */ 3192 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3193 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3194 return true; 3195 } 3196 3197 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3198 { 3199 TCGv_i64 op0, op1, t0, t1, tmax; 3200 TCGv_i32 t2; 3201 TCGv_ptr ptr; 3202 unsigned vsz = vec_full_reg_size(s); 3203 unsigned desc = 0; 3204 TCGCond cond; 3205 uint64_t maxval; 3206 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3207 bool eq = a->eq == a->lt; 3208 3209 /* The greater-than conditions are all SVE2. */ 3210 if (a->lt 3211 ? !dc_isar_feature(aa64_sve, s) 3212 : !dc_isar_feature(aa64_sve2, s)) { 3213 return false; 3214 } 3215 if (!sve_access_check(s)) { 3216 return true; 3217 } 3218 3219 op0 = read_cpu_reg(s, a->rn, 1); 3220 op1 = read_cpu_reg(s, a->rm, 1); 3221 3222 if (!a->sf) { 3223 if (a->u) { 3224 tcg_gen_ext32u_i64(op0, op0); 3225 tcg_gen_ext32u_i64(op1, op1); 3226 } else { 3227 tcg_gen_ext32s_i64(op0, op0); 3228 tcg_gen_ext32s_i64(op1, op1); 3229 } 3230 } 3231 3232 /* For the helper, compress the different conditions into a computation 3233 * of how many iterations for which the condition is true. 3234 */ 3235 t0 = tcg_temp_new_i64(); 3236 t1 = tcg_temp_new_i64(); 3237 3238 if (a->lt) { 3239 tcg_gen_sub_i64(t0, op1, op0); 3240 if (a->u) { 3241 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3242 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3243 } else { 3244 maxval = a->sf ? 
INT64_MAX : INT32_MAX; 3245 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3246 } 3247 } else { 3248 tcg_gen_sub_i64(t0, op0, op1); 3249 if (a->u) { 3250 maxval = 0; 3251 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3252 } else { 3253 maxval = a->sf ? INT64_MIN : INT32_MIN; 3254 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3255 } 3256 } 3257 3258 tmax = tcg_constant_i64(vsz >> a->esz); 3259 if (eq) { 3260 /* Equality means one more iteration. */ 3261 tcg_gen_addi_i64(t0, t0, 1); 3262 3263 /* 3264 * For the less-than while, if op1 is maxval (and the only time 3265 * the addition above could overflow), then we produce an all-true 3266 * predicate by setting the count to the vector length. This is 3267 * because the pseudocode is described as an increment + compare 3268 * loop, and the maximum integer would always compare true. 3269 * Similarly, the greater-than while has the same issue with the 3270 * minimum integer due to the decrement + compare loop. 3271 */ 3272 tcg_gen_movi_i64(t1, maxval); 3273 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3274 } 3275 3276 /* Bound to the maximum. */ 3277 tcg_gen_umin_i64(t0, t0, tmax); 3278 3279 /* Set the count to zero if the condition is false. */ 3280 tcg_gen_movi_i64(t1, 0); 3281 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3282 3283 /* Since we're bounded, pass as a 32-bit type. */ 3284 t2 = tcg_temp_new_i32(); 3285 tcg_gen_extrl_i64_i32(t2, t0); 3286 3287 /* Scale elements to bits. */ 3288 tcg_gen_shli_i32(t2, t2, a->esz); 3289 3290 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3291 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3292 3293 ptr = tcg_temp_new_ptr(); 3294 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3295 3296 if (a->lt) { 3297 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3298 } else { 3299 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3300 } 3301 do_pred_flags(t2); 3302 return true; 3303 } 3304 3305 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3306 { 3307 TCGv_i64 op0, op1, diff, t1, tmax; 3308 TCGv_i32 t2; 3309 TCGv_ptr ptr; 3310 unsigned vsz = vec_full_reg_size(s); 3311 unsigned desc = 0; 3312 3313 if (!dc_isar_feature(aa64_sve2, s)) { 3314 return false; 3315 } 3316 if (!sve_access_check(s)) { 3317 return true; 3318 } 3319 3320 op0 = read_cpu_reg(s, a->rn, 1); 3321 op1 = read_cpu_reg(s, a->rm, 1); 3322 3323 tmax = tcg_constant_i64(vsz); 3324 diff = tcg_temp_new_i64(); 3325 3326 if (a->rw) { 3327 /* WHILERW */ 3328 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3329 t1 = tcg_temp_new_i64(); 3330 tcg_gen_sub_i64(diff, op0, op1); 3331 tcg_gen_sub_i64(t1, op1, op0); 3332 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3333 /* Round down to a multiple of ESIZE. */ 3334 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3335 /* If op1 == op0, diff == 0, and the condition is always true. */ 3336 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3337 } else { 3338 /* WHILEWR */ 3339 tcg_gen_sub_i64(diff, op1, op0); 3340 /* Round down to a multiple of ESIZE. */ 3341 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3342 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3343 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3344 } 3345 3346 /* Bound to the maximum. */ 3347 tcg_gen_umin_i64(diff, diff, tmax); 3348 3349 /* Since we're bounded, pass as a 32-bit type. 
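 * (diff has already been clamped to at most vsz, so the truncation
 * is exact).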
*/ 3350 t2 = tcg_temp_new_i32(); 3351 tcg_gen_extrl_i64_i32(t2, diff); 3352 3353 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3354 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3355 3356 ptr = tcg_temp_new_ptr(); 3357 tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); 3358 3359 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3360 do_pred_flags(t2); 3361 return true; 3362 } 3363 3364 /* 3365 *** SVE Integer Wide Immediate - Unpredicated Group 3366 */ 3367 3368 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3369 { 3370 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3371 return false; 3372 } 3373 if (sve_access_check(s)) { 3374 unsigned vsz = vec_full_reg_size(s); 3375 int dofs = vec_full_reg_offset(s, a->rd); 3376 uint64_t imm; 3377 3378 /* Decode the VFP immediate. */ 3379 imm = vfp_expand_imm(a->esz, a->imm); 3380 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3381 } 3382 return true; 3383 } 3384 3385 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3386 { 3387 if (!dc_isar_feature(aa64_sve, s)) { 3388 return false; 3389 } 3390 if (sve_access_check(s)) { 3391 unsigned vsz = vec_full_reg_size(s); 3392 int dofs = vec_full_reg_offset(s, a->rd); 3393 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3394 } 3395 return true; 3396 } 3397 3398 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3399 3400 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3401 { 3402 a->imm = -a->imm; 3403 return trans_ADD_zzi(s, a); 3404 } 3405 3406 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3407 { 3408 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3409 static const GVecGen2s op[4] = { 3410 { .fni8 = tcg_gen_vec_sub8_i64, 3411 .fniv = tcg_gen_sub_vec, 3412 .fno = gen_helper_sve_subri_b, 3413 .opt_opc = vecop_list, 3414 .vece = MO_8, 3415 .scalar_first = true }, 3416 { .fni8 = tcg_gen_vec_sub16_i64, 3417 .fniv = tcg_gen_sub_vec, 3418 .fno = gen_helper_sve_subri_h, 3419 .opt_opc = vecop_list, 3420 .vece = MO_16, 3421 .scalar_first = true }, 3422 { .fni4 = tcg_gen_sub_i32, 3423 .fniv = tcg_gen_sub_vec, 3424 .fno = gen_helper_sve_subri_s, 3425 .opt_opc = vecop_list, 3426 .vece = MO_32, 3427 .scalar_first = true }, 3428 { .fni8 = tcg_gen_sub_i64, 3429 .fniv = tcg_gen_sub_vec, 3430 .fno = gen_helper_sve_subri_d, 3431 .opt_opc = vecop_list, 3432 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3433 .vece = MO_64, 3434 .scalar_first = true } 3435 }; 3436 3437 if (!dc_isar_feature(aa64_sve, s)) { 3438 return false; 3439 } 3440 if (sve_access_check(s)) { 3441 unsigned vsz = vec_full_reg_size(s); 3442 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3443 vec_full_reg_offset(s, a->rn), 3444 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3445 } 3446 return true; 3447 } 3448 3449 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3450 3451 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3452 { 3453 if (sve_access_check(s)) { 3454 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3455 tcg_constant_i64(a->imm), u, d); 3456 } 3457 return true; 3458 } 3459 3460 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3461 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3462 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3463 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3464 3465 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3466 { 3467 if (sve_access_check(s)) { 3468 unsigned vsz = vec_full_reg_size(s); 
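/* Pass the immediate to the out-of-line helper as a runtime constant. */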
3469 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3470 vec_full_reg_offset(s, a->rn), 3471 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3472 } 3473 return true; 3474 } 3475 3476 #define DO_ZZI(NAME, name) \ 3477 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3478 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3479 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3480 }; \ 3481 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3482 3483 DO_ZZI(SMAX, smax) 3484 DO_ZZI(UMAX, umax) 3485 DO_ZZI(SMIN, smin) 3486 DO_ZZI(UMIN, umin) 3487 3488 #undef DO_ZZI 3489 3490 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3491 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3492 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3493 }; 3494 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3495 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3496 3497 /* 3498 * SVE Multiply - Indexed 3499 */ 3500 3501 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3502 gen_helper_gvec_sdot_idx_b, a) 3503 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3504 gen_helper_gvec_sdot_idx_h, a) 3505 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3506 gen_helper_gvec_udot_idx_b, a) 3507 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3508 gen_helper_gvec_udot_idx_h, a) 3509 3510 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3511 gen_helper_gvec_sudot_idx_b, a) 3512 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3513 gen_helper_gvec_usdot_idx_b, a) 3514 3515 #define DO_SVE2_RRX(NAME, FUNC) \ 3516 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3517 a->rd, a->rn, a->rm, a->index) 3518 3519 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3520 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3521 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3522 3523 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3524 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3525 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3526 3527 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3528 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3529 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3530 3531 #undef DO_SVE2_RRX 3532 3533 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3534 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3535 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3536 3537 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3538 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3539 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3540 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3541 3542 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3543 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3544 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3545 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3546 3547 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3548 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3549 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3550 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3551 3552 #undef DO_SVE2_RRX_TB 3553 3554 #define DO_SVE2_RRXR(NAME, FUNC) \ 3555 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3556 3557 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3558 DO_SVE2_RRXR(MLA_zzxz_s, 
gen_helper_gvec_mla_idx_s) 3559 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3560 3561 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3562 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3563 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3564 3565 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3566 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3567 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3568 3569 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3570 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3571 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3572 3573 #undef DO_SVE2_RRXR 3574 3575 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3576 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3577 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3578 3579 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3580 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3581 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3582 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3583 3584 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3585 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3586 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3587 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3588 3589 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3590 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3591 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3592 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3593 3594 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3595 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3596 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3597 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3598 3599 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3600 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3601 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3602 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3603 3604 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3605 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3606 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3607 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3608 3609 #undef DO_SVE2_RRXR_TB 3610 3611 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3612 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3613 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3614 3615 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3616 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3617 3618 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3619 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3620 3621 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3622 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3623 3624 #undef DO_SVE2_RRXR_ROT 3625 3626 /* 3627 *** SVE Floating Point Multiply-Add Indexed Group 3628 */ 3629 3630 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3631 { 3632 static gen_helper_gvec_4_ptr * const fns[4] = { 3633 NULL, 3634 
gen_helper_gvec_fmla_idx_h, 3635 gen_helper_gvec_fmla_idx_s, 3636 gen_helper_gvec_fmla_idx_d, 3637 }; 3638 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3639 (a->index << 1) | sub, 3640 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3641 } 3642 3643 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3644 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3645 3646 /* 3647 *** SVE Floating Point Multiply Indexed Group 3648 */ 3649 3650 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3651 NULL, gen_helper_gvec_fmul_idx_h, 3652 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3653 }; 3654 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3655 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3656 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3657 3658 /* 3659 *** SVE Floating Point Fast Reduction Group 3660 */ 3661 3662 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3663 TCGv_ptr, TCGv_i32); 3664 3665 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3666 gen_helper_fp_reduce *fn) 3667 { 3668 unsigned vsz, p2vsz; 3669 TCGv_i32 t_desc; 3670 TCGv_ptr t_zn, t_pg, status; 3671 TCGv_i64 temp; 3672 3673 if (fn == NULL) { 3674 return false; 3675 } 3676 if (!sve_access_check(s)) { 3677 return true; 3678 } 3679 3680 vsz = vec_full_reg_size(s); 3681 p2vsz = pow2ceil(vsz); 3682 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3683 temp = tcg_temp_new_i64(); 3684 t_zn = tcg_temp_new_ptr(); 3685 t_pg = tcg_temp_new_ptr(); 3686 3687 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); 3688 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3689 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3690 3691 fn(temp, t_zn, t_pg, status, t_desc); 3692 3693 write_fp_dreg(s, a->rd, temp); 3694 return true; 3695 } 3696 3697 #define DO_VPZ(NAME, name) \ 3698 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3699 NULL, gen_helper_sve_##name##_h, \ 3700 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3701 }; \ 3702 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3703 3704 DO_VPZ(FADDV, faddv) 3705 DO_VPZ(FMINNMV, fminnmv) 3706 DO_VPZ(FMAXNMV, fmaxnmv) 3707 DO_VPZ(FMINV, fminv) 3708 DO_VPZ(FMAXV, fmaxv) 3709 3710 #undef DO_VPZ 3711 3712 /* 3713 *** SVE Floating Point Unary Operations - Unpredicated Group 3714 */ 3715 3716 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3717 NULL, gen_helper_gvec_frecpe_h, 3718 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3719 }; 3720 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3721 3722 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3723 NULL, gen_helper_gvec_frsqrte_h, 3724 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 3725 }; 3726 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3727 3728 /* 3729 *** SVE Floating Point Compare with Zero Group 3730 */ 3731 3732 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3733 gen_helper_gvec_3_ptr *fn) 3734 { 3735 if (fn == NULL) { 3736 return false; 3737 } 3738 if (sve_access_check(s)) { 3739 unsigned vsz = vec_full_reg_size(s); 3740 TCGv_ptr status = 3741 fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3742 3743 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 3744 vec_full_reg_offset(s, a->rn), 3745 pred_full_reg_offset(s, a->pg), 3746 status, vsz, vsz, 0, fn); 3747 } 3748 return true; 3749 } 3750 3751 #define DO_PPZ(NAME, name) \ 3752 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 3753 NULL, gen_helper_sve_##name##_h, \ 3754 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3755 }; \ 3756 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 3757 3758 DO_PPZ(FCMGE_ppz0, fcmge0) 3759 DO_PPZ(FCMGT_ppz0, fcmgt0) 3760 DO_PPZ(FCMLE_ppz0, fcmle0) 3761 DO_PPZ(FCMLT_ppz0, fcmlt0) 3762 DO_PPZ(FCMEQ_ppz0, fcmeq0) 3763 DO_PPZ(FCMNE_ppz0, fcmne0) 3764 3765 #undef DO_PPZ 3766 3767 /* 3768 *** SVE floating-point trig multiply-add coefficient 3769 */ 3770 3771 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 3772 NULL, gen_helper_sve_ftmad_h, 3773 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 3774 }; 3775 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 3776 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, 3777 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3778 3779 /* 3780 *** SVE Floating Point Accumulating Reduction Group 3781 */ 3782 3783 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 3784 { 3785 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 3786 TCGv_ptr, TCGv_ptr, TCGv_i32); 3787 static fadda_fn * const fns[3] = { 3788 gen_helper_sve_fadda_h, 3789 gen_helper_sve_fadda_s, 3790 gen_helper_sve_fadda_d, 3791 }; 3792 unsigned vsz = vec_full_reg_size(s); 3793 TCGv_ptr t_rm, t_pg, t_fpst; 3794 TCGv_i64 t_val; 3795 TCGv_i32 t_desc; 3796 3797 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3798 return false; 3799 } 3800 s->is_nonstreaming = true; 3801 if (!sve_access_check(s)) { 3802 return true; 3803 } 3804 3805 t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 3806 t_rm = tcg_temp_new_ptr(); 3807 t_pg = tcg_temp_new_ptr(); 3808 tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); 3809 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); 3810 t_fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3811 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3812 3813 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 3814 3815 write_fp_dreg(s, a->rd, t_val); 3816 return true; 3817 } 3818 3819 /* 3820 *** SVE Floating Point Arithmetic - Unpredicated Group 3821 */ 3822 3823 #define DO_FP3(NAME, name) \ 3824 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 3825 NULL, gen_helper_gvec_##name##_h, \ 3826 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 3827 }; \ 3828 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 3829 3830 DO_FP3(FADD_zzz, fadd) 3831 DO_FP3(FSUB_zzz, fsub) 3832 DO_FP3(FMUL_zzz, fmul) 3833 DO_FP3(FRECPS, recps) 3834 DO_FP3(FRSQRTS, rsqrts) 3835 3836 #undef DO_FP3 3837 3838 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 3839 NULL, gen_helper_gvec_ftsmul_h, 3840 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 3841 }; 3842 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 3843 ftsmul_fns[a->esz], a, 0) 3844 3845 /* 3846 *** SVE Floating Point Arithmetic - Predicated Group 3847 */ 3848 3849 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 3850 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 3851 NULL, gen_helper_##name##_h, \ 3852 gen_helper_##name##_s, gen_helper_##name##_d \ 3853 }; \ 3854 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 3855 3856 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 3857 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 3858 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 3859 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) 3860 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) 3861 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 3862 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 3863 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) 3864 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 3865 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 3866 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 3867 3868 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 3869 TCGv_i64, TCGv_ptr, TCGv_i32); 3870 3871 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 3872 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 3873 { 3874 unsigned vsz = vec_full_reg_size(s); 3875 TCGv_ptr t_zd, t_zn, t_pg, status; 3876 TCGv_i32 desc; 3877 3878 t_zd = tcg_temp_new_ptr(); 3879 t_zn = tcg_temp_new_ptr(); 3880 t_pg = tcg_temp_new_ptr(); 3881 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); 3882 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); 3883 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 3884 3885 status = fpstatus_ptr(is_fp16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3886 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3887 fn(t_zd, t_zn, t_pg, scalar, status, desc); 3888 } 3889 3890 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 3891 gen_helper_sve_fp2scalar *fn) 3892 { 3893 if (fn == NULL) { 3894 return false; 3895 } 3896 if (sve_access_check(s)) { 3897 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 3898 tcg_constant_i64(imm), fn); 3899 } 3900 return true; 3901 } 3902 3903 #define DO_FP_IMM(NAME, name, const0, const1) \ 3904 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 3905 NULL, gen_helper_sve_##name##_h, \ 3906 gen_helper_sve_##name##_s, \ 3907 gen_helper_sve_##name##_d \ 3908 }; \ 3909 static uint64_t const name##_const[4][2] = { \ 3910 { -1, -1 }, \ 3911 { float16_##const0, float16_##const1 }, \ 3912 { float32_##const0, float32_##const1 }, \ 3913 { float64_##const0, float64_##const1 }, \ 3914 }; \ 3915 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 3916 name##_const[a->esz][a->imm], name##_fns[a->esz]) 3917 3918 DO_FP_IMM(FADD, fadds, half, one) 3919 DO_FP_IMM(FSUB, fsubs, half, one) 3920 DO_FP_IMM(FMUL, fmuls, half, two) 3921 DO_FP_IMM(FSUBR, fsubrs, half, one) 3922 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 3923 DO_FP_IMM(FMINNM, fminnms, zero, one) 3924 DO_FP_IMM(FMAX, fmaxs, zero, one) 3925 DO_FP_IMM(FMIN, fmins, zero, one) 3926 3927 #undef DO_FP_IMM 3928 3929 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 3930 gen_helper_gvec_4_ptr *fn) 3931 { 3932 if (fn == NULL) { 3933 return false; 3934 } 3935 if (sve_access_check(s)) { 3936 unsigned vsz = vec_full_reg_size(s); 3937 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3938 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 3939 vec_full_reg_offset(s, a->rn), 3940 vec_full_reg_offset(s, a->rm), 3941 pred_full_reg_offset(s, a->pg), 3942 status, vsz, vsz, 0, fn); 3943 } 3944 return true; 3945 } 3946 3947 #define DO_FPCMP(NAME, name) \ 3948 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 3949 NULL, gen_helper_sve_##name##_h, \ 3950 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3951 }; \ 3952 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 3953 3954 DO_FPCMP(FCMGE, fcmge) 3955 DO_FPCMP(FCMGT, fcmgt) 3956 DO_FPCMP(FCMEQ, fcmeq) 3957 DO_FPCMP(FCMNE, fcmne) 3958 DO_FPCMP(FCMUO, fcmuo) 3959 DO_FPCMP(FACGE, facge) 3960 DO_FPCMP(FACGT, facgt) 3961 3962 #undef DO_FPCMP 3963 3964 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 3965 NULL, gen_helper_sve_fcadd_h, 3966 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 3967 }; 3968 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 3969 a->rd, a->rn, a->rm, a->pg, a->rot, 3970 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3971 3972 #define DO_FMLA(NAME, name) \ 3973 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 3974 NULL, gen_helper_sve_##name##_h, \ 3975 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3976 }; \ 3977 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 3978 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 3979 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3980 3981 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 3982 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 3983 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 3984 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 3985 3986 #undef DO_FMLA 3987 3988 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 3989 NULL, gen_helper_sve_fcmla_zpzzz_h, 3990 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 3991 }; 3992 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 3993 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 3994 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3995 3996 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 3997 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 3998 }; 3999 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 4000 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 4001 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4002 4003 /* 4004 *** SVE Floating Point Unary Operations Predicated Group 4005 */ 4006 4007 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4008 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 4009 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4010 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 4011 4012 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 4013 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 4014 4015 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4016 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 4017 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4018 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 4019 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4020 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 4021 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4022 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 4023 4024 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4025 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 4026 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4027 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 4028 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4029 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 4030 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4031 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 4032 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4033 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 4034 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4035 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 4036 4037 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4038 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 4039 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4040 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 4041 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4042 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 4043 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4044 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 4045 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4046 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 4047 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4048 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 4049 4050 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4051 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 4052 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4053 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 4054 4055 static gen_helper_gvec_3_ptr * const frint_fns[] = { 4056 NULL, 4057 gen_helper_sve_frint_h, 4058 gen_helper_sve_frint_s, 4059 gen_helper_sve_frint_d 4060 }; 4061 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 4062 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const frintx_fns[] = {
    NULL,
    gen_helper_sve_frintx_h,
    gen_helper_sve_frintx_s,
    gen_helper_sve_frintx_d
};
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    tmode = gen_set_rmode(mode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    gen_restore_rmode(tmode, status);
    return true;
}

TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           FPROUNDING_TIEEVEN, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           FPROUNDING_POSINF, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           FPROUNDING_NEGINF, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           FPROUNDING_ZERO, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           FPROUNDING_TIEAWAY, frint_fns[a->esz])

static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL, gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL, gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ?
FPST_FPCR_F16 : FPST_FPCR) 4124 4125 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4126 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 4127 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4128 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 4129 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4130 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 4131 4132 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4133 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 4134 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4135 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 4136 4137 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4138 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 4139 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4140 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4141 4142 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4143 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4144 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4145 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4146 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4147 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4148 4149 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4150 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4151 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4152 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4153 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4154 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4155 4156 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4157 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4158 4159 /* 4160 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4161 */ 4162 4163 /* Subroutine loading a vector register at VOFS of LEN bytes. 4164 * The load should begin at the address Rn + IMM. 4165 */ 4166 4167 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4168 int len, int rn, int imm) 4169 { 4170 int len_align = QEMU_ALIGN_DOWN(len, 16); 4171 int len_remain = len % 16; 4172 int nparts = len / 16 + ctpop8(len_remain); 4173 int midx = get_mem_index(s); 4174 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4175 TCGv_i128 t16; 4176 4177 dirty_addr = tcg_temp_new_i64(); 4178 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4179 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4180 4181 /* 4182 * Note that unpredicated load/store of vector/predicate registers 4183 * are defined as a stream of bytes, which equates to little-endian 4184 * operations on larger quantities. 4185 * Attempt to keep code expansion to a minimum by limiting the 4186 * amount of unrolling done. 
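 *
 * As a worked example (derived from the arithmetic below): a 32-byte
 * vector load has len_align = 32, len_remain = 0 and nparts = 2, so it
 * is unrolled in-line as two 16-byte loads; a 4-byte predicate load has
 * len_align = 0 and nparts = 1, so the unrolled loop body is skipped
 * and the whole transfer is handled by the len_remain switch at the end.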
4187 */ 4188 if (nparts <= 4) { 4189 int i; 4190 4191 t0 = tcg_temp_new_i64(); 4192 t1 = tcg_temp_new_i64(); 4193 t16 = tcg_temp_new_i128(); 4194 4195 for (i = 0; i < len_align; i += 16) { 4196 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4197 MO_LE | MO_128 | MO_ATOM_NONE); 4198 tcg_gen_extr_i128_i64(t0, t1, t16); 4199 tcg_gen_st_i64(t0, base, vofs + i); 4200 tcg_gen_st_i64(t1, base, vofs + i + 8); 4201 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4202 } 4203 } else { 4204 TCGLabel *loop = gen_new_label(); 4205 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4206 4207 tcg_gen_movi_ptr(i, 0); 4208 gen_set_label(loop); 4209 4210 t16 = tcg_temp_new_i128(); 4211 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4212 MO_LE | MO_128 | MO_ATOM_NONE); 4213 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4214 4215 tp = tcg_temp_new_ptr(); 4216 tcg_gen_add_ptr(tp, base, i); 4217 tcg_gen_addi_ptr(i, i, 16); 4218 4219 t0 = tcg_temp_new_i64(); 4220 t1 = tcg_temp_new_i64(); 4221 tcg_gen_extr_i128_i64(t0, t1, t16); 4222 4223 tcg_gen_st_i64(t0, tp, vofs); 4224 tcg_gen_st_i64(t1, tp, vofs + 8); 4225 4226 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4227 } 4228 4229 /* 4230 * Predicate register loads can be any multiple of 2. 4231 * Note that we still store the entire 64-bit unit into tcg_env. 4232 */ 4233 if (len_remain >= 8) { 4234 t0 = tcg_temp_new_i64(); 4235 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4236 tcg_gen_st_i64(t0, base, vofs + len_align); 4237 len_remain -= 8; 4238 len_align += 8; 4239 if (len_remain) { 4240 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4241 } 4242 } 4243 if (len_remain) { 4244 t0 = tcg_temp_new_i64(); 4245 switch (len_remain) { 4246 case 2: 4247 case 4: 4248 case 8: 4249 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4250 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4251 break; 4252 4253 case 6: 4254 t1 = tcg_temp_new_i64(); 4255 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4256 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4257 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4258 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4259 break; 4260 4261 default: 4262 g_assert_not_reached(); 4263 } 4264 tcg_gen_st_i64(t0, base, vofs + len_align); 4265 } 4266 } 4267 4268 /* Similarly for stores. */ 4269 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4270 int len, int rn, int imm) 4271 { 4272 int len_align = QEMU_ALIGN_DOWN(len, 16); 4273 int len_remain = len % 16; 4274 int nparts = len / 16 + ctpop8(len_remain); 4275 int midx = get_mem_index(s); 4276 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4277 TCGv_i128 t16; 4278 4279 dirty_addr = tcg_temp_new_i64(); 4280 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4281 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4282 4283 /* Note that unpredicated load/store of vector/predicate registers 4284 * are defined as a stream of bytes, which equates to little-endian 4285 * operations on larger quantities. There is no nice way to force 4286 * a little-endian store for aarch64_be-linux-user out of line. 4287 * 4288 * Attempt to keep code expansion to a minimum by limiting the 4289 * amount of unrolling done. 
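 *
 * E.g. a 64-byte vector store has nparts = 4 and is fully unrolled
 * in-line, while an 80-byte store has nparts = 5 and falls back to
 * the loop.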
4290 */ 4291 if (nparts <= 4) { 4292 int i; 4293 4294 t0 = tcg_temp_new_i64(); 4295 t1 = tcg_temp_new_i64(); 4296 t16 = tcg_temp_new_i128(); 4297 for (i = 0; i < len_align; i += 16) { 4298 tcg_gen_ld_i64(t0, base, vofs + i); 4299 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4300 tcg_gen_concat_i64_i128(t16, t0, t1); 4301 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4302 MO_LE | MO_128 | MO_ATOM_NONE); 4303 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4304 } 4305 } else { 4306 TCGLabel *loop = gen_new_label(); 4307 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4308 4309 tcg_gen_movi_ptr(i, 0); 4310 gen_set_label(loop); 4311 4312 t0 = tcg_temp_new_i64(); 4313 t1 = tcg_temp_new_i64(); 4314 tp = tcg_temp_new_ptr(); 4315 tcg_gen_add_ptr(tp, base, i); 4316 tcg_gen_ld_i64(t0, tp, vofs); 4317 tcg_gen_ld_i64(t1, tp, vofs + 8); 4318 tcg_gen_addi_ptr(i, i, 16); 4319 4320 t16 = tcg_temp_new_i128(); 4321 tcg_gen_concat_i64_i128(t16, t0, t1); 4322 4323 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4324 MO_LE | MO_128 | MO_ATOM_NONE); 4325 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4326 4327 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4328 } 4329 4330 /* Predicate register stores can be any multiple of 2. */ 4331 if (len_remain >= 8) { 4332 t0 = tcg_temp_new_i64(); 4333 tcg_gen_ld_i64(t0, base, vofs + len_align); 4334 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4335 len_remain -= 8; 4336 len_align += 8; 4337 if (len_remain) { 4338 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4339 } 4340 } 4341 if (len_remain) { 4342 t0 = tcg_temp_new_i64(); 4343 tcg_gen_ld_i64(t0, base, vofs + len_align); 4344 4345 switch (len_remain) { 4346 case 2: 4347 case 4: 4348 case 8: 4349 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4350 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4351 break; 4352 4353 case 6: 4354 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4355 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4356 tcg_gen_shri_i64(t0, t0, 32); 4357 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4358 break; 4359 4360 default: 4361 g_assert_not_reached(); 4362 } 4363 } 4364 } 4365 4366 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4367 { 4368 if (!dc_isar_feature(aa64_sve, s)) { 4369 return false; 4370 } 4371 if (sve_access_check(s)) { 4372 int size = vec_full_reg_size(s); 4373 int off = vec_full_reg_offset(s, a->rd); 4374 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4375 } 4376 return true; 4377 } 4378 4379 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4380 { 4381 if (!dc_isar_feature(aa64_sve, s)) { 4382 return false; 4383 } 4384 if (sve_access_check(s)) { 4385 int size = pred_full_reg_size(s); 4386 int off = pred_full_reg_offset(s, a->rd); 4387 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); 4388 } 4389 return true; 4390 } 4391 4392 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4393 { 4394 if (!dc_isar_feature(aa64_sve, s)) { 4395 return false; 4396 } 4397 if (sve_access_check(s)) { 4398 int size = vec_full_reg_size(s); 4399 int off = vec_full_reg_offset(s, a->rd); 4400 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4401 } 4402 return true; 4403 } 4404 4405 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4406 { 4407 if (!dc_isar_feature(aa64_sve, s)) { 4408 return false; 4409 } 4410 if (sve_access_check(s)) { 4411 int size = pred_full_reg_size(s); 4412 int off = pred_full_reg_offset(s, a->rd); 4413 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); 4414 } 4415 return true; 4416 } 4417 4418 /* 4419 
*** SVE Memory - Contiguous Load Group 4420 */ 4421 4422 /* The memory mode of the dtype. */ 4423 static const MemOp dtype_mop[16] = { 4424 MO_UB, MO_UB, MO_UB, MO_UB, 4425 MO_SL, MO_UW, MO_UW, MO_UW, 4426 MO_SW, MO_SW, MO_UL, MO_UL, 4427 MO_SB, MO_SB, MO_SB, MO_UQ 4428 }; 4429 4430 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4431 4432 /* The vector element size of dtype. */ 4433 static const uint8_t dtype_esz[16] = { 4434 0, 1, 2, 3, 4435 3, 1, 2, 3, 4436 3, 2, 2, 3, 4437 3, 2, 1, 3 4438 }; 4439 4440 uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, 4441 uint32_t msz, bool is_write, uint32_t data) 4442 { 4443 uint32_t sizem1; 4444 uint32_t desc = 0; 4445 4446 /* Assert all of the data fits, with or without MTE enabled. */ 4447 assert(nregs >= 1 && nregs <= 4); 4448 sizem1 = (nregs << msz) - 1; 4449 assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); 4450 assert(data < 1u << SVE_MTEDESC_SHIFT); 4451 4452 if (s->mte_active[0]) { 4453 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4454 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4455 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4456 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4457 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); 4458 desc <<= SVE_MTEDESC_SHIFT; 4459 } 4460 return simd_desc(vsz, vsz, desc | data); 4461 } 4462 4463 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4464 int dtype, uint32_t nregs, bool is_write, 4465 gen_helper_gvec_mem *fn) 4466 { 4467 TCGv_ptr t_pg; 4468 uint32_t desc; 4469 4470 if (!s->mte_active[0]) { 4471 addr = clean_data_tbi(s, addr); 4472 } 4473 4474 /* 4475 * For e.g. LD4, there are not enough arguments to pass all 4 4476 * registers as pointers, so encode the regno into the data field. 4477 * For consistency, do this even for LD1. 
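 * After make_svemte_desc above, the data field is laid out as
 * (mtedesc << SVE_MTEDESC_SHIFT) | zt, so the helper side is assumed
 * to recover the register number from the low SVE_MTEDESC_SHIFT bits
 * of simd_data(desc) (see sve_helper.c).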
4478 */ 4479 desc = make_svemte_desc(s, vec_full_reg_size(s), nregs, 4480 dtype_msz(dtype), is_write, zt); 4481 t_pg = tcg_temp_new_ptr(); 4482 4483 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 4484 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4485 } 4486 4487 /* Indexed by [mte][be][dtype][nreg] */ 4488 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4489 { /* mte inactive, little-endian */ 4490 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4491 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4492 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4493 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4494 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4495 4496 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4497 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4498 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4499 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4500 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4501 4502 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4503 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4504 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4505 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4506 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4507 4508 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4509 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4510 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4511 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4512 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4513 4514 /* mte inactive, big-endian */ 4515 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4516 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4517 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4518 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4519 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4520 4521 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4522 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4523 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4524 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4525 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4526 4527 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4528 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4529 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4530 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4531 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4532 4533 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4534 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4535 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4536 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4537 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4538 4539 { /* mte active, little-endian */ 4540 { { gen_helper_sve_ld1bb_r_mte, 4541 gen_helper_sve_ld2bb_r_mte, 4542 gen_helper_sve_ld3bb_r_mte, 4543 gen_helper_sve_ld4bb_r_mte }, 4544 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4545 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4546 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4547 4548 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4549 { gen_helper_sve_ld1hh_le_r_mte, 4550 gen_helper_sve_ld2hh_le_r_mte, 4551 gen_helper_sve_ld3hh_le_r_mte, 4552 gen_helper_sve_ld4hh_le_r_mte }, 4553 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4554 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4555 4556 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4557 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4558 { 
gen_helper_sve_ld1ss_le_r_mte, 4559 gen_helper_sve_ld2ss_le_r_mte, 4560 gen_helper_sve_ld3ss_le_r_mte, 4561 gen_helper_sve_ld4ss_le_r_mte }, 4562 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4563 4564 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4565 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4566 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4567 { gen_helper_sve_ld1dd_le_r_mte, 4568 gen_helper_sve_ld2dd_le_r_mte, 4569 gen_helper_sve_ld3dd_le_r_mte, 4570 gen_helper_sve_ld4dd_le_r_mte } }, 4571 4572 /* mte active, big-endian */ 4573 { { gen_helper_sve_ld1bb_r_mte, 4574 gen_helper_sve_ld2bb_r_mte, 4575 gen_helper_sve_ld3bb_r_mte, 4576 gen_helper_sve_ld4bb_r_mte }, 4577 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4578 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4579 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4580 4581 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4582 { gen_helper_sve_ld1hh_be_r_mte, 4583 gen_helper_sve_ld2hh_be_r_mte, 4584 gen_helper_sve_ld3hh_be_r_mte, 4585 gen_helper_sve_ld4hh_be_r_mte }, 4586 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4587 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4588 4589 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4590 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4591 { gen_helper_sve_ld1ss_be_r_mte, 4592 gen_helper_sve_ld2ss_be_r_mte, 4593 gen_helper_sve_ld3ss_be_r_mte, 4594 gen_helper_sve_ld4ss_be_r_mte }, 4595 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4596 4597 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4598 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4599 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4600 { gen_helper_sve_ld1dd_be_r_mte, 4601 gen_helper_sve_ld2dd_be_r_mte, 4602 gen_helper_sve_ld3dd_be_r_mte, 4603 gen_helper_sve_ld4dd_be_r_mte } } }, 4604 }; 4605 4606 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4607 TCGv_i64 addr, int dtype, int nreg) 4608 { 4609 gen_helper_gvec_mem *fn 4610 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4611 4612 /* 4613 * While there are holes in the table, they are not 4614 * accessible via the instruction encoding. 
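 * E.g. dtype 1 (LD1B with 16-bit elements) has only a single-register
 * form, so its nreg > 0 slots are NULL; LD2/LD3/LD4 exist only for
 * the dtypes whose memory size equals the element size.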
4615 */ 4616 assert(fn != NULL); 4617 do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn); 4618 } 4619 4620 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4621 { 4622 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4623 return false; 4624 } 4625 if (sve_access_check(s)) { 4626 TCGv_i64 addr = tcg_temp_new_i64(); 4627 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4628 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4629 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4630 } 4631 return true; 4632 } 4633 4634 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4635 { 4636 if (!dc_isar_feature(aa64_sve, s)) { 4637 return false; 4638 } 4639 if (sve_access_check(s)) { 4640 int vsz = vec_full_reg_size(s); 4641 int elements = vsz >> dtype_esz[a->dtype]; 4642 TCGv_i64 addr = tcg_temp_new_i64(); 4643 4644 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4645 (a->imm * elements * (a->nreg + 1)) 4646 << dtype_msz(a->dtype)); 4647 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4648 } 4649 return true; 4650 } 4651 4652 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4653 { 4654 static gen_helper_gvec_mem * const fns[2][2][16] = { 4655 { /* mte inactive, little-endian */ 4656 { gen_helper_sve_ldff1bb_r, 4657 gen_helper_sve_ldff1bhu_r, 4658 gen_helper_sve_ldff1bsu_r, 4659 gen_helper_sve_ldff1bdu_r, 4660 4661 gen_helper_sve_ldff1sds_le_r, 4662 gen_helper_sve_ldff1hh_le_r, 4663 gen_helper_sve_ldff1hsu_le_r, 4664 gen_helper_sve_ldff1hdu_le_r, 4665 4666 gen_helper_sve_ldff1hds_le_r, 4667 gen_helper_sve_ldff1hss_le_r, 4668 gen_helper_sve_ldff1ss_le_r, 4669 gen_helper_sve_ldff1sdu_le_r, 4670 4671 gen_helper_sve_ldff1bds_r, 4672 gen_helper_sve_ldff1bss_r, 4673 gen_helper_sve_ldff1bhs_r, 4674 gen_helper_sve_ldff1dd_le_r }, 4675 4676 /* mte inactive, big-endian */ 4677 { gen_helper_sve_ldff1bb_r, 4678 gen_helper_sve_ldff1bhu_r, 4679 gen_helper_sve_ldff1bsu_r, 4680 gen_helper_sve_ldff1bdu_r, 4681 4682 gen_helper_sve_ldff1sds_be_r, 4683 gen_helper_sve_ldff1hh_be_r, 4684 gen_helper_sve_ldff1hsu_be_r, 4685 gen_helper_sve_ldff1hdu_be_r, 4686 4687 gen_helper_sve_ldff1hds_be_r, 4688 gen_helper_sve_ldff1hss_be_r, 4689 gen_helper_sve_ldff1ss_be_r, 4690 gen_helper_sve_ldff1sdu_be_r, 4691 4692 gen_helper_sve_ldff1bds_r, 4693 gen_helper_sve_ldff1bss_r, 4694 gen_helper_sve_ldff1bhs_r, 4695 gen_helper_sve_ldff1dd_be_r } }, 4696 4697 { /* mte active, little-endian */ 4698 { gen_helper_sve_ldff1bb_r_mte, 4699 gen_helper_sve_ldff1bhu_r_mte, 4700 gen_helper_sve_ldff1bsu_r_mte, 4701 gen_helper_sve_ldff1bdu_r_mte, 4702 4703 gen_helper_sve_ldff1sds_le_r_mte, 4704 gen_helper_sve_ldff1hh_le_r_mte, 4705 gen_helper_sve_ldff1hsu_le_r_mte, 4706 gen_helper_sve_ldff1hdu_le_r_mte, 4707 4708 gen_helper_sve_ldff1hds_le_r_mte, 4709 gen_helper_sve_ldff1hss_le_r_mte, 4710 gen_helper_sve_ldff1ss_le_r_mte, 4711 gen_helper_sve_ldff1sdu_le_r_mte, 4712 4713 gen_helper_sve_ldff1bds_r_mte, 4714 gen_helper_sve_ldff1bss_r_mte, 4715 gen_helper_sve_ldff1bhs_r_mte, 4716 gen_helper_sve_ldff1dd_le_r_mte }, 4717 4718 /* mte active, big-endian */ 4719 { gen_helper_sve_ldff1bb_r_mte, 4720 gen_helper_sve_ldff1bhu_r_mte, 4721 gen_helper_sve_ldff1bsu_r_mte, 4722 gen_helper_sve_ldff1bdu_r_mte, 4723 4724 gen_helper_sve_ldff1sds_be_r_mte, 4725 gen_helper_sve_ldff1hh_be_r_mte, 4726 gen_helper_sve_ldff1hsu_be_r_mte, 4727 gen_helper_sve_ldff1hdu_be_r_mte, 4728 4729 gen_helper_sve_ldff1hds_be_r_mte, 4730 gen_helper_sve_ldff1hss_be_r_mte, 4731 gen_helper_sve_ldff1ss_be_r_mte, 4732 
          gen_helper_sve_ldff1sdu_be_r_mte,

          gen_helper_sve_ldff1bds_r_mte,
          gen_helper_sve_ldff1bss_r_mte,
          gen_helper_sve_ldff1bhs_r_mte,
          gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = tcg_temp_new_i64();
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off =
(a->imm * elements) << dtype_msz(a->dtype); 4850 TCGv_i64 addr = tcg_temp_new_i64(); 4851 4852 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4853 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4854 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4855 } 4856 return true; 4857 } 4858 4859 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4860 { 4861 unsigned vsz = vec_full_reg_size(s); 4862 TCGv_ptr t_pg; 4863 int poff; 4864 uint32_t desc; 4865 4866 /* Load the first quadword using the normal predicated load helpers. */ 4867 if (!s->mte_active[0]) { 4868 addr = clean_data_tbi(s, addr); 4869 } 4870 4871 poff = pred_full_reg_offset(s, pg); 4872 if (vsz > 16) { 4873 /* 4874 * Zero-extend the first 16 bits of the predicate into a temporary. 4875 * This avoids triggering an assert making sure we don't have bits 4876 * set within a predicate beyond VQ, but we have lowered VQ to 1 4877 * for this load operation. 4878 */ 4879 TCGv_i64 tmp = tcg_temp_new_i64(); 4880 #if HOST_BIG_ENDIAN 4881 poff += 6; 4882 #endif 4883 tcg_gen_ld16u_i64(tmp, tcg_env, poff); 4884 4885 poff = offsetof(CPUARMState, vfp.preg_tmp); 4886 tcg_gen_st_i64(tmp, tcg_env, poff); 4887 } 4888 4889 t_pg = tcg_temp_new_ptr(); 4890 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4891 4892 gen_helper_gvec_mem *fn 4893 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4894 desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); 4895 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4896 4897 /* Replicate that first quadword. */ 4898 if (vsz > 16) { 4899 int doff = vec_full_reg_offset(s, zt); 4900 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4901 } 4902 } 4903 4904 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 4905 { 4906 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4907 return false; 4908 } 4909 if (sve_access_check(s)) { 4910 int msz = dtype_msz(a->dtype); 4911 TCGv_i64 addr = tcg_temp_new_i64(); 4912 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 4913 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4914 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4915 } 4916 return true; 4917 } 4918 4919 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 4920 { 4921 if (!dc_isar_feature(aa64_sve, s)) { 4922 return false; 4923 } 4924 if (sve_access_check(s)) { 4925 TCGv_i64 addr = tcg_temp_new_i64(); 4926 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 4927 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4928 } 4929 return true; 4930 } 4931 4932 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4933 { 4934 unsigned vsz = vec_full_reg_size(s); 4935 unsigned vsz_r32; 4936 TCGv_ptr t_pg; 4937 int poff, doff; 4938 uint32_t desc; 4939 4940 if (vsz < 32) { 4941 /* 4942 * Note that this UNDEFINED check comes after CheckSVEEnabled() 4943 * in the ARM pseudocode, which is the sve_access_check() done 4944 * in our caller. We should not now return false from the caller. 4945 */ 4946 unallocated_encoding(s); 4947 return; 4948 } 4949 4950 /* Load the first octaword using the normal predicated load helpers. */ 4951 if (!s->mte_active[0]) { 4952 addr = clean_data_tbi(s, addr); 4953 } 4954 4955 poff = pred_full_reg_offset(s, pg); 4956 if (vsz > 32) { 4957 /* 4958 * Zero-extend the first 32 bits of the predicate into a temporary. 4959 * This avoids triggering an assert making sure we don't have bits 4960 * set within a predicate beyond VQ, but we have lowered VQ to 2 4961 * for this load operation. 
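 *
 * (The predicate register is stored as host-order 64-bit units, so on
 * a big-endian host the low 32 predicate bits live at byte offset +4
 * within the first unit; hence the HOST_BIG_ENDIAN adjustment below.)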
4962 */ 4963 TCGv_i64 tmp = tcg_temp_new_i64(); 4964 #if HOST_BIG_ENDIAN 4965 poff += 4; 4966 #endif 4967 tcg_gen_ld32u_i64(tmp, tcg_env, poff); 4968 4969 poff = offsetof(CPUARMState, vfp.preg_tmp); 4970 tcg_gen_st_i64(tmp, tcg_env, poff); 4971 } 4972 4973 t_pg = tcg_temp_new_ptr(); 4974 tcg_gen_addi_ptr(t_pg, tcg_env, poff); 4975 4976 gen_helper_gvec_mem *fn 4977 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4978 desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); 4979 fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); 4980 4981 /* 4982 * Replicate that first octaword. 4983 * The replication happens in units of 32; if the full vector size 4984 * is not a multiple of 32, the final bits are zeroed. 4985 */ 4986 doff = vec_full_reg_offset(s, zt); 4987 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 4988 if (vsz >= 64) { 4989 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 4990 } 4991 vsz -= vsz_r32; 4992 if (vsz) { 4993 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 4994 } 4995 } 4996 4997 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 4998 { 4999 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5000 return false; 5001 } 5002 if (a->rm == 31) { 5003 return false; 5004 } 5005 s->is_nonstreaming = true; 5006 if (sve_access_check(s)) { 5007 TCGv_i64 addr = tcg_temp_new_i64(); 5008 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5009 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5010 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5011 } 5012 return true; 5013 } 5014 5015 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 5016 { 5017 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5018 return false; 5019 } 5020 s->is_nonstreaming = true; 5021 if (sve_access_check(s)) { 5022 TCGv_i64 addr = tcg_temp_new_i64(); 5023 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 5024 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5025 } 5026 return true; 5027 } 5028 5029 /* Load and broadcast element. */ 5030 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 5031 { 5032 unsigned vsz = vec_full_reg_size(s); 5033 unsigned psz = pred_full_reg_size(s); 5034 unsigned esz = dtype_esz[a->dtype]; 5035 unsigned msz = dtype_msz(a->dtype); 5036 TCGLabel *over; 5037 TCGv_i64 temp, clean_addr; 5038 MemOp memop; 5039 5040 if (!dc_isar_feature(aa64_sve, s)) { 5041 return false; 5042 } 5043 if (!sve_access_check(s)) { 5044 return true; 5045 } 5046 5047 over = gen_new_label(); 5048 5049 /* If the guarding predicate has no bits set, no load occurs. */ 5050 if (psz <= 8) { 5051 /* Reduce the pred_esz_masks value simply to reduce the 5052 * size of the code generated here. 5053 */ 5054 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 5055 temp = tcg_temp_new_i64(); 5056 tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg)); 5057 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 5058 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 5059 } else { 5060 TCGv_i32 t32 = tcg_temp_new_i32(); 5061 find_last_active(s, t32, esz, a->pg); 5062 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5063 } 5064 5065 /* Load the data. */ 5066 temp = tcg_temp_new_i64(); 5067 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5068 5069 memop = finalize_memop(s, dtype_mop[a->dtype]); 5070 clean_addr = gen_mte_check1(s, temp, false, true, memop); 5071 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); 5072 5073 /* Broadcast to *all* elements. 
*/ 5074 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5075 vsz, vsz, temp); 5076 5077 /* Zero the inactive elements. */ 5078 gen_set_label(over); 5079 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5080 } 5081 5082 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5083 int msz, int esz, int nreg) 5084 { 5085 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 5086 { { { gen_helper_sve_st1bb_r, 5087 gen_helper_sve_st1bh_r, 5088 gen_helper_sve_st1bs_r, 5089 gen_helper_sve_st1bd_r }, 5090 { NULL, 5091 gen_helper_sve_st1hh_le_r, 5092 gen_helper_sve_st1hs_le_r, 5093 gen_helper_sve_st1hd_le_r }, 5094 { NULL, NULL, 5095 gen_helper_sve_st1ss_le_r, 5096 gen_helper_sve_st1sd_le_r }, 5097 { NULL, NULL, NULL, 5098 gen_helper_sve_st1dd_le_r } }, 5099 { { gen_helper_sve_st1bb_r, 5100 gen_helper_sve_st1bh_r, 5101 gen_helper_sve_st1bs_r, 5102 gen_helper_sve_st1bd_r }, 5103 { NULL, 5104 gen_helper_sve_st1hh_be_r, 5105 gen_helper_sve_st1hs_be_r, 5106 gen_helper_sve_st1hd_be_r }, 5107 { NULL, NULL, 5108 gen_helper_sve_st1ss_be_r, 5109 gen_helper_sve_st1sd_be_r }, 5110 { NULL, NULL, NULL, 5111 gen_helper_sve_st1dd_be_r } } }, 5112 5113 { { { gen_helper_sve_st1bb_r_mte, 5114 gen_helper_sve_st1bh_r_mte, 5115 gen_helper_sve_st1bs_r_mte, 5116 gen_helper_sve_st1bd_r_mte }, 5117 { NULL, 5118 gen_helper_sve_st1hh_le_r_mte, 5119 gen_helper_sve_st1hs_le_r_mte, 5120 gen_helper_sve_st1hd_le_r_mte }, 5121 { NULL, NULL, 5122 gen_helper_sve_st1ss_le_r_mte, 5123 gen_helper_sve_st1sd_le_r_mte }, 5124 { NULL, NULL, NULL, 5125 gen_helper_sve_st1dd_le_r_mte } }, 5126 { { gen_helper_sve_st1bb_r_mte, 5127 gen_helper_sve_st1bh_r_mte, 5128 gen_helper_sve_st1bs_r_mte, 5129 gen_helper_sve_st1bd_r_mte }, 5130 { NULL, 5131 gen_helper_sve_st1hh_be_r_mte, 5132 gen_helper_sve_st1hs_be_r_mte, 5133 gen_helper_sve_st1hd_be_r_mte }, 5134 { NULL, NULL, 5135 gen_helper_sve_st1ss_be_r_mte, 5136 gen_helper_sve_st1sd_be_r_mte }, 5137 { NULL, NULL, NULL, 5138 gen_helper_sve_st1dd_be_r_mte } } }, 5139 }; 5140 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5141 { { { gen_helper_sve_st2bb_r, 5142 gen_helper_sve_st2hh_le_r, 5143 gen_helper_sve_st2ss_le_r, 5144 gen_helper_sve_st2dd_le_r }, 5145 { gen_helper_sve_st3bb_r, 5146 gen_helper_sve_st3hh_le_r, 5147 gen_helper_sve_st3ss_le_r, 5148 gen_helper_sve_st3dd_le_r }, 5149 { gen_helper_sve_st4bb_r, 5150 gen_helper_sve_st4hh_le_r, 5151 gen_helper_sve_st4ss_le_r, 5152 gen_helper_sve_st4dd_le_r } }, 5153 { { gen_helper_sve_st2bb_r, 5154 gen_helper_sve_st2hh_be_r, 5155 gen_helper_sve_st2ss_be_r, 5156 gen_helper_sve_st2dd_be_r }, 5157 { gen_helper_sve_st3bb_r, 5158 gen_helper_sve_st3hh_be_r, 5159 gen_helper_sve_st3ss_be_r, 5160 gen_helper_sve_st3dd_be_r }, 5161 { gen_helper_sve_st4bb_r, 5162 gen_helper_sve_st4hh_be_r, 5163 gen_helper_sve_st4ss_be_r, 5164 gen_helper_sve_st4dd_be_r } } }, 5165 { { { gen_helper_sve_st2bb_r_mte, 5166 gen_helper_sve_st2hh_le_r_mte, 5167 gen_helper_sve_st2ss_le_r_mte, 5168 gen_helper_sve_st2dd_le_r_mte }, 5169 { gen_helper_sve_st3bb_r_mte, 5170 gen_helper_sve_st3hh_le_r_mte, 5171 gen_helper_sve_st3ss_le_r_mte, 5172 gen_helper_sve_st3dd_le_r_mte }, 5173 { gen_helper_sve_st4bb_r_mte, 5174 gen_helper_sve_st4hh_le_r_mte, 5175 gen_helper_sve_st4ss_le_r_mte, 5176 gen_helper_sve_st4dd_le_r_mte } }, 5177 { { gen_helper_sve_st2bb_r_mte, 5178 gen_helper_sve_st2hh_be_r_mte, 5179 gen_helper_sve_st2ss_be_r_mte, 5180 gen_helper_sve_st2dd_be_r_mte }, 5181 { gen_helper_sve_st3bb_r_mte, 5182 gen_helper_sve_st3hh_be_r_mte, 5183 
gen_helper_sve_st3ss_be_r_mte, 5184 gen_helper_sve_st3dd_be_r_mte }, 5185 { gen_helper_sve_st4bb_r_mte, 5186 gen_helper_sve_st4hh_be_r_mte, 5187 gen_helper_sve_st4ss_be_r_mte, 5188 gen_helper_sve_st4dd_be_r_mte } } }, 5189 }; 5190 gen_helper_gvec_mem *fn; 5191 int be = s->be_data == MO_BE; 5192 5193 if (nreg == 0) { 5194 /* ST1 */ 5195 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5196 } else { 5197 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5198 assert(msz == esz); 5199 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5200 } 5201 assert(fn != NULL); 5202 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn); 5203 } 5204 5205 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5206 { 5207 if (!dc_isar_feature(aa64_sve, s)) { 5208 return false; 5209 } 5210 if (a->rm == 31 || a->msz > a->esz) { 5211 return false; 5212 } 5213 if (sve_access_check(s)) { 5214 TCGv_i64 addr = tcg_temp_new_i64(); 5215 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5216 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5217 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5218 } 5219 return true; 5220 } 5221 5222 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5223 { 5224 if (!dc_isar_feature(aa64_sve, s)) { 5225 return false; 5226 } 5227 if (a->msz > a->esz) { 5228 return false; 5229 } 5230 if (sve_access_check(s)) { 5231 int vsz = vec_full_reg_size(s); 5232 int elements = vsz >> a->esz; 5233 TCGv_i64 addr = tcg_temp_new_i64(); 5234 5235 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5236 (a->imm * elements * (a->nreg + 1)) << a->msz); 5237 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5238 } 5239 return true; 5240 } 5241 5242 /* 5243 *** SVE gather loads / scatter stores 5244 */ 5245 5246 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5247 int scale, TCGv_i64 scalar, int msz, bool is_write, 5248 gen_helper_gvec_mem_scatter *fn) 5249 { 5250 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5251 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5252 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5253 uint32_t desc; 5254 5255 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); 5256 tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); 5257 tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); 5258 5259 desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); 5260 fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5261 } 5262 5263 /* Indexed by [mte][be][ff][xs][u][msz]. 
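 * mte: MTE checks active; be: big-endian data; ff: first-fault variant;
 * xs: 32-bit offsets sign-extended (1) or zero-extended (0);
 * u: zero-extending (unsigned) load; msz: log2 of the access size.
 * E.g. gather_load_fn32[0][0][0][1][1][2] is gen_helper_sve_ldss_le_zss:
 * no MTE, little-endian, not first-fault, sign-extended offsets,
 * unsigned 4-byte loads.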
*/ 5264 static gen_helper_gvec_mem_scatter * const 5265 gather_load_fn32[2][2][2][2][2][3] = { 5266 { /* MTE Inactive */ 5267 { /* Little-endian */ 5268 { { { gen_helper_sve_ldbss_zsu, 5269 gen_helper_sve_ldhss_le_zsu, 5270 NULL, }, 5271 { gen_helper_sve_ldbsu_zsu, 5272 gen_helper_sve_ldhsu_le_zsu, 5273 gen_helper_sve_ldss_le_zsu, } }, 5274 { { gen_helper_sve_ldbss_zss, 5275 gen_helper_sve_ldhss_le_zss, 5276 NULL, }, 5277 { gen_helper_sve_ldbsu_zss, 5278 gen_helper_sve_ldhsu_le_zss, 5279 gen_helper_sve_ldss_le_zss, } } }, 5280 5281 /* First-fault */ 5282 { { { gen_helper_sve_ldffbss_zsu, 5283 gen_helper_sve_ldffhss_le_zsu, 5284 NULL, }, 5285 { gen_helper_sve_ldffbsu_zsu, 5286 gen_helper_sve_ldffhsu_le_zsu, 5287 gen_helper_sve_ldffss_le_zsu, } }, 5288 { { gen_helper_sve_ldffbss_zss, 5289 gen_helper_sve_ldffhss_le_zss, 5290 NULL, }, 5291 { gen_helper_sve_ldffbsu_zss, 5292 gen_helper_sve_ldffhsu_le_zss, 5293 gen_helper_sve_ldffss_le_zss, } } } }, 5294 5295 { /* Big-endian */ 5296 { { { gen_helper_sve_ldbss_zsu, 5297 gen_helper_sve_ldhss_be_zsu, 5298 NULL, }, 5299 { gen_helper_sve_ldbsu_zsu, 5300 gen_helper_sve_ldhsu_be_zsu, 5301 gen_helper_sve_ldss_be_zsu, } }, 5302 { { gen_helper_sve_ldbss_zss, 5303 gen_helper_sve_ldhss_be_zss, 5304 NULL, }, 5305 { gen_helper_sve_ldbsu_zss, 5306 gen_helper_sve_ldhsu_be_zss, 5307 gen_helper_sve_ldss_be_zss, } } }, 5308 5309 /* First-fault */ 5310 { { { gen_helper_sve_ldffbss_zsu, 5311 gen_helper_sve_ldffhss_be_zsu, 5312 NULL, }, 5313 { gen_helper_sve_ldffbsu_zsu, 5314 gen_helper_sve_ldffhsu_be_zsu, 5315 gen_helper_sve_ldffss_be_zsu, } }, 5316 { { gen_helper_sve_ldffbss_zss, 5317 gen_helper_sve_ldffhss_be_zss, 5318 NULL, }, 5319 { gen_helper_sve_ldffbsu_zss, 5320 gen_helper_sve_ldffhsu_be_zss, 5321 gen_helper_sve_ldffss_be_zss, } } } } }, 5322 { /* MTE Active */ 5323 { /* Little-endian */ 5324 { { { gen_helper_sve_ldbss_zsu_mte, 5325 gen_helper_sve_ldhss_le_zsu_mte, 5326 NULL, }, 5327 { gen_helper_sve_ldbsu_zsu_mte, 5328 gen_helper_sve_ldhsu_le_zsu_mte, 5329 gen_helper_sve_ldss_le_zsu_mte, } }, 5330 { { gen_helper_sve_ldbss_zss_mte, 5331 gen_helper_sve_ldhss_le_zss_mte, 5332 NULL, }, 5333 { gen_helper_sve_ldbsu_zss_mte, 5334 gen_helper_sve_ldhsu_le_zss_mte, 5335 gen_helper_sve_ldss_le_zss_mte, } } }, 5336 5337 /* First-fault */ 5338 { { { gen_helper_sve_ldffbss_zsu_mte, 5339 gen_helper_sve_ldffhss_le_zsu_mte, 5340 NULL, }, 5341 { gen_helper_sve_ldffbsu_zsu_mte, 5342 gen_helper_sve_ldffhsu_le_zsu_mte, 5343 gen_helper_sve_ldffss_le_zsu_mte, } }, 5344 { { gen_helper_sve_ldffbss_zss_mte, 5345 gen_helper_sve_ldffhss_le_zss_mte, 5346 NULL, }, 5347 { gen_helper_sve_ldffbsu_zss_mte, 5348 gen_helper_sve_ldffhsu_le_zss_mte, 5349 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5350 5351 { /* Big-endian */ 5352 { { { gen_helper_sve_ldbss_zsu_mte, 5353 gen_helper_sve_ldhss_be_zsu_mte, 5354 NULL, }, 5355 { gen_helper_sve_ldbsu_zsu_mte, 5356 gen_helper_sve_ldhsu_be_zsu_mte, 5357 gen_helper_sve_ldss_be_zsu_mte, } }, 5358 { { gen_helper_sve_ldbss_zss_mte, 5359 gen_helper_sve_ldhss_be_zss_mte, 5360 NULL, }, 5361 { gen_helper_sve_ldbsu_zss_mte, 5362 gen_helper_sve_ldhsu_be_zss_mte, 5363 gen_helper_sve_ldss_be_zss_mte, } } }, 5364 5365 /* First-fault */ 5366 { { { gen_helper_sve_ldffbss_zsu_mte, 5367 gen_helper_sve_ldffhss_be_zsu_mte, 5368 NULL, }, 5369 { gen_helper_sve_ldffbsu_zsu_mte, 5370 gen_helper_sve_ldffhsu_be_zsu_mte, 5371 gen_helper_sve_ldffss_be_zsu_mte, } }, 5372 { { gen_helper_sve_ldffbss_zss_mte, 5373 gen_helper_sve_ldffhss_be_zss_mte, 5374 NULL, }, 5375 { 
              gen_helper_sve_ldffbsu_zss_mte,
              gen_helper_sve_ldffhsu_be_zss_mte,
              gen_helper_sve_ldffss_be_zss_mte, } } } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};

static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, fn);
    return true;
}

static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
    return true;
}

static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz + !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, fn);
    return true;
}
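
/*
 * Worked example (illustrative, not part of the original source):
 * for a little-endian LD1H { Zt.D }, Pg/Z, [Xn, Zm.D, UXTW] -- unsigned
 * halfword data, unscaled unsigned 32-bit offsets, no first-fault, MTE
 * disabled -- the decode yields mte=0, be=0, ff=0, xs=0 (zsu), u=1 and
 * msz=MO_16, so
 *
 *     gather_load_fn64[0][0][0][0][1][MO_16] == gen_helper_sve_ldhdu_le_zsu
 *
 * The vector-plus-immediate form above reuses the same tables via the
 * overloaded xs=2 (zd) column, since its "offsets" are whole 64-bit
 * elements.
 */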

/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}

static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}
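
/*
 * Illustrative sketch of the zn[x] + imm trick used by trans_ST1_zpiz
 * and trans_LD1_zpiz above (an assumption about intent, not original
 * commentary): for ST1W { Zt.S }, Pg, [Zn.S, #imm] with encoded
 * immediate index 4 and msz=MO_32, the scalar base handed to the helper
 * is tcg_constant_i64(4 << 2) == 16 bytes.  The helper then computes
 * base + offset[i] exactly as it would for the register-plus-vector
 * form, so no separate vector-plus-immediate helpers are required.
 */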
5925 */ 5926 5927 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 5928 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 5929 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 5930 5931 /* 5932 * SVE2 Integer Multiply - Unpredicated 5933 */ 5934 5935 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 5936 5937 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 5938 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 5939 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 5940 }; 5941 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5942 smulh_zzz_fns[a->esz], a, 0) 5943 5944 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 5945 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 5946 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 5947 }; 5948 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5949 umulh_zzz_fns[a->esz], a, 0) 5950 5951 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5952 gen_helper_gvec_pmul_b, a, 0) 5953 5954 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { 5955 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 5956 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 5957 }; 5958 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5959 sqdmulh_zzz_fns[a->esz], a, 0) 5960 5961 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 5962 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 5963 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 5964 }; 5965 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5966 sqrdmulh_zzz_fns[a->esz], a, 0) 5967 5968 /* 5969 * SVE2 Integer - Predicated 5970 */ 5971 5972 static gen_helper_gvec_4 * const sadlp_fns[4] = { 5973 NULL, gen_helper_sve2_sadalp_zpzz_h, 5974 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 5975 }; 5976 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5977 sadlp_fns[a->esz], a, 0) 5978 5979 static gen_helper_gvec_4 * const uadlp_fns[4] = { 5980 NULL, gen_helper_sve2_uadalp_zpzz_h, 5981 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 5982 }; 5983 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5984 uadlp_fns[a->esz], a, 0) 5985 5986 /* 5987 * SVE2 integer unary operations (predicated) 5988 */ 5989 5990 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 5991 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 5992 5993 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 5994 a->esz == 2 ? 

/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)

/*
 * SVE2 Integer - Predicated
 */

static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL, gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL, gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)

DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)

/*
 * SVE2 Widening Integer Arithmetic
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)

static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };

    if (a->esz == 0) {
        if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
            return false;
        }
        s->is_nonstreaming = true;
    } else if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
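
/*
 * Illustrative decoding of the trailing data argument for the widening
 * *_l helpers (an assumption drawn from the B/T suffixes, not original
 * commentary): bit 0 selects the top half of the first source and
 * bit 1 the top half of the second, so SADDLB passes 0 (bottom, bottom),
 * SADDLT passes 3 (top, top), SADDLBT passes 2 (bottom, top) and
 * SSUBLTB passes 1 (top, bottom).  The *_w forms only widen one
 * operand, hence data 0 or 1.
 */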

static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}

static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}

static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sshll_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
};
static const GVecGen2i sshll_ops[3] = {
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_h,
      .vece = MO_16 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_s,
      .vece = MO_32 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_d,
      .vece = MO_64 }
};
TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
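
/*
 * Worked example (illustrative): SSHLLB with vece=MO_16, so halfbits=8,
 * and shl=2, on a lane holding 0x0090.  The bottom path shifts left by
 * 8 giving 0x9000, then arithmetic-shifts right by halfbits - shl = 6
 * giving 0xfe40, i.e. (int8_t)0x90 = -112 widened and shifted left by
 * 2 = -448.  The shift pair performs the sign extension and the left
 * shift in one sequence.
 */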

static const TCGOpcode ushll_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
};
static const GVecGen2i ushll_ops[3] = {
    { .fni8 = gen_ushll16_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_h,
      .vece = MO_16 },
    { .fni8 = gen_ushll32_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_s,
      .vece = MO_32 },
    { .fni8 = gen_ushll64_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_d,
      .vece = MO_64 },
};
TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)

static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bgrp_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)

static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)

TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)

static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
                              const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
}

static const GVecGen2 sqxtnb_ops[3] = {
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)

static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const GVecGen2 sqxtnt_ops[3] = {
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
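
/*
 * Worked numbers (illustrative): for SQXTNB .B <- .H, vece=MO_16 gives
 * halfbits=8, so min=-128, max=127 and mask=0xff.  A source lane
 * holding 0x0123 (291) clamps to 127=0x7f, and the final mask leaves
 * the odd (top) bytes of each destination lane zero, as the bottom
 * form requires.
 */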

static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
}

static const GVecGen2 uqxtnb_ops[3] = {
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)

static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const GVecGen2 uqxtnt_ops[3] = {
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)

static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
}

static const GVecGen2 sqxtunb_ops[3] = {
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)

static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)

static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}

static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
}

static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)

static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
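
/*
 * Illustrative walk-through (not original commentary): for
 * SHRNT .B <- .H with shr=4, gen_shrnt_vec shifts the wide lane left by
 * halfbits - shr = 4, which leaves source bits [11:4] in the high byte;
 * the bitsel with mask 0x00ff then keeps the low byte of the
 * destination (e.g. a previous SHRNB result) and takes the high byte
 * from the shifted source.
 */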

static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)

static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
}

static const TCGOpcode sqshrunb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunb_ops[3] = {
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)

static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode sqshrunt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunt_ops[3] = {
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)

static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)

static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
}

static const TCGOpcode sqshrnb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnb_ops[3] = {
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)

static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode sqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnt_ops[3] = {
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)

static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
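
/*
 * The rounding narrowing shifts (RSHRN*, SQRSHRUN*, SQRSHRN*, UQRSHRN*)
 * carry no inline vector expansion, only .fno.  A sketch of the
 * reasoning (an assumption, not original commentary): adding the
 * rounding constant needs the full wide intermediate, which the simple
 * shift-and-mask expansions used for the non-rounding forms do not
 * provide, so these always call the out-of-line helpers.
 */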

static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
}

static const TCGOpcode uqshrnb_vec_list[] = {
    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnb_ops[3] = {
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)

static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)

static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)

#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
    static gen_helper_gvec_3 * const name##_fns[4] = {                   \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
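
/*
 * Expansion sketch (illustrative): DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
 * defines addhnb_fns[] = { NULL, gen_helper_sve2_addhnb_h,
 * gen_helper_sve2_addhnb_s, gen_helper_sve2_addhnb_d } plus a
 * trans_ADDHNB that dispatches on a->esz; esz==0 selects the NULL entry
 * and gen_gvec_ool_arg_zzz then returns false, rejecting the
 * unallocated encoding.
 */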

static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */

TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)

static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)

TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, 0)
TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesimc, a->rd, a->rd, 0)

TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, 0)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aesd, a, 0)

TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)

TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)

static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL, gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
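
/*
 * Illustrative note (an assumption about the flavour choice, not
 * original commentary): FLOGB on half-precision inputs selects
 * FPST_FPCR_F16, the float_status whose flush-to-zero behaviour follows
 * FPCR.FZ16 rather than FPCR.FZ; the MO_32 and MO_64 cases share the
 * ordinary FPST_FPCR status.
 */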

static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, tcg_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, tcg_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
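
/*
 * Worked example (illustrative) of the data packing used by
 * do_FMLAL_zzxw above: FMLSLT (indexed) with index=3 passes
 * (3 << 2) | (1 << 1) | 1 == 0xf, which the helper can unpack as
 * bit 0 = subtract, bit 1 = top-half select, remaining bits = element
 * index; do_BFMLAL_zzxw packs (index << 1) | sel the same way, just
 * without a subtract bit.
 */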

static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, tcg_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);

    /* Apply to either copy the source, or write zeros. */
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
    return true;
}

static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}

static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_sclamp_i32,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_sclamp_i64,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)

static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}

static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_uclamp_i32,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_uclamp_i64,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
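
/*
 * Worked example (illustrative): the clamp expansions compute
 * d = MIN(MAX(a, n), m) per element, i.e. the old destination value
 * clamped to [n, m].  For SCLAMP with n=-5 and m=10, 42 becomes 10 and
 * -7 becomes -5; if n > m, the final smin wins and every lane ends up
 * at m.
 */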