/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 * (When the tszh:tszl field above imm3 is zero, clz32(0) == 32 and
 * the result is 31 - 32 == -1.)
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

/* See e.g. ASR (immediate, predicated): shift amount for right shifts,
 * encoded as (2 * esize) - tszimm.
 */
static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

/* As expand_imm_sh8s, but with an unsigned 8-bit immediate. */
static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs, plus float_status. */
static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* As gen_gvec_fpst_zz, selecting FP16 vs FP32/64 status from a->esz. */
static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* As gen_gvec_fpst_zzz, selecting FP16 vs FP32/64 status from a->esz. */
static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

/* As gen_gvec_ool_arg_zzzz, passing the indexed-element field as data. */
static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}

/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 4 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

/* As gen_gvec_ool_arg_zpz, passing the immediate field as data. */
static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

/* Invoke an out-of-line helper on 2 Zregs, a predicate, plus fpst. */
static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* As gen_gvec_fpst_zzzp, selecting FP16 vs FP32/64 status from a->esz. */
static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));

    do_pred_flags(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};

static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

/* XAR expansion: (n ^ m) rotated right by sh, per 8-bit lane,
 * operating on a 64-bit word of packed bytes.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* As gen_xar8_i64, but per 16-bit lane. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

/* EOR3: d = n ^ m ^ k. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

/* BCAX: d = n ^ (m & ~k). */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)

/* BSL1N: d = (~n & k) | (m & ~k).  Note n and m are clobbered. */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

/* NBSL: d = ~((n & k) | (m & ~k)).  Note n and m are clobbered. */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {               \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,           \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                         \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

/* SDIV/UDIV are only provided for 32- and 64-bit elements. */
static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {              \
        gen_helper_##name##_b, gen_helper_##name##_h,               \
        gen_helper_##name##_s, gen_helper_##name##_d,               \
    };                                                              \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

/* FABS/FNEG have no byte-sized elements. */
static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

/* The extend operations require the destination be wider than the source. */
static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
/* Reduce Zn under Pg into a scalar, writing the result to Vd. */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);

    write_fp_dreg(s, a->rd, temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no 64-bit element form: the 64-bit accumulation of
 * 64-bit elements is UADDV.
 */
static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)

/*
 *** SVE Bitwise Shift - Predicated Group
 */

/* Shift by wide (64-bit) elements; no 64-bit element form exists. */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s, NULL                              \
    };                                                                    \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

/* Shift by wide (64-bit) elements, unpredicated; no 64-bit form. */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

/* Invoke an out-of-line helper on 4 Zregs and a predicate. */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])

/*
 *** SVE Index Generation Group
 */

static bool
do_index(DisasContext *s, int esz, int rd, 1198 TCGv_i64 start, TCGv_i64 incr) 1199 { 1200 unsigned vsz; 1201 TCGv_i32 desc; 1202 TCGv_ptr t_zd; 1203 1204 if (!sve_access_check(s)) { 1205 return true; 1206 } 1207 1208 vsz = vec_full_reg_size(s); 1209 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1210 t_zd = tcg_temp_new_ptr(); 1211 1212 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); 1213 if (esz == 3) { 1214 gen_helper_sve_index_d(t_zd, start, incr, desc); 1215 } else { 1216 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 1217 static index_fn * const fns[3] = { 1218 gen_helper_sve_index_b, 1219 gen_helper_sve_index_h, 1220 gen_helper_sve_index_s, 1221 }; 1222 TCGv_i32 s32 = tcg_temp_new_i32(); 1223 TCGv_i32 i32 = tcg_temp_new_i32(); 1224 1225 tcg_gen_extrl_i64_i32(s32, start); 1226 tcg_gen_extrl_i64_i32(i32, incr); 1227 fns[esz](t_zd, s32, i32, desc); 1228 } 1229 return true; 1230 } 1231 1232 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, 1233 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) 1234 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, 1235 tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) 1236 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, 1237 cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) 1238 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, 1239 cpu_reg(s, a->rn), cpu_reg(s, a->rm)) 1240 1241 /* 1242 *** SVE Stack Allocation Group 1243 */ 1244 1245 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) 1246 { 1247 if (!dc_isar_feature(aa64_sve, s)) { 1248 return false; 1249 } 1250 if (sve_access_check(s)) { 1251 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1252 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1253 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); 1254 } 1255 return true; 1256 } 1257 1258 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) 1259 { 1260 if (!dc_isar_feature(aa64_sme, s)) { 1261 return false; 1262 } 1263 if (sme_enabled_check(s)) { 1264 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 
1265 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1266 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); 1267 } 1268 return true; 1269 } 1270 1271 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) 1272 { 1273 if (!dc_isar_feature(aa64_sve, s)) { 1274 return false; 1275 } 1276 if (sve_access_check(s)) { 1277 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1278 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1279 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); 1280 } 1281 return true; 1282 } 1283 1284 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) 1285 { 1286 if (!dc_isar_feature(aa64_sme, s)) { 1287 return false; 1288 } 1289 if (sme_enabled_check(s)) { 1290 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1291 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1292 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); 1293 } 1294 return true; 1295 } 1296 1297 static bool trans_RDVL(DisasContext *s, arg_RDVL *a) 1298 { 1299 if (!dc_isar_feature(aa64_sve, s)) { 1300 return false; 1301 } 1302 if (sve_access_check(s)) { 1303 TCGv_i64 reg = cpu_reg(s, a->rd); 1304 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); 1305 } 1306 return true; 1307 } 1308 1309 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) 1310 { 1311 if (!dc_isar_feature(aa64_sme, s)) { 1312 return false; 1313 } 1314 if (sme_enabled_check(s)) { 1315 TCGv_i64 reg = cpu_reg(s, a->rd); 1316 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); 1317 } 1318 return true; 1319 } 1320 1321 /* 1322 *** SVE Compute Vector Address Group 1323 */ 1324 1325 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) 1326 { 1327 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); 1328 } 1329 1330 TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) 1331 TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) 1332 TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) 1333 TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, 
gen_helper_sve_adr_u32) 1334 1335 /* 1336 *** SVE Integer Misc - Unpredicated Group 1337 */ 1338 1339 static gen_helper_gvec_2 * const fexpa_fns[4] = { 1340 NULL, gen_helper_sve_fexpa_h, 1341 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, 1342 }; 1343 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, 1344 fexpa_fns[a->esz], a->rd, a->rn, 0) 1345 1346 static gen_helper_gvec_3 * const ftssel_fns[4] = { 1347 NULL, gen_helper_sve_ftssel_h, 1348 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, 1349 }; 1350 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, 1351 ftssel_fns[a->esz], a, 0) 1352 1353 /* 1354 *** SVE Predicate Logical Operations Group 1355 */ 1356 1357 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, 1358 const GVecGen4 *gvec_op) 1359 { 1360 if (!sve_access_check(s)) { 1361 return true; 1362 } 1363 1364 unsigned psz = pred_gvec_reg_size(s); 1365 int dofs = pred_full_reg_offset(s, a->rd); 1366 int nofs = pred_full_reg_offset(s, a->rn); 1367 int mofs = pred_full_reg_offset(s, a->rm); 1368 int gofs = pred_full_reg_offset(s, a->pg); 1369 1370 if (!a->s) { 1371 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1372 return true; 1373 } 1374 1375 if (psz == 8) { 1376 /* Do the operation and the flags generation in temps. */ 1377 TCGv_i64 pd = tcg_temp_new_i64(); 1378 TCGv_i64 pn = tcg_temp_new_i64(); 1379 TCGv_i64 pm = tcg_temp_new_i64(); 1380 TCGv_i64 pg = tcg_temp_new_i64(); 1381 1382 tcg_gen_ld_i64(pn, cpu_env, nofs); 1383 tcg_gen_ld_i64(pm, cpu_env, mofs); 1384 tcg_gen_ld_i64(pg, cpu_env, gofs); 1385 1386 gvec_op->fni8(pd, pn, pm, pg); 1387 tcg_gen_st_i64(pd, cpu_env, dofs); 1388 1389 do_predtest1(pd, pg); 1390 } else { 1391 /* The operation and flags generation is large. The computation 1392 * of the flags depends on the original contents of the guarding 1393 * predicate. If the destination overwrites the guarding predicate, 1394 * then the easiest way to get this right is to save a copy. 
1395 */ 1396 int tofs = gofs; 1397 if (a->rd == a->pg) { 1398 tofs = offsetof(CPUARMState, vfp.preg_tmp); 1399 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz); 1400 } 1401 1402 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1403 do_predtest(s, dofs, tofs, psz / 8); 1404 } 1405 return true; 1406 } 1407 1408 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1409 { 1410 tcg_gen_and_i64(pd, pn, pm); 1411 tcg_gen_and_i64(pd, pd, pg); 1412 } 1413 1414 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1415 TCGv_vec pm, TCGv_vec pg) 1416 { 1417 tcg_gen_and_vec(vece, pd, pn, pm); 1418 tcg_gen_and_vec(vece, pd, pd, pg); 1419 } 1420 1421 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) 1422 { 1423 static const GVecGen4 op = { 1424 .fni8 = gen_and_pg_i64, 1425 .fniv = gen_and_pg_vec, 1426 .fno = gen_helper_sve_and_pppp, 1427 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1428 }; 1429 1430 if (!dc_isar_feature(aa64_sve, s)) { 1431 return false; 1432 } 1433 if (!a->s) { 1434 if (a->rn == a->rm) { 1435 if (a->pg == a->rn) { 1436 return do_mov_p(s, a->rd, a->rn); 1437 } 1438 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); 1439 } else if (a->pg == a->rn || a->pg == a->rm) { 1440 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); 1441 } 1442 } 1443 return do_pppp_flags(s, a, &op); 1444 } 1445 1446 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1447 { 1448 tcg_gen_andc_i64(pd, pn, pm); 1449 tcg_gen_and_i64(pd, pd, pg); 1450 } 1451 1452 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1453 TCGv_vec pm, TCGv_vec pg) 1454 { 1455 tcg_gen_andc_vec(vece, pd, pn, pm); 1456 tcg_gen_and_vec(vece, pd, pd, pg); 1457 } 1458 1459 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) 1460 { 1461 static const GVecGen4 op = { 1462 .fni8 = gen_bic_pg_i64, 1463 .fniv = gen_bic_pg_vec, 1464 .fno = gen_helper_sve_bic_pppp, 1465 .prefer_i64 = 
TCG_TARGET_REG_BITS == 64, 1466 }; 1467 1468 if (!dc_isar_feature(aa64_sve, s)) { 1469 return false; 1470 } 1471 if (!a->s && a->pg == a->rn) { 1472 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); 1473 } 1474 return do_pppp_flags(s, a, &op); 1475 } 1476 1477 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1478 { 1479 tcg_gen_xor_i64(pd, pn, pm); 1480 tcg_gen_and_i64(pd, pd, pg); 1481 } 1482 1483 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1484 TCGv_vec pm, TCGv_vec pg) 1485 { 1486 tcg_gen_xor_vec(vece, pd, pn, pm); 1487 tcg_gen_and_vec(vece, pd, pd, pg); 1488 } 1489 1490 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) 1491 { 1492 static const GVecGen4 op = { 1493 .fni8 = gen_eor_pg_i64, 1494 .fniv = gen_eor_pg_vec, 1495 .fno = gen_helper_sve_eor_pppp, 1496 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1497 }; 1498 1499 if (!dc_isar_feature(aa64_sve, s)) { 1500 return false; 1501 } 1502 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ 1503 if (!a->s && a->pg == a->rm) { 1504 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); 1505 } 1506 return do_pppp_flags(s, a, &op); 1507 } 1508 1509 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) 1510 { 1511 if (a->s || !dc_isar_feature(aa64_sve, s)) { 1512 return false; 1513 } 1514 if (sve_access_check(s)) { 1515 unsigned psz = pred_gvec_reg_size(s); 1516 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), 1517 pred_full_reg_offset(s, a->pg), 1518 pred_full_reg_offset(s, a->rn), 1519 pred_full_reg_offset(s, a->rm), psz, psz); 1520 } 1521 return true; 1522 } 1523 1524 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1525 { 1526 tcg_gen_or_i64(pd, pn, pm); 1527 tcg_gen_and_i64(pd, pd, pg); 1528 } 1529 1530 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1531 TCGv_vec pm, TCGv_vec pg) 1532 { 1533 tcg_gen_or_vec(vece, pd, pn, pm); 1534 tcg_gen_and_vec(vece, pd, 
pd, pg); 1535 } 1536 1537 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) 1538 { 1539 static const GVecGen4 op = { 1540 .fni8 = gen_orr_pg_i64, 1541 .fniv = gen_orr_pg_vec, 1542 .fno = gen_helper_sve_orr_pppp, 1543 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1544 }; 1545 1546 if (!dc_isar_feature(aa64_sve, s)) { 1547 return false; 1548 } 1549 if (!a->s && a->pg == a->rn && a->rn == a->rm) { 1550 return do_mov_p(s, a->rd, a->rn); 1551 } 1552 return do_pppp_flags(s, a, &op); 1553 } 1554 1555 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1556 { 1557 tcg_gen_orc_i64(pd, pn, pm); 1558 tcg_gen_and_i64(pd, pd, pg); 1559 } 1560 1561 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1562 TCGv_vec pm, TCGv_vec pg) 1563 { 1564 tcg_gen_orc_vec(vece, pd, pn, pm); 1565 tcg_gen_and_vec(vece, pd, pd, pg); 1566 } 1567 1568 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) 1569 { 1570 static const GVecGen4 op = { 1571 .fni8 = gen_orn_pg_i64, 1572 .fniv = gen_orn_pg_vec, 1573 .fno = gen_helper_sve_orn_pppp, 1574 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1575 }; 1576 1577 if (!dc_isar_feature(aa64_sve, s)) { 1578 return false; 1579 } 1580 return do_pppp_flags(s, a, &op); 1581 } 1582 1583 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1584 { 1585 tcg_gen_or_i64(pd, pn, pm); 1586 tcg_gen_andc_i64(pd, pg, pd); 1587 } 1588 1589 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1590 TCGv_vec pm, TCGv_vec pg) 1591 { 1592 tcg_gen_or_vec(vece, pd, pn, pm); 1593 tcg_gen_andc_vec(vece, pd, pg, pd); 1594 } 1595 1596 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) 1597 { 1598 static const GVecGen4 op = { 1599 .fni8 = gen_nor_pg_i64, 1600 .fniv = gen_nor_pg_vec, 1601 .fno = gen_helper_sve_nor_pppp, 1602 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1603 }; 1604 1605 if (!dc_isar_feature(aa64_sve, s)) { 1606 return false; 1607 } 1608 return do_pppp_flags(s, a, &op); 1609 } 1610 

/* Fixed-width NAND gated by the predicate: pd = pg & ~(pn & pm). */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector form of the same: pd = pg & ~(pn & pm). */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/*
 * NAND (predicates).  Unlike AND/BIC/EOR there is no register-aliasing
 * special case to simplify; everything funnels through do_pppp_flags,
 * which also handles the flag-setting (NANDS) form.
 */
static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        /* Number of 64-bit words covering the predicate register. */
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Fast path: the whole predicate fits in one i64 load. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default: /* #uimm5 */
        return 0;
    }
    /* Fixed VLn patterns yield 0 when the vector cannot hold them. */
    return elements >= bound ? bound : 0;
}

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.
     * WORD is the pattern repeated in every full 64-bit word of the
     * set region; LASTWORD is WORD truncated for a partial final word.
     */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate register is a single store. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform contents: try a single vector dup over the set region. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* General case: store word-by-word, then the odd final word,
     * then zero the remainder of the register.
     */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

/*
 * Common expansion for PFIRST and PNEXT: invoke GEN_FN on Pd/Pn with
 * a predicate descriptor, then forward its i32 result to do_pred_flags.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    /* Pack predicate size and element size into the descriptor. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));

    do_pred_flags(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtract, then clamp below at the type minimum. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Add, then clamp above at the type maximum. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}

/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned underflow iff reg < val; select 0 in that case. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned overflow iff sum < reg; select all-ones then. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction:
             * overflow iff operands differ in sign and the result's
             * sign differs from reg; t0 < 0 exactly in that case.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result: VAL is positive, so overflow is only
             * downward -- select INT64_MIN on overflow, else t1.
             */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition:
             * t0 < 0 iff both inputs share a sign that the sum lost.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result: select INT64_MAX on overflow. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
    }
}

/* Similarly with a vector and a scalar operand.
*/ 1912 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1913 TCGv_i64 val, bool u, bool d) 1914 { 1915 unsigned vsz = vec_full_reg_size(s); 1916 TCGv_ptr dptr, nptr; 1917 TCGv_i32 t32, desc; 1918 TCGv_i64 t64; 1919 1920 dptr = tcg_temp_new_ptr(); 1921 nptr = tcg_temp_new_ptr(); 1922 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd)); 1923 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn)); 1924 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1925 1926 switch (esz) { 1927 case MO_8: 1928 t32 = tcg_temp_new_i32(); 1929 tcg_gen_extrl_i64_i32(t32, val); 1930 if (d) { 1931 tcg_gen_neg_i32(t32, t32); 1932 } 1933 if (u) { 1934 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1935 } else { 1936 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1937 } 1938 break; 1939 1940 case MO_16: 1941 t32 = tcg_temp_new_i32(); 1942 tcg_gen_extrl_i64_i32(t32, val); 1943 if (d) { 1944 tcg_gen_neg_i32(t32, t32); 1945 } 1946 if (u) { 1947 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1948 } else { 1949 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1950 } 1951 break; 1952 1953 case MO_32: 1954 t64 = tcg_temp_new_i64(); 1955 if (d) { 1956 tcg_gen_neg_i64(t64, val); 1957 } else { 1958 tcg_gen_mov_i64(t64, val); 1959 } 1960 if (u) { 1961 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1962 } else { 1963 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1964 } 1965 break; 1966 1967 case MO_64: 1968 if (u) { 1969 if (d) { 1970 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1971 } else { 1972 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1973 } 1974 } else if (d) { 1975 t64 = tcg_temp_new_i64(); 1976 tcg_gen_neg_i64(t64, val); 1977 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1978 } else { 1979 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1980 } 1981 break; 1982 1983 default: 1984 g_assert_not_reached(); 1985 } 1986 } 1987 1988 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1989 { 1990 if (!dc_isar_feature(aa64_sve, s)) { 1991 return 
false; 1992 } 1993 if (sve_access_check(s)) { 1994 unsigned fullsz = vec_full_reg_size(s); 1995 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1996 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 1997 } 1998 return true; 1999 } 2000 2001 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 2002 { 2003 if (!dc_isar_feature(aa64_sve, s)) { 2004 return false; 2005 } 2006 if (sve_access_check(s)) { 2007 unsigned fullsz = vec_full_reg_size(s); 2008 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2009 int inc = numelem * a->imm * (a->d ? -1 : 1); 2010 TCGv_i64 reg = cpu_reg(s, a->rd); 2011 2012 tcg_gen_addi_i64(reg, reg, inc); 2013 } 2014 return true; 2015 } 2016 2017 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 2018 { 2019 if (!dc_isar_feature(aa64_sve, s)) { 2020 return false; 2021 } 2022 if (!sve_access_check(s)) { 2023 return true; 2024 } 2025 2026 unsigned fullsz = vec_full_reg_size(s); 2027 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2028 int inc = numelem * a->imm; 2029 TCGv_i64 reg = cpu_reg(s, a->rd); 2030 2031 /* Use normal 64-bit arithmetic to detect 32-bit overflow. 
*/ 2032 if (inc == 0) { 2033 if (a->u) { 2034 tcg_gen_ext32u_i64(reg, reg); 2035 } else { 2036 tcg_gen_ext32s_i64(reg, reg); 2037 } 2038 } else { 2039 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 2040 } 2041 return true; 2042 } 2043 2044 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2045 { 2046 if (!dc_isar_feature(aa64_sve, s)) { 2047 return false; 2048 } 2049 if (!sve_access_check(s)) { 2050 return true; 2051 } 2052 2053 unsigned fullsz = vec_full_reg_size(s); 2054 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2055 int inc = numelem * a->imm; 2056 TCGv_i64 reg = cpu_reg(s, a->rd); 2057 2058 if (inc != 0) { 2059 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2060 } 2061 return true; 2062 } 2063 2064 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2065 { 2066 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2067 return false; 2068 } 2069 2070 unsigned fullsz = vec_full_reg_size(s); 2071 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2072 int inc = numelem * a->imm; 2073 2074 if (inc != 0) { 2075 if (sve_access_check(s)) { 2076 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2077 vec_full_reg_offset(s, a->rn), 2078 tcg_constant_i64(a->d ? 
-inc : inc), 2079 fullsz, fullsz); 2080 } 2081 } else { 2082 do_mov_z(s, a->rd, a->rn); 2083 } 2084 return true; 2085 } 2086 2087 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2088 { 2089 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2090 return false; 2091 } 2092 2093 unsigned fullsz = vec_full_reg_size(s); 2094 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2095 int inc = numelem * a->imm; 2096 2097 if (inc != 0) { 2098 if (sve_access_check(s)) { 2099 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2100 tcg_constant_i64(inc), a->u, a->d); 2101 } 2102 } else { 2103 do_mov_z(s, a->rd, a->rn); 2104 } 2105 return true; 2106 } 2107 2108 /* 2109 *** SVE Bitwise Immediate Group 2110 */ 2111 2112 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2113 { 2114 uint64_t imm; 2115 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2116 extract32(a->dbm, 0, 6), 2117 extract32(a->dbm, 6, 6))) { 2118 return false; 2119 } 2120 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2121 } 2122 2123 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2124 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2125 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2126 2127 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2128 { 2129 uint64_t imm; 2130 2131 if (!dc_isar_feature(aa64_sve, s)) { 2132 return false; 2133 } 2134 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2135 extract32(a->dbm, 0, 6), 2136 extract32(a->dbm, 6, 6))) { 2137 return false; 2138 } 2139 if (sve_access_check(s)) { 2140 do_dupi_z(s, a->rd, imm); 2141 } 2142 return true; 2143 } 2144 2145 /* 2146 *** SVE Integer Wide Immediate - Predicated Group 2147 */ 2148 2149 /* Implement all merging copies. This is used for CPY (immediate), 2150 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    /* esz == 0 (byte) has no FP interpretation: unallocated. */
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate (FPImm8 expansion). */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

/* CPY (immediate), merging: copy imm into the active elements of Zd. */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}

/* CPY (immediate), zeroing: inactive elements of Zd become zero. */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    /* An out-of-range index selects the whole first operand. */
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
2236 */ 2237 if (m != d 2238 && n_ofs == size_for_gvec(n_ofs) 2239 && n_siz == size_for_gvec(n_siz) 2240 && (d != n || n_siz <= n_ofs)) { 2241 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2242 if (n_ofs != 0) { 2243 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2244 } 2245 } else { 2246 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2247 } 2248 return true; 2249 } 2250 2251 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2252 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2253 2254 /* 2255 *** SVE Permute - Unpredicated Group 2256 */ 2257 2258 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2259 { 2260 if (!dc_isar_feature(aa64_sve, s)) { 2261 return false; 2262 } 2263 if (sve_access_check(s)) { 2264 unsigned vsz = vec_full_reg_size(s); 2265 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2266 vsz, vsz, cpu_reg_sp(s, a->rn)); 2267 } 2268 return true; 2269 } 2270 2271 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2272 { 2273 if (!dc_isar_feature(aa64_sve, s)) { 2274 return false; 2275 } 2276 if ((a->imm & 0x1f) == 0) { 2277 return false; 2278 } 2279 if (sve_access_check(s)) { 2280 unsigned vsz = vec_full_reg_size(s); 2281 unsigned dofs = vec_full_reg_offset(s, a->rd); 2282 unsigned esz, index; 2283 2284 esz = ctz32(a->imm); 2285 index = a->imm >> (esz + 1); 2286 2287 if ((index << esz) < vsz) { 2288 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2289 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2290 } else { 2291 /* 2292 * While dup_mem handles 128-bit elements, dup_imm does not. 2293 * Thankfully element size doesn't matter for splatting zero. 
2294 */ 2295 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2296 } 2297 } 2298 return true; 2299 } 2300 2301 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2302 { 2303 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2304 static gen_insr * const fns[4] = { 2305 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2306 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2307 }; 2308 unsigned vsz = vec_full_reg_size(s); 2309 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2310 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2311 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2312 2313 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd)); 2314 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2315 2316 fns[a->esz](t_zd, t_zn, val, desc); 2317 } 2318 2319 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2320 { 2321 if (!dc_isar_feature(aa64_sve, s)) { 2322 return false; 2323 } 2324 if (sve_access_check(s)) { 2325 TCGv_i64 t = tcg_temp_new_i64(); 2326 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2327 do_insr_i64(s, a, t); 2328 } 2329 return true; 2330 } 2331 2332 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2333 { 2334 if (!dc_isar_feature(aa64_sve, s)) { 2335 return false; 2336 } 2337 if (sve_access_check(s)) { 2338 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2339 } 2340 return true; 2341 } 2342 2343 static gen_helper_gvec_2 * const rev_fns[4] = { 2344 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2345 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2346 }; 2347 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2348 2349 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2350 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2351 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2352 }; 2353 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0) 2354 2355 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2356 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2357 
gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2358 }; 2359 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2360 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2361 2362 static gen_helper_gvec_3 * const tbx_fns[4] = { 2363 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2364 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2365 }; 2366 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2367 2368 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2369 { 2370 static gen_helper_gvec_2 * const fns[4][2] = { 2371 { NULL, NULL }, 2372 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2373 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2374 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2375 }; 2376 2377 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2378 return false; 2379 } 2380 if (sve_access_check(s)) { 2381 unsigned vsz = vec_full_reg_size(s); 2382 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2383 vec_full_reg_offset(s, a->rn) 2384 + (a->h ? 
vsz / 2 : 0), 2385 vsz, vsz, 0, fns[a->esz][a->u]); 2386 } 2387 return true; 2388 } 2389 2390 /* 2391 *** SVE Permute - Predicates Group 2392 */ 2393 2394 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2395 gen_helper_gvec_3 *fn) 2396 { 2397 if (!sve_access_check(s)) { 2398 return true; 2399 } 2400 2401 unsigned vsz = pred_full_reg_size(s); 2402 2403 TCGv_ptr t_d = tcg_temp_new_ptr(); 2404 TCGv_ptr t_n = tcg_temp_new_ptr(); 2405 TCGv_ptr t_m = tcg_temp_new_ptr(); 2406 uint32_t desc = 0; 2407 2408 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2409 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2410 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2411 2412 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2413 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2414 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm)); 2415 2416 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2417 return true; 2418 } 2419 2420 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2421 gen_helper_gvec_2 *fn) 2422 { 2423 if (!sve_access_check(s)) { 2424 return true; 2425 } 2426 2427 unsigned vsz = pred_full_reg_size(s); 2428 TCGv_ptr t_d = tcg_temp_new_ptr(); 2429 TCGv_ptr t_n = tcg_temp_new_ptr(); 2430 uint32_t desc = 0; 2431 2432 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2433 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2434 2435 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2436 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2437 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2438 2439 fn(t_d, t_n, tcg_constant_i32(desc)); 2440 return true; 2441 } 2442 2443 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2444 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2445 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2446 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 
TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)

TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)

/*
 *** SVE Permute - Interleaving Group
 */

static gen_helper_gvec_3 * const zip_fns[4] = {
    gen_helper_sve_zip_b, gen_helper_sve_zip_h,
    gen_helper_sve_zip_s, gen_helper_sve_zip_d,
};
TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, 0)
/* ZIP2 starts from the high half: helper data = vsz / 2.  */
TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, vec_full_reg_size(s) / 2)

TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a, 0)
/* For 128-bit elements the high half is aligned down to a
 * multiple of two quadwords.
 */
TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a,
           QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
/* UZP2/TRN2 select the odd elements: helper data = element size in bytes. */
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)

/*
 *** SVE Permute Vector - Predicated Group
 */

/* COMPACT is only defined for word and doubleword element sizes. */
static gen_helper_gvec_3 * const compact_fns[4] = {
    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
                        compact_fns[a->esz], a, 0)

/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two size: wrap with a mask.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise, select 0 when last has run past the end.  */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector.
*/ 2551 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2552 { 2553 unsigned vsz = vec_full_reg_size(s); 2554 2555 if (is_power_of_2(vsz)) { 2556 tcg_gen_andi_i32(last, last, vsz - 1); 2557 } else { 2558 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2559 TCGv_i32 zero = tcg_constant_i32(0); 2560 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2561 } 2562 } 2563 2564 /* Load an unsigned element of ESZ from BASE+OFS. */ 2565 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2566 { 2567 TCGv_i64 r = tcg_temp_new_i64(); 2568 2569 switch (esz) { 2570 case 0: 2571 tcg_gen_ld8u_i64(r, base, ofs); 2572 break; 2573 case 1: 2574 tcg_gen_ld16u_i64(r, base, ofs); 2575 break; 2576 case 2: 2577 tcg_gen_ld32u_i64(r, base, ofs); 2578 break; 2579 case 3: 2580 tcg_gen_ld_i64(r, base, ofs); 2581 break; 2582 default: 2583 g_assert_not_reached(); 2584 } 2585 return r; 2586 } 2587 2588 /* Load an unsigned element of ESZ from RM[LAST]. */ 2589 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2590 int rm, int esz) 2591 { 2592 TCGv_ptr p = tcg_temp_new_ptr(); 2593 2594 /* Convert offset into vector into offset into ENV. 2595 * The final adjustment for the vector register base 2596 * is added via constant offset to the load. 2597 */ 2598 #if HOST_BIG_ENDIAN 2599 /* Adjust for element ordering. See vec_reg_offset. */ 2600 if (esz < 3) { 2601 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2602 } 2603 #endif 2604 tcg_gen_ext_i32_ptr(p, last); 2605 tcg_gen_add_ptr(p, p, cpu_env); 2606 2607 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2608 } 2609 2610 /* Compute CLAST for a Zreg. 
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        /* CLASTA selects the element after the last active one.  */
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* No active element: the result is a copy of Zn.  */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)

/* Compute CLAST for a scalar.  On no active element, REG_VAL is unchanged. */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);
}

/* Compute CLAST for a Vreg. */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
    }
    return true;
}

TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)

/* Compute CLAST for a Xreg. */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Narrow the current Xd value to the element size; it is the
     * fallback result when no element is active.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)

/* Compute LAST for a scalar. */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();

    find_last_active(s, last, esz, pg);
    if (before) {
        /* LASTB: wrap "not found" to the last element.  */
        wrap_last_active(s, last, esz);
    } else {
        /* LASTA: advance past the last active element (wraps to 0).  */
        incr_last_active(s, last, esz);
    }

    return load_last_active(s, last, rm, esz);
}

/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
    }
    return true;
}

TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)

/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
    }
    return true;
}

TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)

/* CPY (scalar, merging): copy Xn/SP to active elements of Zd. */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar, merging): copy Vn[0] to active elements of Zd. */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
    }
    return true;
}

/* REVB requires elements of at least 16 bits to have bytes to swap. */
static gen_helper_gvec_3 * const revb_fns[4] = {
    NULL, gen_helper_sve_revb_h,
    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
};
TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
};
TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)

TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)

TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)

TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
           gen_helper_sve_splice, a, a->esz)

/* SVE2 constructive SPLICE uses the register pair Zn:Z(n+1).  */
TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
           a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)

/*
 *** SVE Integer Compare - Vectors Group
 */

/* Invoke a flag-setting predicated compare helper (Pd, Zn, Zm, Pg)
 * and update NZCV from its return value.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    do_pred_flags(t);
    return true;
}

#define DO_PPZZ(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {       \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzz_fns[a->esz])

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Wide-element compares: Zm has 64-bit elements, so no _d variant. */
#define DO_PPZW(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {       \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzw_fns[a->esz])

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW

/*
 *** SVE Integer Compare - Immediate Groups
 */

/* As do_ppzz_flags, but the second compare operand is an immediate
 * passed in the descriptor's data field.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    do_pred_flags(t);
    return true;
}

#define DO_PPZI(NAME, name) \
    static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {       \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                 \
               name##_ppzi_fns[a->esz])

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)
2952 #undef DO_PPZI 2953 2954 /* 2955 *** SVE Partition Break Group 2956 */ 2957 2958 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2959 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2960 { 2961 if (!sve_access_check(s)) { 2962 return true; 2963 } 2964 2965 unsigned vsz = pred_full_reg_size(s); 2966 2967 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2968 TCGv_ptr d = tcg_temp_new_ptr(); 2969 TCGv_ptr n = tcg_temp_new_ptr(); 2970 TCGv_ptr m = tcg_temp_new_ptr(); 2971 TCGv_ptr g = tcg_temp_new_ptr(); 2972 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2973 2974 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 2975 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 2976 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm)); 2977 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 2978 2979 if (a->s) { 2980 TCGv_i32 t = tcg_temp_new_i32(); 2981 fn_s(t, d, n, m, g, desc); 2982 do_pred_flags(t); 2983 } else { 2984 fn(d, n, m, g, desc); 2985 } 2986 return true; 2987 } 2988 2989 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2990 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 2991 { 2992 if (!sve_access_check(s)) { 2993 return true; 2994 } 2995 2996 unsigned vsz = pred_full_reg_size(s); 2997 2998 /* Predicate sizes may be smaller and cannot use simd_desc. 
*/ 2999 TCGv_ptr d = tcg_temp_new_ptr(); 3000 TCGv_ptr n = tcg_temp_new_ptr(); 3001 TCGv_ptr g = tcg_temp_new_ptr(); 3002 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3003 3004 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 3005 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 3006 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 3007 3008 if (a->s) { 3009 TCGv_i32 t = tcg_temp_new_i32(); 3010 fn_s(t, d, n, g, desc); 3011 do_pred_flags(t); 3012 } else { 3013 fn(d, n, g, desc); 3014 } 3015 return true; 3016 } 3017 3018 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3019 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3020 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3021 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3022 3023 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3024 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3025 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3026 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3027 3028 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3029 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3030 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3031 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3032 3033 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3034 gen_helper_sve_brkn, gen_helper_sve_brkns) 3035 3036 /* 3037 *** SVE Predicate Count Group 3038 */ 3039 3040 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3041 { 3042 unsigned psz = pred_full_reg_size(s); 3043 3044 if (psz <= 8) { 3045 uint64_t psz_mask; 3046 3047 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn)); 3048 if (pn != pg) { 3049 TCGv_i64 g = tcg_temp_new_i64(); 3050 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg)); 3051 tcg_gen_and_i64(val, val, g); 3052 } 3053 3054 /* Reduce the pred_esz_masks value simply to reduce the 3055 * size of the code generated here. 
3056 */ 3057 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3058 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3059 3060 tcg_gen_ctpop_i64(val, val); 3061 } else { 3062 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3063 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3064 unsigned desc = 0; 3065 3066 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3067 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3068 3069 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn)); 3070 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3071 3072 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3073 } 3074 } 3075 3076 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3077 { 3078 if (!dc_isar_feature(aa64_sve, s)) { 3079 return false; 3080 } 3081 if (sve_access_check(s)) { 3082 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3083 } 3084 return true; 3085 } 3086 3087 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3088 { 3089 if (!dc_isar_feature(aa64_sve, s)) { 3090 return false; 3091 } 3092 if (sve_access_check(s)) { 3093 TCGv_i64 reg = cpu_reg(s, a->rd); 3094 TCGv_i64 val = tcg_temp_new_i64(); 3095 3096 do_cntp(s, val, a->esz, a->pg, a->pg); 3097 if (a->d) { 3098 tcg_gen_sub_i64(reg, reg, val); 3099 } else { 3100 tcg_gen_add_i64(reg, reg, val); 3101 } 3102 } 3103 return true; 3104 } 3105 3106 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3107 { 3108 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3109 return false; 3110 } 3111 if (sve_access_check(s)) { 3112 unsigned vsz = vec_full_reg_size(s); 3113 TCGv_i64 val = tcg_temp_new_i64(); 3114 GVecGen2sFn *gvec_fn = a->d ? 
tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3115 3116 do_cntp(s, val, a->esz, a->pg, a->pg); 3117 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3118 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3119 } 3120 return true; 3121 } 3122 3123 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3124 { 3125 if (!dc_isar_feature(aa64_sve, s)) { 3126 return false; 3127 } 3128 if (sve_access_check(s)) { 3129 TCGv_i64 reg = cpu_reg(s, a->rd); 3130 TCGv_i64 val = tcg_temp_new_i64(); 3131 3132 do_cntp(s, val, a->esz, a->pg, a->pg); 3133 do_sat_addsub_32(reg, val, a->u, a->d); 3134 } 3135 return true; 3136 } 3137 3138 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3139 { 3140 if (!dc_isar_feature(aa64_sve, s)) { 3141 return false; 3142 } 3143 if (sve_access_check(s)) { 3144 TCGv_i64 reg = cpu_reg(s, a->rd); 3145 TCGv_i64 val = tcg_temp_new_i64(); 3146 3147 do_cntp(s, val, a->esz, a->pg, a->pg); 3148 do_sat_addsub_64(reg, val, a->u, a->d); 3149 } 3150 return true; 3151 } 3152 3153 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3154 { 3155 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3156 return false; 3157 } 3158 if (sve_access_check(s)) { 3159 TCGv_i64 val = tcg_temp_new_i64(); 3160 do_cntp(s, val, a->esz, a->pg, a->pg); 3161 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3162 } 3163 return true; 3164 } 3165 3166 /* 3167 *** SVE Integer Compare Scalars Group 3168 */ 3169 3170 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3171 { 3172 if (!dc_isar_feature(aa64_sve, s)) { 3173 return false; 3174 } 3175 if (!sve_access_check(s)) { 3176 return true; 3177 } 3178 3179 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3180 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3181 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3182 TCGv_i64 cmp = tcg_temp_new_i64(); 3183 3184 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3185 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3186 3187 /* VF = !NF & !CF. 
*/ 3188 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3189 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3190 3191 /* Both NF and VF actually look at bit 31. */ 3192 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3193 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3194 return true; 3195 } 3196 3197 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3198 { 3199 TCGv_i64 op0, op1, t0, t1, tmax; 3200 TCGv_i32 t2; 3201 TCGv_ptr ptr; 3202 unsigned vsz = vec_full_reg_size(s); 3203 unsigned desc = 0; 3204 TCGCond cond; 3205 uint64_t maxval; 3206 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3207 bool eq = a->eq == a->lt; 3208 3209 /* The greater-than conditions are all SVE2. */ 3210 if (a->lt 3211 ? !dc_isar_feature(aa64_sve, s) 3212 : !dc_isar_feature(aa64_sve2, s)) { 3213 return false; 3214 } 3215 if (!sve_access_check(s)) { 3216 return true; 3217 } 3218 3219 op0 = read_cpu_reg(s, a->rn, 1); 3220 op1 = read_cpu_reg(s, a->rm, 1); 3221 3222 if (!a->sf) { 3223 if (a->u) { 3224 tcg_gen_ext32u_i64(op0, op0); 3225 tcg_gen_ext32u_i64(op1, op1); 3226 } else { 3227 tcg_gen_ext32s_i64(op0, op0); 3228 tcg_gen_ext32s_i64(op1, op1); 3229 } 3230 } 3231 3232 /* For the helper, compress the different conditions into a computation 3233 * of how many iterations for which the condition is true. 3234 */ 3235 t0 = tcg_temp_new_i64(); 3236 t1 = tcg_temp_new_i64(); 3237 3238 if (a->lt) { 3239 tcg_gen_sub_i64(t0, op1, op0); 3240 if (a->u) { 3241 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3242 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3243 } else { 3244 maxval = a->sf ? INT64_MAX : INT32_MAX; 3245 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3246 } 3247 } else { 3248 tcg_gen_sub_i64(t0, op0, op1); 3249 if (a->u) { 3250 maxval = 0; 3251 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3252 } else { 3253 maxval = a->sf ? INT64_MIN : INT32_MIN; 3254 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3255 } 3256 } 3257 3258 tmax = tcg_constant_i64(vsz >> a->esz); 3259 if (eq) { 3260 /* Equality means one more iteration. 
         */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* The helper writes the predicate register and computes NZCV. */
    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);
    return true;
}

/*
 * WHILERW / WHILEWR (SVE2): set a predicate from the overlap of two
 * pointer ranges.  The element count is derived from the (possibly
 * absolute) pointer difference, bounded by the vector length.
 */
static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
{
    TCGv_i64 op0, op1, diff, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    tmax = tcg_constant_i64(vsz);
    diff = tcg_temp_new_i64();

    if (a->rw) {
        /* WHILERW */
        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
        t1 = tcg_temp_new_i64();
        tcg_gen_sub_i64(diff, op0, op1);
        tcg_gen_sub_i64(t1, op1, op0);
        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
        /* Round down to a multiple of ESIZE. */
        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
        /* If op1 == op0, diff == 0, and the condition is always true. */
        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
    } else {
        /* WHILEWR */
        tcg_gen_sub_i64(diff, op1, op0);
        /* Round down to a multiple of ESIZE. */
        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
        /* If op0 >= op1, diff <= 0, the condition is always true. */
        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(diff, diff, tmax);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, diff);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    do_pred_flags(t2);
    return true;
}

/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */

/* FDUP: broadcast a VFP-encoded FP immediate to all elements of Zd. */
static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    /* esz == 0 (byte) has no FP immediate encoding. */
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate. */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}

/* DUP (immediate): broadcast an integer immediate to all elements of Zd. */
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}

TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)

/* SUB (immediate) is implemented as ADD of the negated immediate. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}

/* SUBR (immediate): reversed subtract, imm - Zn, via scalar-first gvec. */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}

TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)

/* Saturating add/subtract of an immediate: u selects unsigned, d subtract. */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}

TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)

/* Invoke an out-of-line helper on one Zreg and an i64 immediate. */
static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand min/max-with-immediate via per-esz helper tables. */
#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = {              \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,       \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,       \
    };                                                                \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI

/* Dot product helpers indexed by [unsigned][size]. */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)

/*
 * SVE Multiply - Indexed
 */

/* Note the _d dot products use the _h (16-bit element) helpers. */
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)

/* Two-operand indexed multiplies; the element index is passed as data. */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX

/* Indexed widening multiplies; TOP selects the top/bottom half. */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, \
               (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB

/* Three-operand (accumulating) indexed operations. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR

/* Accumulating indexed widening ops; TOP selects top/bottom half. */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB

/* Complex indexed ops: data packs the index with the 2-bit rotation. */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT

/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

/* FMLA/FMLS (indexed); sub is packed into bit 0 of the helper data. */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };
    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sub,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)

/*
 *** SVE Floating Point Multiply Indexed Group
 */

static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
    NULL, gen_helper_gvec_fmul_idx_h,
    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 *** SVE Floating Point Fast Reduction Group
 */

typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Horizontal FP reduction of Zn under Pg into a scalar written to Vd.
 * data2 of the descriptor carries pow2ceil(vsz) so the helper can use
 * a balanced-tree reduction.
 */
static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz, p2vsz;
    TCGv_i32 t_desc;
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    p2vsz = pow2ceil(vsz);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);

    write_fp_dreg(s, a->rd, temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_fp_reduce * const name##_fns[4] = {     \
        NULL, gen_helper_sve_##name##_h,                      \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    };                                                        \
    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

#undef DO_VPZ

/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
    NULL, gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)

static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
    NULL, gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s,
    gen_helper_gvec_frsqrte_d,
};
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)

/*
 *** SVE Floating Point Compare with Zero Group
 */

/* FP compare-with-zero, writing a predicate result to Pd. */
static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_PPZ(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[] = {         \
        NULL,                      gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ

/*
 *** SVE floating-point trig multiply-add coefficient
 */

static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
    NULL, gen_helper_sve_ftmad_h,
    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
                        ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
                        a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 *** SVE Floating Point Accumulating Reduction Group
 */

/*
 * FADDA: strictly-ordered FP add reduction of Zm under Pg, seeded from
 * element 0 of Zn, result written to Vd.  Not available in streaming mode.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    /* Indexed by esz - 1: byte elements are invalid for FADDA. */
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    write_fp_dreg(s, a->rd, t_val);
    return true;
}

/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

#define DO_FP3(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[4] = {          \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3

static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
    NULL, gen_helper_gvec_ftsmul_h,
    gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
};
TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
                        ftsmul_fns[a->esz], a, 0)

/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */

#define DO_ZPZZ_FP(NAME, FEAT, name) \
    static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = {   \
        NULL, gen_helper_##name##_h,                              \
        gen_helper_##name##_s, gen_helper_##name##_d              \
    };                                                            \
    TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)

DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)

typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr,
                                      TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/* Predicated Zd = Zn <op> scalar, out of line. */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);
}

/* Predicated FP op with one of two fixed immediates, selected by a->imm. */
static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                     tcg_constant_i64(imm), fn);
    }
    return true;
}

/*
 * Each instruction encodes a choice of two constants (const0/const1),
 * expanded per element size; esz 0 (byte) is invalid, hence { -1, -1 }.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
    static gen_helper_sve_fp2scalar * const name##_fns[4] = {    \
        NULL, gen_helper_sve_##name##_h,                         \
        gen_helper_sve_##name##_s,                               \
        gen_helper_sve_##name##_d                                \
    };                                                           \
    static uint64_t const name##_const[4][2] = {                 \
        { -1, -1 },                                              \
        { float16_##const0, float16_##const1 },                  \
        { float32_##const0, float32_##const1 },                  \
        { float64_##const0, float64_##const1 },                  \
    };                                                           \
    TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a,              \
               name##_const[a->esz][a->imm], name##_fns[a->esz])

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM

/* Predicated FP compare of two Zregs, writing a predicate to Pd. */
static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
                      gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_FPCMP(NAME, name) \
    static gen_helper_gvec_4_ptr * const name##_fns[4] = {      \
        NULL, gen_helper_sve_##name##_h,                        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d    \
    };                                                          \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP

static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
    NULL, gen_helper_sve_fcadd_h,
    gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
           a->rd, a->rn, a->rm, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/* Predicated FP multiply-add family (fused, per-element). */
#define DO_FMLA(NAME, name) \
    static gen_helper_gvec_5_ptr * const name##_fns[4] = {          \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
               a->rd, a->rn, a->rm, a->ra, a->pg, 0,                \
               a->esz == MO_16 ? \
               FPST_FPCR_F16 : FPST_FPCR)

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA

static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
    NULL, gen_helper_sve_fcmla_zpzzz_h,
    gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
};
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/* FCMLA (indexed) exists only for h and s element sizes. */
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
    NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
};
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)

/* Conversions from f16 use the FP16 status/rounding context. */
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)

TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)

static gen_helper_gvec_3_ptr * const frint_fns[] = {
    NULL,
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const frintx_fns[] = {
    NULL,
    gen_helper_sve_frintx_h,
    gen_helper_sve_frintx_s,
    gen_helper_sve_frintx_d
};
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

/*
 * FRINT* with an explicit rounding mode: temporarily override the
 * rounding mode in fp_status around the vector operation.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    tmode = gen_set_rmode(mode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    /* Restore the caller's rounding mode. */
    gen_restore_rmode(tmode, status);
    return true;
}

TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           FPROUNDING_TIEEVEN, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           FPROUNDING_POSINF, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           FPROUNDING_NEGINF, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           FPROUNDING_ZERO, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           FPROUNDING_TIEAWAY, frint_fns[a->esz])

static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL, gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL, gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 16);
    int len_remain = len % 16;
    /* Count of 16-byte quanta plus one part per remainder bit. */
    int nparts = len / 16 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;
    TCGv_i128 t16;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        t0 = tcg_temp_new_i64();
        t1 = tcg_temp_new_i64();
        t16 = tcg_temp_new_i128();

        for (i = 0; i < len_align; i += 16) {
            tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
                                 MO_LE | MO_128 | MO_ATOM_NONE);
            tcg_gen_extr_i128_i64(t0, t1, t16);
            tcg_gen_st_i64(t0, base, vofs + i);
            tcg_gen_st_i64(t1, base, vofs + i + 8);
            tcg_gen_addi_i64(clean_addr, clean_addr, 16);
        }
    } else {
        /* Too many parts to unroll: emit a run-time loop instead. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_temp_new_ptr();

        tcg_gen_movi_ptr(i, 0);
        gen_set_label(loop);

        t16 = tcg_temp_new_i128();
        tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
                             MO_LE | MO_128 | MO_ATOM_NONE);
        tcg_gen_addi_i64(clean_addr, clean_addr, 16);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_addi_ptr(i, i, 16);

        t0 = tcg_temp_new_i64();
        t1 = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(t0, t1, t16);

        tcg_gen_st_i64(t0, tp, vofs);
        tcg_gen_st_i64(t1, tp, vofs + 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain >= 8) {
        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
        tcg_gen_st_i64(t0, base, vofs + len_align);
        len_remain -= 8;
        len_align += 8;
        if (len_remain) {
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
    }
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 bytes to MO_16/MO_32/MO_64. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
            break;

        case 6:
            /* 6 bytes: a 4-byte load followed by a 2-byte load. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, base, vofs + len_align);
    }
}

/* Similarly for stores. */
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 16);
    int len_remain = len % 16;
    int nparts = len / 16 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;
    TCGv_i128 t16;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities. There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
4290 */ 4291 if (nparts <= 4) { 4292 int i; 4293 4294 t0 = tcg_temp_new_i64(); 4295 t1 = tcg_temp_new_i64(); 4296 t16 = tcg_temp_new_i128(); 4297 for (i = 0; i < len_align; i += 8) { 4298 tcg_gen_ld_i64(t0, base, vofs + i); 4299 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4300 tcg_gen_concat_i64_i128(t16, t0, t1); 4301 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4302 MO_LE | MO_128 | MO_ATOM_NONE); 4303 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4304 } 4305 } else { 4306 TCGLabel *loop = gen_new_label(); 4307 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4308 4309 tcg_gen_movi_ptr(i, 0); 4310 gen_set_label(loop); 4311 4312 t0 = tcg_temp_new_i64(); 4313 t1 = tcg_temp_new_i64(); 4314 tp = tcg_temp_new_ptr(); 4315 tcg_gen_add_ptr(tp, base, i); 4316 tcg_gen_ld_i64(t0, tp, vofs); 4317 tcg_gen_ld_i64(t1, tp, vofs + 8); 4318 tcg_gen_addi_ptr(i, i, 16); 4319 4320 t16 = tcg_temp_new_i128(); 4321 tcg_gen_concat_i64_i128(t16, t0, t1); 4322 4323 tcg_gen_qemu_st_i128(t16, clean_addr, midx, MO_LEUQ); 4324 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4325 4326 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4327 } 4328 4329 /* Predicate register stores can be any multiple of 2. 
*/ 4330 if (len_remain >= 8) { 4331 t0 = tcg_temp_new_i64(); 4332 tcg_gen_st_i64(t0, base, vofs + len_align); 4333 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4334 len_remain -= 8; 4335 len_align += 8; 4336 if (len_remain) { 4337 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4338 } 4339 } 4340 if (len_remain) { 4341 t0 = tcg_temp_new_i64(); 4342 tcg_gen_ld_i64(t0, base, vofs + len_align); 4343 4344 switch (len_remain) { 4345 case 2: 4346 case 4: 4347 case 8: 4348 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4349 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4350 break; 4351 4352 case 6: 4353 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4354 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4355 tcg_gen_shri_i64(t0, t0, 32); 4356 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4357 break; 4358 4359 default: 4360 g_assert_not_reached(); 4361 } 4362 } 4363 } 4364 4365 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4366 { 4367 if (!dc_isar_feature(aa64_sve, s)) { 4368 return false; 4369 } 4370 if (sve_access_check(s)) { 4371 int size = vec_full_reg_size(s); 4372 int off = vec_full_reg_offset(s, a->rd); 4373 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4374 } 4375 return true; 4376 } 4377 4378 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4379 { 4380 if (!dc_isar_feature(aa64_sve, s)) { 4381 return false; 4382 } 4383 if (sve_access_check(s)) { 4384 int size = pred_full_reg_size(s); 4385 int off = pred_full_reg_offset(s, a->rd); 4386 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4387 } 4388 return true; 4389 } 4390 4391 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4392 { 4393 if (!dc_isar_feature(aa64_sve, s)) { 4394 return false; 4395 } 4396 if (sve_access_check(s)) { 4397 int size = vec_full_reg_size(s); 4398 int off = vec_full_reg_offset(s, a->rd); 4399 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4400 } 4401 return true; 4402 } 4403 4404 static bool 
trans_STR_pri(DisasContext *s, arg_rri *a) 4405 { 4406 if (!dc_isar_feature(aa64_sve, s)) { 4407 return false; 4408 } 4409 if (sve_access_check(s)) { 4410 int size = pred_full_reg_size(s); 4411 int off = pred_full_reg_offset(s, a->rd); 4412 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4413 } 4414 return true; 4415 } 4416 4417 /* 4418 *** SVE Memory - Contiguous Load Group 4419 */ 4420 4421 /* The memory mode of the dtype. */ 4422 static const MemOp dtype_mop[16] = { 4423 MO_UB, MO_UB, MO_UB, MO_UB, 4424 MO_SL, MO_UW, MO_UW, MO_UW, 4425 MO_SW, MO_SW, MO_UL, MO_UL, 4426 MO_SB, MO_SB, MO_SB, MO_UQ 4427 }; 4428 4429 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4430 4431 /* The vector element size of dtype. */ 4432 static const uint8_t dtype_esz[16] = { 4433 0, 1, 2, 3, 4434 3, 1, 2, 3, 4435 3, 2, 2, 3, 4436 3, 2, 1, 3 4437 }; 4438 4439 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4440 int dtype, uint32_t mte_n, bool is_write, 4441 gen_helper_gvec_mem *fn) 4442 { 4443 unsigned vsz = vec_full_reg_size(s); 4444 TCGv_ptr t_pg; 4445 int desc = 0; 4446 4447 /* 4448 * For e.g. LD4, there are not enough arguments to pass all 4 4449 * registers as pointers, so encode the regno into the data field. 4450 * For consistency, do this even for LD1. 
4451 */ 4452 if (s->mte_active[0]) { 4453 int msz = dtype_msz(dtype); 4454 4455 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4456 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4457 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4458 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4459 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); 4460 desc <<= SVE_MTEDESC_SHIFT; 4461 } else { 4462 addr = clean_data_tbi(s, addr); 4463 } 4464 4465 desc = simd_desc(vsz, vsz, zt | desc); 4466 t_pg = tcg_temp_new_ptr(); 4467 4468 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 4469 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc)); 4470 } 4471 4472 /* Indexed by [mte][be][dtype][nreg] */ 4473 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4474 { /* mte inactive, little-endian */ 4475 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4476 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4477 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4478 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4479 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4480 4481 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4482 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4483 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4484 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4485 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4486 4487 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4488 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4489 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4490 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4491 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4492 4493 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4494 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4495 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4496 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4497 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4498 4499 /* mte inactive, 
big-endian */ 4500 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4501 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4502 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4503 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4504 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4505 4506 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4507 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4508 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4509 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4510 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4511 4512 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4513 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4514 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4515 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4516 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4517 4518 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4519 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4520 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4521 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4522 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4523 4524 { /* mte active, little-endian */ 4525 { { gen_helper_sve_ld1bb_r_mte, 4526 gen_helper_sve_ld2bb_r_mte, 4527 gen_helper_sve_ld3bb_r_mte, 4528 gen_helper_sve_ld4bb_r_mte }, 4529 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4530 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4531 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4532 4533 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4534 { gen_helper_sve_ld1hh_le_r_mte, 4535 gen_helper_sve_ld2hh_le_r_mte, 4536 gen_helper_sve_ld3hh_le_r_mte, 4537 gen_helper_sve_ld4hh_le_r_mte }, 4538 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4539 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4540 4541 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4542 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4543 { gen_helper_sve_ld1ss_le_r_mte, 4544 
gen_helper_sve_ld2ss_le_r_mte, 4545 gen_helper_sve_ld3ss_le_r_mte, 4546 gen_helper_sve_ld4ss_le_r_mte }, 4547 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4548 4549 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4550 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4551 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4552 { gen_helper_sve_ld1dd_le_r_mte, 4553 gen_helper_sve_ld2dd_le_r_mte, 4554 gen_helper_sve_ld3dd_le_r_mte, 4555 gen_helper_sve_ld4dd_le_r_mte } }, 4556 4557 /* mte active, big-endian */ 4558 { { gen_helper_sve_ld1bb_r_mte, 4559 gen_helper_sve_ld2bb_r_mte, 4560 gen_helper_sve_ld3bb_r_mte, 4561 gen_helper_sve_ld4bb_r_mte }, 4562 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4563 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4564 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4565 4566 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4567 { gen_helper_sve_ld1hh_be_r_mte, 4568 gen_helper_sve_ld2hh_be_r_mte, 4569 gen_helper_sve_ld3hh_be_r_mte, 4570 gen_helper_sve_ld4hh_be_r_mte }, 4571 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4572 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4573 4574 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4575 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4576 { gen_helper_sve_ld1ss_be_r_mte, 4577 gen_helper_sve_ld2ss_be_r_mte, 4578 gen_helper_sve_ld3ss_be_r_mte, 4579 gen_helper_sve_ld4ss_be_r_mte }, 4580 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4581 4582 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4583 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4584 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4585 { gen_helper_sve_ld1dd_be_r_mte, 4586 gen_helper_sve_ld2dd_be_r_mte, 4587 gen_helper_sve_ld3dd_be_r_mte, 4588 gen_helper_sve_ld4dd_be_r_mte } } }, 4589 }; 4590 4591 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4592 TCGv_i64 addr, int dtype, int nreg) 4593 { 4594 gen_helper_gvec_mem *fn 4595 = 
ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4596 4597 /* 4598 * While there are holes in the table, they are not 4599 * accessible via the instruction encoding. 4600 */ 4601 assert(fn != NULL); 4602 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); 4603 } 4604 4605 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4606 { 4607 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4608 return false; 4609 } 4610 if (sve_access_check(s)) { 4611 TCGv_i64 addr = tcg_temp_new_i64(); 4612 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4613 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4614 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4615 } 4616 return true; 4617 } 4618 4619 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4620 { 4621 if (!dc_isar_feature(aa64_sve, s)) { 4622 return false; 4623 } 4624 if (sve_access_check(s)) { 4625 int vsz = vec_full_reg_size(s); 4626 int elements = vsz >> dtype_esz[a->dtype]; 4627 TCGv_i64 addr = tcg_temp_new_i64(); 4628 4629 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4630 (a->imm * elements * (a->nreg + 1)) 4631 << dtype_msz(a->dtype)); 4632 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4633 } 4634 return true; 4635 } 4636 4637 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4638 { 4639 static gen_helper_gvec_mem * const fns[2][2][16] = { 4640 { /* mte inactive, little-endian */ 4641 { gen_helper_sve_ldff1bb_r, 4642 gen_helper_sve_ldff1bhu_r, 4643 gen_helper_sve_ldff1bsu_r, 4644 gen_helper_sve_ldff1bdu_r, 4645 4646 gen_helper_sve_ldff1sds_le_r, 4647 gen_helper_sve_ldff1hh_le_r, 4648 gen_helper_sve_ldff1hsu_le_r, 4649 gen_helper_sve_ldff1hdu_le_r, 4650 4651 gen_helper_sve_ldff1hds_le_r, 4652 gen_helper_sve_ldff1hss_le_r, 4653 gen_helper_sve_ldff1ss_le_r, 4654 gen_helper_sve_ldff1sdu_le_r, 4655 4656 gen_helper_sve_ldff1bds_r, 4657 gen_helper_sve_ldff1bss_r, 4658 gen_helper_sve_ldff1bhs_r, 4659 gen_helper_sve_ldff1dd_le_r }, 4660 4661 /* mte 
inactive, big-endian */ 4662 { gen_helper_sve_ldff1bb_r, 4663 gen_helper_sve_ldff1bhu_r, 4664 gen_helper_sve_ldff1bsu_r, 4665 gen_helper_sve_ldff1bdu_r, 4666 4667 gen_helper_sve_ldff1sds_be_r, 4668 gen_helper_sve_ldff1hh_be_r, 4669 gen_helper_sve_ldff1hsu_be_r, 4670 gen_helper_sve_ldff1hdu_be_r, 4671 4672 gen_helper_sve_ldff1hds_be_r, 4673 gen_helper_sve_ldff1hss_be_r, 4674 gen_helper_sve_ldff1ss_be_r, 4675 gen_helper_sve_ldff1sdu_be_r, 4676 4677 gen_helper_sve_ldff1bds_r, 4678 gen_helper_sve_ldff1bss_r, 4679 gen_helper_sve_ldff1bhs_r, 4680 gen_helper_sve_ldff1dd_be_r } }, 4681 4682 { /* mte active, little-endian */ 4683 { gen_helper_sve_ldff1bb_r_mte, 4684 gen_helper_sve_ldff1bhu_r_mte, 4685 gen_helper_sve_ldff1bsu_r_mte, 4686 gen_helper_sve_ldff1bdu_r_mte, 4687 4688 gen_helper_sve_ldff1sds_le_r_mte, 4689 gen_helper_sve_ldff1hh_le_r_mte, 4690 gen_helper_sve_ldff1hsu_le_r_mte, 4691 gen_helper_sve_ldff1hdu_le_r_mte, 4692 4693 gen_helper_sve_ldff1hds_le_r_mte, 4694 gen_helper_sve_ldff1hss_le_r_mte, 4695 gen_helper_sve_ldff1ss_le_r_mte, 4696 gen_helper_sve_ldff1sdu_le_r_mte, 4697 4698 gen_helper_sve_ldff1bds_r_mte, 4699 gen_helper_sve_ldff1bss_r_mte, 4700 gen_helper_sve_ldff1bhs_r_mte, 4701 gen_helper_sve_ldff1dd_le_r_mte }, 4702 4703 /* mte active, big-endian */ 4704 { gen_helper_sve_ldff1bb_r_mte, 4705 gen_helper_sve_ldff1bhu_r_mte, 4706 gen_helper_sve_ldff1bsu_r_mte, 4707 gen_helper_sve_ldff1bdu_r_mte, 4708 4709 gen_helper_sve_ldff1sds_be_r_mte, 4710 gen_helper_sve_ldff1hh_be_r_mte, 4711 gen_helper_sve_ldff1hsu_be_r_mte, 4712 gen_helper_sve_ldff1hdu_be_r_mte, 4713 4714 gen_helper_sve_ldff1hds_be_r_mte, 4715 gen_helper_sve_ldff1hss_be_r_mte, 4716 gen_helper_sve_ldff1ss_be_r_mte, 4717 gen_helper_sve_ldff1sdu_be_r_mte, 4718 4719 gen_helper_sve_ldff1bds_r_mte, 4720 gen_helper_sve_ldff1bss_r_mte, 4721 gen_helper_sve_ldff1bhs_r_mte, 4722 gen_helper_sve_ldff1dd_be_r_mte } }, 4723 }; 4724 4725 if (!dc_isar_feature(aa64_sve, s)) { 4726 return false; 4727 } 4728 
s->is_nonstreaming = true; 4729 if (sve_access_check(s)) { 4730 TCGv_i64 addr = tcg_temp_new_i64(); 4731 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4732 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4733 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4734 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4735 } 4736 return true; 4737 } 4738 4739 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 4740 { 4741 static gen_helper_gvec_mem * const fns[2][2][16] = { 4742 { /* mte inactive, little-endian */ 4743 { gen_helper_sve_ldnf1bb_r, 4744 gen_helper_sve_ldnf1bhu_r, 4745 gen_helper_sve_ldnf1bsu_r, 4746 gen_helper_sve_ldnf1bdu_r, 4747 4748 gen_helper_sve_ldnf1sds_le_r, 4749 gen_helper_sve_ldnf1hh_le_r, 4750 gen_helper_sve_ldnf1hsu_le_r, 4751 gen_helper_sve_ldnf1hdu_le_r, 4752 4753 gen_helper_sve_ldnf1hds_le_r, 4754 gen_helper_sve_ldnf1hss_le_r, 4755 gen_helper_sve_ldnf1ss_le_r, 4756 gen_helper_sve_ldnf1sdu_le_r, 4757 4758 gen_helper_sve_ldnf1bds_r, 4759 gen_helper_sve_ldnf1bss_r, 4760 gen_helper_sve_ldnf1bhs_r, 4761 gen_helper_sve_ldnf1dd_le_r }, 4762 4763 /* mte inactive, big-endian */ 4764 { gen_helper_sve_ldnf1bb_r, 4765 gen_helper_sve_ldnf1bhu_r, 4766 gen_helper_sve_ldnf1bsu_r, 4767 gen_helper_sve_ldnf1bdu_r, 4768 4769 gen_helper_sve_ldnf1sds_be_r, 4770 gen_helper_sve_ldnf1hh_be_r, 4771 gen_helper_sve_ldnf1hsu_be_r, 4772 gen_helper_sve_ldnf1hdu_be_r, 4773 4774 gen_helper_sve_ldnf1hds_be_r, 4775 gen_helper_sve_ldnf1hss_be_r, 4776 gen_helper_sve_ldnf1ss_be_r, 4777 gen_helper_sve_ldnf1sdu_be_r, 4778 4779 gen_helper_sve_ldnf1bds_r, 4780 gen_helper_sve_ldnf1bss_r, 4781 gen_helper_sve_ldnf1bhs_r, 4782 gen_helper_sve_ldnf1dd_be_r } }, 4783 4784 { /* mte inactive, little-endian */ 4785 { gen_helper_sve_ldnf1bb_r_mte, 4786 gen_helper_sve_ldnf1bhu_r_mte, 4787 gen_helper_sve_ldnf1bsu_r_mte, 4788 gen_helper_sve_ldnf1bdu_r_mte, 4789 4790 gen_helper_sve_ldnf1sds_le_r_mte, 4791 gen_helper_sve_ldnf1hh_le_r_mte, 4792 
gen_helper_sve_ldnf1hsu_le_r_mte, 4793 gen_helper_sve_ldnf1hdu_le_r_mte, 4794 4795 gen_helper_sve_ldnf1hds_le_r_mte, 4796 gen_helper_sve_ldnf1hss_le_r_mte, 4797 gen_helper_sve_ldnf1ss_le_r_mte, 4798 gen_helper_sve_ldnf1sdu_le_r_mte, 4799 4800 gen_helper_sve_ldnf1bds_r_mte, 4801 gen_helper_sve_ldnf1bss_r_mte, 4802 gen_helper_sve_ldnf1bhs_r_mte, 4803 gen_helper_sve_ldnf1dd_le_r_mte }, 4804 4805 /* mte inactive, big-endian */ 4806 { gen_helper_sve_ldnf1bb_r_mte, 4807 gen_helper_sve_ldnf1bhu_r_mte, 4808 gen_helper_sve_ldnf1bsu_r_mte, 4809 gen_helper_sve_ldnf1bdu_r_mte, 4810 4811 gen_helper_sve_ldnf1sds_be_r_mte, 4812 gen_helper_sve_ldnf1hh_be_r_mte, 4813 gen_helper_sve_ldnf1hsu_be_r_mte, 4814 gen_helper_sve_ldnf1hdu_be_r_mte, 4815 4816 gen_helper_sve_ldnf1hds_be_r_mte, 4817 gen_helper_sve_ldnf1hss_be_r_mte, 4818 gen_helper_sve_ldnf1ss_be_r_mte, 4819 gen_helper_sve_ldnf1sdu_be_r_mte, 4820 4821 gen_helper_sve_ldnf1bds_r_mte, 4822 gen_helper_sve_ldnf1bss_r_mte, 4823 gen_helper_sve_ldnf1bhs_r_mte, 4824 gen_helper_sve_ldnf1dd_be_r_mte } }, 4825 }; 4826 4827 if (!dc_isar_feature(aa64_sve, s)) { 4828 return false; 4829 } 4830 s->is_nonstreaming = true; 4831 if (sve_access_check(s)) { 4832 int vsz = vec_full_reg_size(s); 4833 int elements = vsz >> dtype_esz[a->dtype]; 4834 int off = (a->imm * elements) << dtype_msz(a->dtype); 4835 TCGv_i64 addr = tcg_temp_new_i64(); 4836 4837 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4838 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4839 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4840 } 4841 return true; 4842 } 4843 4844 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4845 { 4846 unsigned vsz = vec_full_reg_size(s); 4847 TCGv_ptr t_pg; 4848 int poff; 4849 4850 /* Load the first quadword using the normal predicated load helpers. */ 4851 poff = pred_full_reg_offset(s, pg); 4852 if (vsz > 16) { 4853 /* 4854 * Zero-extend the first 16 bits of the predicate into a temporary. 
4855 * This avoids triggering an assert making sure we don't have bits 4856 * set within a predicate beyond VQ, but we have lowered VQ to 1 4857 * for this load operation. 4858 */ 4859 TCGv_i64 tmp = tcg_temp_new_i64(); 4860 #if HOST_BIG_ENDIAN 4861 poff += 6; 4862 #endif 4863 tcg_gen_ld16u_i64(tmp, cpu_env, poff); 4864 4865 poff = offsetof(CPUARMState, vfp.preg_tmp); 4866 tcg_gen_st_i64(tmp, cpu_env, poff); 4867 } 4868 4869 t_pg = tcg_temp_new_ptr(); 4870 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 4871 4872 gen_helper_gvec_mem *fn 4873 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4874 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt))); 4875 4876 /* Replicate that first quadword. */ 4877 if (vsz > 16) { 4878 int doff = vec_full_reg_offset(s, zt); 4879 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4880 } 4881 } 4882 4883 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 4884 { 4885 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4886 return false; 4887 } 4888 if (sve_access_check(s)) { 4889 int msz = dtype_msz(a->dtype); 4890 TCGv_i64 addr = tcg_temp_new_i64(); 4891 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 4892 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4893 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4894 } 4895 return true; 4896 } 4897 4898 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 4899 { 4900 if (!dc_isar_feature(aa64_sve, s)) { 4901 return false; 4902 } 4903 if (sve_access_check(s)) { 4904 TCGv_i64 addr = tcg_temp_new_i64(); 4905 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 4906 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4907 } 4908 return true; 4909 } 4910 4911 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4912 { 4913 unsigned vsz = vec_full_reg_size(s); 4914 unsigned vsz_r32; 4915 TCGv_ptr t_pg; 4916 int poff, doff; 4917 4918 if (vsz < 32) { 4919 /* 4920 * Note that this UNDEFINED check comes after CheckSVEEnabled() 4921 * 
in the ARM pseudocode, which is the sve_access_check() done 4922 * in our caller. We should not now return false from the caller. 4923 */ 4924 unallocated_encoding(s); 4925 return; 4926 } 4927 4928 /* Load the first octaword using the normal predicated load helpers. */ 4929 4930 poff = pred_full_reg_offset(s, pg); 4931 if (vsz > 32) { 4932 /* 4933 * Zero-extend the first 32 bits of the predicate into a temporary. 4934 * This avoids triggering an assert making sure we don't have bits 4935 * set within a predicate beyond VQ, but we have lowered VQ to 2 4936 * for this load operation. 4937 */ 4938 TCGv_i64 tmp = tcg_temp_new_i64(); 4939 #if HOST_BIG_ENDIAN 4940 poff += 4; 4941 #endif 4942 tcg_gen_ld32u_i64(tmp, cpu_env, poff); 4943 4944 poff = offsetof(CPUARMState, vfp.preg_tmp); 4945 tcg_gen_st_i64(tmp, cpu_env, poff); 4946 } 4947 4948 t_pg = tcg_temp_new_ptr(); 4949 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 4950 4951 gen_helper_gvec_mem *fn 4952 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4953 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt))); 4954 4955 /* 4956 * Replicate that first octaword. 4957 * The replication happens in units of 32; if the full vector size 4958 * is not a multiple of 32, the final bits are zeroed. 
4959 */ 4960 doff = vec_full_reg_offset(s, zt); 4961 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 4962 if (vsz >= 64) { 4963 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 4964 } 4965 vsz -= vsz_r32; 4966 if (vsz) { 4967 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 4968 } 4969 } 4970 4971 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 4972 { 4973 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4974 return false; 4975 } 4976 if (a->rm == 31) { 4977 return false; 4978 } 4979 s->is_nonstreaming = true; 4980 if (sve_access_check(s)) { 4981 TCGv_i64 addr = tcg_temp_new_i64(); 4982 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4983 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4984 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4985 } 4986 return true; 4987 } 4988 4989 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 4990 { 4991 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4992 return false; 4993 } 4994 s->is_nonstreaming = true; 4995 if (sve_access_check(s)) { 4996 TCGv_i64 addr = tcg_temp_new_i64(); 4997 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 4998 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4999 } 5000 return true; 5001 } 5002 5003 /* Load and broadcast element. */ 5004 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 5005 { 5006 unsigned vsz = vec_full_reg_size(s); 5007 unsigned psz = pred_full_reg_size(s); 5008 unsigned esz = dtype_esz[a->dtype]; 5009 unsigned msz = dtype_msz(a->dtype); 5010 TCGLabel *over; 5011 TCGv_i64 temp, clean_addr; 5012 5013 if (!dc_isar_feature(aa64_sve, s)) { 5014 return false; 5015 } 5016 if (!sve_access_check(s)) { 5017 return true; 5018 } 5019 5020 over = gen_new_label(); 5021 5022 /* If the guarding predicate has no bits set, no load occurs. */ 5023 if (psz <= 8) { 5024 /* Reduce the pred_esz_masks value simply to reduce the 5025 * size of the code generated here. 
5026 */ 5027 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 5028 temp = tcg_temp_new_i64(); 5029 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg)); 5030 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 5031 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 5032 } else { 5033 TCGv_i32 t32 = tcg_temp_new_i32(); 5034 find_last_active(s, t32, esz, a->pg); 5035 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5036 } 5037 5038 /* Load the data. */ 5039 temp = tcg_temp_new_i64(); 5040 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5041 clean_addr = gen_mte_check1(s, temp, false, true, msz); 5042 5043 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), 5044 finalize_memop(s, dtype_mop[a->dtype])); 5045 5046 /* Broadcast to *all* elements. */ 5047 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5048 vsz, vsz, temp); 5049 5050 /* Zero the inactive elements. */ 5051 gen_set_label(over); 5052 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5053 } 5054 5055 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5056 int msz, int esz, int nreg) 5057 { 5058 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 5059 { { { gen_helper_sve_st1bb_r, 5060 gen_helper_sve_st1bh_r, 5061 gen_helper_sve_st1bs_r, 5062 gen_helper_sve_st1bd_r }, 5063 { NULL, 5064 gen_helper_sve_st1hh_le_r, 5065 gen_helper_sve_st1hs_le_r, 5066 gen_helper_sve_st1hd_le_r }, 5067 { NULL, NULL, 5068 gen_helper_sve_st1ss_le_r, 5069 gen_helper_sve_st1sd_le_r }, 5070 { NULL, NULL, NULL, 5071 gen_helper_sve_st1dd_le_r } }, 5072 { { gen_helper_sve_st1bb_r, 5073 gen_helper_sve_st1bh_r, 5074 gen_helper_sve_st1bs_r, 5075 gen_helper_sve_st1bd_r }, 5076 { NULL, 5077 gen_helper_sve_st1hh_be_r, 5078 gen_helper_sve_st1hs_be_r, 5079 gen_helper_sve_st1hd_be_r }, 5080 { NULL, NULL, 5081 gen_helper_sve_st1ss_be_r, 5082 gen_helper_sve_st1sd_be_r }, 5083 { NULL, NULL, NULL, 5084 gen_helper_sve_st1dd_be_r } } }, 5085 5086 { { { 
gen_helper_sve_st1bb_r_mte, 5087 gen_helper_sve_st1bh_r_mte, 5088 gen_helper_sve_st1bs_r_mte, 5089 gen_helper_sve_st1bd_r_mte }, 5090 { NULL, 5091 gen_helper_sve_st1hh_le_r_mte, 5092 gen_helper_sve_st1hs_le_r_mte, 5093 gen_helper_sve_st1hd_le_r_mte }, 5094 { NULL, NULL, 5095 gen_helper_sve_st1ss_le_r_mte, 5096 gen_helper_sve_st1sd_le_r_mte }, 5097 { NULL, NULL, NULL, 5098 gen_helper_sve_st1dd_le_r_mte } }, 5099 { { gen_helper_sve_st1bb_r_mte, 5100 gen_helper_sve_st1bh_r_mte, 5101 gen_helper_sve_st1bs_r_mte, 5102 gen_helper_sve_st1bd_r_mte }, 5103 { NULL, 5104 gen_helper_sve_st1hh_be_r_mte, 5105 gen_helper_sve_st1hs_be_r_mte, 5106 gen_helper_sve_st1hd_be_r_mte }, 5107 { NULL, NULL, 5108 gen_helper_sve_st1ss_be_r_mte, 5109 gen_helper_sve_st1sd_be_r_mte }, 5110 { NULL, NULL, NULL, 5111 gen_helper_sve_st1dd_be_r_mte } } }, 5112 }; 5113 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5114 { { { gen_helper_sve_st2bb_r, 5115 gen_helper_sve_st2hh_le_r, 5116 gen_helper_sve_st2ss_le_r, 5117 gen_helper_sve_st2dd_le_r }, 5118 { gen_helper_sve_st3bb_r, 5119 gen_helper_sve_st3hh_le_r, 5120 gen_helper_sve_st3ss_le_r, 5121 gen_helper_sve_st3dd_le_r }, 5122 { gen_helper_sve_st4bb_r, 5123 gen_helper_sve_st4hh_le_r, 5124 gen_helper_sve_st4ss_le_r, 5125 gen_helper_sve_st4dd_le_r } }, 5126 { { gen_helper_sve_st2bb_r, 5127 gen_helper_sve_st2hh_be_r, 5128 gen_helper_sve_st2ss_be_r, 5129 gen_helper_sve_st2dd_be_r }, 5130 { gen_helper_sve_st3bb_r, 5131 gen_helper_sve_st3hh_be_r, 5132 gen_helper_sve_st3ss_be_r, 5133 gen_helper_sve_st3dd_be_r }, 5134 { gen_helper_sve_st4bb_r, 5135 gen_helper_sve_st4hh_be_r, 5136 gen_helper_sve_st4ss_be_r, 5137 gen_helper_sve_st4dd_be_r } } }, 5138 { { { gen_helper_sve_st2bb_r_mte, 5139 gen_helper_sve_st2hh_le_r_mte, 5140 gen_helper_sve_st2ss_le_r_mte, 5141 gen_helper_sve_st2dd_le_r_mte }, 5142 { gen_helper_sve_st3bb_r_mte, 5143 gen_helper_sve_st3hh_le_r_mte, 5144 gen_helper_sve_st3ss_le_r_mte, 5145 gen_helper_sve_st3dd_le_r_mte }, 5146 { 
gen_helper_sve_st4bb_r_mte, 5147 gen_helper_sve_st4hh_le_r_mte, 5148 gen_helper_sve_st4ss_le_r_mte, 5149 gen_helper_sve_st4dd_le_r_mte } }, 5150 { { gen_helper_sve_st2bb_r_mte, 5151 gen_helper_sve_st2hh_be_r_mte, 5152 gen_helper_sve_st2ss_be_r_mte, 5153 gen_helper_sve_st2dd_be_r_mte }, 5154 { gen_helper_sve_st3bb_r_mte, 5155 gen_helper_sve_st3hh_be_r_mte, 5156 gen_helper_sve_st3ss_be_r_mte, 5157 gen_helper_sve_st3dd_be_r_mte }, 5158 { gen_helper_sve_st4bb_r_mte, 5159 gen_helper_sve_st4hh_be_r_mte, 5160 gen_helper_sve_st4ss_be_r_mte, 5161 gen_helper_sve_st4dd_be_r_mte } } }, 5162 }; 5163 gen_helper_gvec_mem *fn; 5164 int be = s->be_data == MO_BE; 5165 5166 if (nreg == 0) { 5167 /* ST1 */ 5168 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5169 nreg = 1; 5170 } else { 5171 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5172 assert(msz == esz); 5173 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5174 } 5175 assert(fn != NULL); 5176 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); 5177 } 5178 5179 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5180 { 5181 if (!dc_isar_feature(aa64_sve, s)) { 5182 return false; 5183 } 5184 if (a->rm == 31 || a->msz > a->esz) { 5185 return false; 5186 } 5187 if (sve_access_check(s)) { 5188 TCGv_i64 addr = tcg_temp_new_i64(); 5189 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5190 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5191 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5192 } 5193 return true; 5194 } 5195 5196 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5197 { 5198 if (!dc_isar_feature(aa64_sve, s)) { 5199 return false; 5200 } 5201 if (a->msz > a->esz) { 5202 return false; 5203 } 5204 if (sve_access_check(s)) { 5205 int vsz = vec_full_reg_size(s); 5206 int elements = vsz >> a->esz; 5207 TCGv_i64 addr = tcg_temp_new_i64(); 5208 5209 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5210 (a->imm * elements * (a->nreg + 1)) << a->msz); 5211 
do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5212 } 5213 return true; 5214 } 5215 5216 /* 5217 *** SVE gather loads / scatter stores 5218 */ 5219 5220 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5221 int scale, TCGv_i64 scalar, int msz, bool is_write, 5222 gen_helper_gvec_mem_scatter *fn) 5223 { 5224 unsigned vsz = vec_full_reg_size(s); 5225 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5226 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5227 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5228 int desc = 0; 5229 5230 if (s->mte_active[0]) { 5231 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 5232 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 5233 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 5234 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 5235 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); 5236 desc <<= SVE_MTEDESC_SHIFT; 5237 } 5238 desc = simd_desc(vsz, vsz, desc | scale); 5239 5240 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 5241 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); 5242 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); 5243 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5244 } 5245 5246 /* Indexed by [mte][be][ff][xs][u][msz]. 
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};

/*
 * Indexed by [mte][be][ff][xs][u][msz], like gather_load_fn32 above.
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};

/* LD1/LDFF1 (scalar plus vector): gather load, register offsets. */
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Gather loads are not available in streaming SVE mode. */
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    /* The decoder only accepts element/memory size pairs in the tables. */
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, fn);
    return true;
}

/* LD1/LDFF1 (vector plus immediate): gather load, immediate offset. */
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    /* Reject memory elements wider than, or sign-extension equal to, esz. */
    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        /* xs=2 selects the zd (64-bit offset) column. */
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
    return true;
}

/* LDNT1 (vector plus scalar): SVE2 non-temporal gather load. */
static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz + !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, fn);
    return true;
}

/* Indexed by [mte][be][xs][msz].
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
          { gen_helper_sve_stbs_zsu,
            gen_helper_sve_sths_le_zsu,
            gen_helper_sve_stss_le_zsu, },
          { gen_helper_sve_stbs_zss,
            gen_helper_sve_sths_le_zss,
            gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
          { gen_helper_sve_stbs_zsu,
            gen_helper_sve_sths_be_zsu,
            gen_helper_sve_stss_be_zsu, },
          { gen_helper_sve_stbs_zss,
            gen_helper_sve_sths_be_zss,
            gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
          { gen_helper_sve_stbs_zsu_mte,
            gen_helper_sve_sths_le_zsu_mte,
            gen_helper_sve_stss_le_zsu_mte, },
          { gen_helper_sve_stbs_zss_mte,
            gen_helper_sve_sths_le_zss_mte,
            gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
          { gen_helper_sve_stbs_zsu_mte,
            gen_helper_sve_sths_be_zsu_mte,
            gen_helper_sve_stss_be_zsu_mte, },
          { gen_helper_sve_stbs_zss_mte,
            gen_helper_sve_sths_be_zss_mte,
            gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
          { gen_helper_sve_stbd_zsu,
            gen_helper_sve_sthd_le_zsu,
            gen_helper_sve_stsd_le_zsu,
            gen_helper_sve_stdd_le_zsu, },
          { gen_helper_sve_stbd_zss,
            gen_helper_sve_sthd_le_zss,
            gen_helper_sve_stsd_le_zss,
            gen_helper_sve_stdd_le_zss, },
          { gen_helper_sve_stbd_zd,
            gen_helper_sve_sthd_le_zd,
            gen_helper_sve_stsd_le_zd,
            gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
          { gen_helper_sve_stbd_zsu,
            gen_helper_sve_sthd_be_zsu,
            gen_helper_sve_stsd_be_zsu,
            gen_helper_sve_stdd_be_zsu, },
          { gen_helper_sve_stbd_zss,
            gen_helper_sve_sthd_be_zss,
            gen_helper_sve_stsd_be_zss,
            gen_helper_sve_stdd_be_zss, },
          { gen_helper_sve_stbd_zd,
            gen_helper_sve_sthd_be_zd,
            gen_helper_sve_stsd_be_zd,
            gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
          { gen_helper_sve_stbd_zsu_mte,
            gen_helper_sve_sthd_le_zsu_mte,
            gen_helper_sve_stsd_le_zsu_mte,
            gen_helper_sve_stdd_le_zsu_mte, },
          { gen_helper_sve_stbd_zss_mte,
            gen_helper_sve_sthd_le_zss_mte,
            gen_helper_sve_stsd_le_zss_mte,
            gen_helper_sve_stdd_le_zss_mte, },
          { gen_helper_sve_stbd_zd_mte,
            gen_helper_sve_sthd_le_zd_mte,
            gen_helper_sve_stsd_le_zd_mte,
            gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
          { gen_helper_sve_stbd_zsu_mte,
            gen_helper_sve_sthd_be_zsu_mte,
            gen_helper_sve_stsd_be_zsu_mte,
            gen_helper_sve_stdd_be_zsu_mte, },
          { gen_helper_sve_stbd_zss_mte,
            gen_helper_sve_sthd_be_zss_mte,
            gen_helper_sve_stsd_be_zss_mte,
            gen_helper_sve_stdd_be_zss_mte, },
          { gen_helper_sve_stbd_zd_mte,
            gen_helper_sve_sthd_be_zd_mte,
            gen_helper_sve_stsd_be_zd_mte,
            gen_helper_sve_stdd_be_zd_mte, } } },
};

/* ST1 (scalar plus vector): scatter store, register offsets. */
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    /* Scaling only makes sense for multi-byte memory elements. */
    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

/* ST1 (vector plus immediate): scatter store, immediate offset. */
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        /* xs=2 selects the zd (64-bit offset) column. */
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}

/* STNT1 (vector plus scalar): SVE2 non-temporal scatter store. */
static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    s->is_nonstreaming = true;
    (void)sve_access_check(s);
    return true;
}

/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)

/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)

/*
 * SVE2 Integer - Predicated
 */
/* Signed add and accumulate long pairwise; no byte form. */
static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL,                          gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

/* Unsigned add and accumulate long pairwise; no byte form. */
static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL,                          gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

/* URECPE/URSQRTE exist only for 32-bit elements. */
TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)

/* SVE2 saturating/rounding shifts (predicated). */
DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

/* SVE2 halving add/sub (predicated). */
DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

/* SVE2 integer pairwise arithmetic (predicated). */
DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

/* SVE2 saturating add/sub (predicated). */
DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)

/*
 * SVE2 Widening Integer Arithmetic
 */

/* Widening helper tables are indexed by destination esz; byte is invalid.
 * The TRANS_FEAT data argument encodes {top_n, top_m} element selection.
 */
static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL,                    gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL,                    gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL,                    gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL,                    gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL,                    gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL,                    gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL,                          gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL,                         gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s,  gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL,                         gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s,  gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

/* EORBT/EORTB: interleaved XOR; data selects which half is replaced. */
static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)

/*
 * Polynomial multiply long.  The 128-bit result form (esz == 0) requires
 * FEAT_PMULL and is non-streaming; the other element sizes need only SVE2.
 * SEL chooses bottom (false) or top (true) source elements.
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL,                    gen_helper_sve2_pmull_d,
    };

    if (a->esz == 0) {
        if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
            return false;
        }
        s->is_nonstreaming = true;
    } else if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

/* Widening add/sub of the bottom/top half of the second operand. */
static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL,                    gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL,                    gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL,                    gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL,                    gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)

/*
 * SSHLL: sign-extend the bottom (top) half of each element and shift
 * left by SHL.  IMM packs (shl << 1) | top.  Implemented by shifting the
 * desired half to the high end and arithmetic-shifting back down; the
 * shl == halfbits top case needs no extension, only a mask.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}

/*
 * USHLL on a 64-bit lane: a single shift plus a mask suffices because
 * the extension is zero-filling.  SHIFT may be negative (net right
 * shift) when extracting the top half.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

/* USHLL vector expansion; mirrors gen_sshll_vec with zero extension. */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}

/*
 * Expand a {S,U}SHLL{B,T}.  OPS is indexed by destination esz - 1
 * (h/s/d); the imm and SEL bits are packed into the gvec data field
 * as consumed by the gen_*shll_* expanders above.
 */
static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{

    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sshll_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
};
static const GVecGen2i sshll_ops[3] = {
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_h,
      .vece = MO_16 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_s,
      .vece = MO_32 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_d,
      .vece = MO_64 }
};
TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)

static const TCGOpcode ushll_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
};
static const GVecGen2i ushll_ops[3] = {
    { .fni8 = gen_ushll16_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_h,
      .vece = MO_16 },
    { .fni8 = gen_ushll32_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_s,
      .vece = MO_32 },
    { .fni8 = gen_ushll64_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_d,
      .vece = MO_64 },
};
TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)

/* SVE2 bit permute (FEAT_SVE_BitPerm); non-streaming only. */
static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bgrp_fns[a->esz], a, 0)

/* Complex integer add; data selects the 90 or 270 degree rotation. */
static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)

/* Absolute difference and accumulate long; data selects bottom/top. */
static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL,                    gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL,                    gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)

static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)

/* Shift-right accumulate and shift-insert, shared with AdvSIMD gvec. */
TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)

/*
 * Expand a narrowing extract (XTN family).  OPS is indexed by source
 * esz - 1; the immediate must be zero for these encodings.
 */
static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
                              const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}

/* TCG vector opcodes required by the saturating-narrow expanders below. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB: signed saturating extract narrow (bottom).
 * Clamp each wide element to the signed range of the half-width type,
 * then mask to the low half of each element.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
}

static const GVecGen2 sqxtnb_ops[3] = {
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)

/*
 * SQXTNT: as SQXTNB, but shift the saturated result into the high
 * half and merge with the existing destination: bitsel keeps d where
 * the low-half mask is set and takes the shifted n elsewhere.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

/* load_dest: the bottom halves of the destination are preserved. */
static const GVecGen2 sqxtnt_ops[3] = {
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)

static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB: unsigned saturating extract narrow (bottom).
 * A single unsigned min against the half-width maximum both
 * saturates and clears the high half.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
}

static const GVecGen2 uqxtnb_ops[3] = {
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)

/* UQXTNT: unsigned saturate, then insert into the high halves of d. */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const GVecGen2 uqxtnt_ops[3] = {
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)

static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB: signed-to-unsigned saturating extract narrow (bottom).
 * Clamp below at zero, then unsigned-min against the half-width max.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
}

static const GVecGen2 sqxtunb_ops[3] = {
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)

/* SQXTUNT: as SQXTUNB, merging into the high halves of d. */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)

/*
 * Expand a shift-right-and-narrow via a per-narrow-size GVecGen2i
 * table.  a->esz is the narrow element size (MO_8..MO_32 valid);
 * the decoded shift must already be in [1, narrow element bits].
 */
static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}

/*
 * SHRNB on a 64-bit chunk: shift right, then mask each lane down to
 * its low (narrow) half.  vece here is the *wide* element size.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
}

static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)

/*
 * SHRNT on a 64-bit chunk: position the shifted result in the high
 * half of each lane (shl by halfbits - shr also discards the bits
 * below the shift) and merge with the preserved low halves of d.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* MO_64 case: a single deposit into bits [63:32] suffices. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT,
aa64_sve2, do_shr_narrow, a, shrnt_ops)

/* Rounding shift-narrow: out-of-line helpers only, no inline vectors. */
static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)

/*
 * SQSHRUNB: signed arithmetic shift right, clamp below at zero,
 * then unsigned saturate to the narrow width (bottom halves).
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
}

static const TCGOpcode sqshrunb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunb_ops[3] = {
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)

/* SQSHRUNT: as SQSHRUNB, merging into the high halves of d. */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode sqshrunt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunt_ops[3] = {
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)

/* Rounding variants are helper-only. */
static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)

/*
 * SQSHRNB: arithmetic shift right, clamp to the signed range of the
 * narrow type, then mask to the bottom halves.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
}

static const TCGOpcode sqshrnb_vec_list[] = {
INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnb_ops[3] = {
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)

/* SQSHRNT: as SQSHRNB, merging into the high halves of d. */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode sqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnt_ops[3] = {
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)

/* Rounding variants are helper-only. */
static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)

/*
 * UQSHRNB: logical shift right, then unsigned saturate to the
 * narrow width (bottom halves).
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
}

static const TCGOpcode uqshrnb_vec_list[] = {
    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnb_ops[3] = {
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)

/* UQSHRNT: as UQSHRNB, merging into the high halves of d. */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
}

static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)

static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)

/*
 * Narrowing three-operand integer ops (add/sub high-half narrow).
 * NULL rejects the unallocated esz == MO_8 encoding.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                  \
    static gen_helper_gvec_3 * const name##_fns[4] = {                  \
        NULL, gen_helper_sve2_##name##_h,                               \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                   \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)

/* MATCH/NMATCH: byte/half only; flag-setting predicate compare. */
static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] =
{
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

/* Histogram ops: HISTCNT is word/double only; HISTSEG is byte only. */
static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

/* SVE2 floating-point pairwise operations (predicated). */
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */

TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)

/*
 * Widening multiply-add tables: NULL rejects esz == MO_8.
 * The data argument selects bottom/top source halves (and for the
 * BT forms, the mixed selection) inside the helper.
 */
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)

/* Complex integer multiply-add / dot-product: a->rot goes in data. */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* USDOT is only allocated for the word (esz == 2) form. */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)

/* SVE2 crypto: AES shares one helper, with decrypt passed as data. */
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, false)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, true)

TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)

/* Predicated FP narrowing/widening conversions (odd/even element). */
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

/* FCVTX[NT]: convert with round-to-odd. */
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)

/* FLOGB: NULL rejects the byte form; half uses the FP16 status. */
static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL, gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ?
FPST_FPCR_F16 : FPST_FPCR)

/*
 * FMLAL/FMLSL (vectors): single helper; pack 'sel' (top/bottom) into
 * data bit 1 and 'sub' (multiply-subtract) into bit 0.
 */
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

/* FMLAL/FMLSL (indexed): the element index goes in data bits [.. :2]. */
static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

/* Integer matrix multiply-accumulate. */
TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)

/* BFMLAL (vectors): 'sel' chooses bottom vs top bfloat16 elements. */
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR)
;
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16,
           do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

/* BFMLAL (indexed): element index packed above the 'sel' bit. */
static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)

/*
 * PSEL (SME): select a single predicate element of Pm, indexed by
 * Wv + imm modulo the element count, and use it as an all-or-nothing
 * mask applied to Pn when writing Pd.
 */
static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);

    /* Apply to either copy the source, or write zeros. */
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
    return true;
}

/* SCLAMP: d = smin(smax(a, n), m), per element. */
static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}

static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_sclamp_i32,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_sclamp_i64,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)

/* UCLAMP: d = umin(umax(a, n), m), per element. */
static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}

static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_uclamp_i32,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_uclamp_i64,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)