1 /* 2 * AArch64 SVE translation 3 * 4 * Copyright (c) 2018 Linaro, Ltd 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "fpu/softfloat.h" 24 25 26 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, 27 TCGv_i64, uint32_t, uint32_t); 28 29 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, 30 TCGv_ptr, TCGv_i32); 31 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr, 32 TCGv_ptr, TCGv_ptr, TCGv_i32); 33 34 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32); 35 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, 36 TCGv_ptr, TCGv_i64, TCGv_i32); 37 38 /* 39 * Helpers for extracting complex instruction fields. 40 */ 41 42 /* See e.g. ASR (immediate, predicated). 43 * Returns -1 for unallocated encoding; diagnose later. 44 */ 45 static int tszimm_esz(DisasContext *s, int x) 46 { 47 x >>= 3; /* discard imm3 */ 48 return 31 - clz32(x); 49 } 50 51 static int tszimm_shr(DisasContext *s, int x) 52 { 53 return (16 << tszimm_esz(s, x)) - x; 54 } 55 56 /* See e.g. LSL (immediate, predicated). */ 57 static int tszimm_shl(DisasContext *s, int x) 58 { 59 return x - (8 << tszimm_esz(s, x)); 60 } 61 62 /* The SH bit is in bit 8. Extract the low 8 and shift. 
*/ 63 static inline int expand_imm_sh8s(DisasContext *s, int x) 64 { 65 return (int8_t)x << (x & 0x100 ? 8 : 0); 66 } 67 68 static inline int expand_imm_sh8u(DisasContext *s, int x) 69 { 70 return (uint8_t)x << (x & 0x100 ? 8 : 0); 71 } 72 73 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype) 74 * with unsigned data. C.f. SVE Memory Contiguous Load Group. 75 */ 76 static inline int msz_dtype(DisasContext *s, int msz) 77 { 78 static const uint8_t dtype[4] = { 0, 5, 10, 15 }; 79 return dtype[msz]; 80 } 81 82 /* 83 * Include the generated decoder. 84 */ 85 86 #include "decode-sve.c.inc" 87 88 /* 89 * Implement all of the translator functions referenced by the decoder. 90 */ 91 92 /* Invoke an out-of-line helper on 2 Zregs. */ 93 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, 94 int rd, int rn, int data) 95 { 96 if (fn == NULL) { 97 return false; 98 } 99 if (sve_access_check(s)) { 100 unsigned vsz = vec_full_reg_size(s); 101 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 102 vec_full_reg_offset(s, rn), 103 vsz, vsz, data, fn); 104 } 105 return true; 106 } 107 108 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 109 int rd, int rn, int data, 110 ARMFPStatusFlavour flavour) 111 { 112 if (fn == NULL) { 113 return false; 114 } 115 if (sve_access_check(s)) { 116 unsigned vsz = vec_full_reg_size(s); 117 TCGv_ptr status = fpstatus_ptr(flavour); 118 119 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 120 vec_full_reg_offset(s, rn), 121 status, vsz, vsz, data, fn); 122 } 123 return true; 124 } 125 126 static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 127 arg_rr_esz *a, int data) 128 { 129 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, 130 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 131 } 132 133 /* Invoke an out-of-line helper on 3 Zregs. 
*/ 134 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 135 int rd, int rn, int rm, int data) 136 { 137 if (fn == NULL) { 138 return false; 139 } 140 if (sve_access_check(s)) { 141 unsigned vsz = vec_full_reg_size(s); 142 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 143 vec_full_reg_offset(s, rn), 144 vec_full_reg_offset(s, rm), 145 vsz, vsz, data, fn); 146 } 147 return true; 148 } 149 150 static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 151 arg_rrr_esz *a, int data) 152 { 153 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); 154 } 155 156 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */ 157 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 158 int rd, int rn, int rm, 159 int data, ARMFPStatusFlavour flavour) 160 { 161 if (fn == NULL) { 162 return false; 163 } 164 if (sve_access_check(s)) { 165 unsigned vsz = vec_full_reg_size(s); 166 TCGv_ptr status = fpstatus_ptr(flavour); 167 168 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 169 vec_full_reg_offset(s, rn), 170 vec_full_reg_offset(s, rm), 171 status, vsz, vsz, data, fn); 172 } 173 return true; 174 } 175 176 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 177 arg_rrr_esz *a, int data) 178 { 179 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 180 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 181 } 182 183 /* Invoke an out-of-line helper on 4 Zregs. 
*/ 184 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 185 int rd, int rn, int rm, int ra, int data) 186 { 187 if (fn == NULL) { 188 return false; 189 } 190 if (sve_access_check(s)) { 191 unsigned vsz = vec_full_reg_size(s); 192 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 193 vec_full_reg_offset(s, rn), 194 vec_full_reg_offset(s, rm), 195 vec_full_reg_offset(s, ra), 196 vsz, vsz, data, fn); 197 } 198 return true; 199 } 200 201 static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 202 arg_rrrr_esz *a, int data) 203 { 204 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 205 } 206 207 static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn, 208 arg_rrxr_esz *a) 209 { 210 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 211 } 212 213 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */ 214 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 215 int rd, int rn, int rm, int ra, 216 int data, TCGv_ptr ptr) 217 { 218 if (fn == NULL) { 219 return false; 220 } 221 if (sve_access_check(s)) { 222 unsigned vsz = vec_full_reg_size(s); 223 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 224 vec_full_reg_offset(s, rn), 225 vec_full_reg_offset(s, rm), 226 vec_full_reg_offset(s, ra), 227 ptr, vsz, vsz, data, fn); 228 } 229 return true; 230 } 231 232 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 233 int rd, int rn, int rm, int ra, 234 int data, ARMFPStatusFlavour flavour) 235 { 236 TCGv_ptr status = fpstatus_ptr(flavour); 237 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status); 238 return ret; 239 } 240 241 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. 
*/ 242 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn, 243 int rd, int rn, int rm, int ra, int pg, 244 int data, ARMFPStatusFlavour flavour) 245 { 246 if (fn == NULL) { 247 return false; 248 } 249 if (sve_access_check(s)) { 250 unsigned vsz = vec_full_reg_size(s); 251 TCGv_ptr status = fpstatus_ptr(flavour); 252 253 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd), 254 vec_full_reg_offset(s, rn), 255 vec_full_reg_offset(s, rm), 256 vec_full_reg_offset(s, ra), 257 pred_full_reg_offset(s, pg), 258 status, vsz, vsz, data, fn); 259 } 260 return true; 261 } 262 263 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */ 264 static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, 265 int rd, int rn, int pg, int data) 266 { 267 if (fn == NULL) { 268 return false; 269 } 270 if (sve_access_check(s)) { 271 unsigned vsz = vec_full_reg_size(s); 272 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 273 vec_full_reg_offset(s, rn), 274 pred_full_reg_offset(s, pg), 275 vsz, vsz, data, fn); 276 } 277 return true; 278 } 279 280 static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn, 281 arg_rpr_esz *a, int data) 282 { 283 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data); 284 } 285 286 static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn, 287 arg_rpri_esz *a) 288 { 289 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); 290 } 291 292 static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn, 293 int rd, int rn, int pg, int data, 294 ARMFPStatusFlavour flavour) 295 { 296 if (fn == NULL) { 297 return false; 298 } 299 if (sve_access_check(s)) { 300 unsigned vsz = vec_full_reg_size(s); 301 TCGv_ptr status = fpstatus_ptr(flavour); 302 303 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 304 vec_full_reg_offset(s, rn), 305 pred_full_reg_offset(s, pg), 306 status, vsz, vsz, data, fn); 307 } 308 return true; 309 } 310 311 static bool gen_gvec_fpst_arg_zpz(DisasContext *s, 
gen_helper_gvec_3_ptr *fn, 312 arg_rpr_esz *a, int data, 313 ARMFPStatusFlavour flavour) 314 { 315 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour); 316 } 317 318 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 319 static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, 320 int rd, int rn, int rm, int pg, int data) 321 { 322 if (fn == NULL) { 323 return false; 324 } 325 if (sve_access_check(s)) { 326 unsigned vsz = vec_full_reg_size(s); 327 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 328 vec_full_reg_offset(s, rn), 329 vec_full_reg_offset(s, rm), 330 pred_full_reg_offset(s, pg), 331 vsz, vsz, data, fn); 332 } 333 return true; 334 } 335 336 static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn, 337 arg_rprr_esz *a, int data) 338 { 339 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data); 340 } 341 342 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 343 static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, 344 int rd, int rn, int rm, int pg, int data, 345 ARMFPStatusFlavour flavour) 346 { 347 if (fn == NULL) { 348 return false; 349 } 350 if (sve_access_check(s)) { 351 unsigned vsz = vec_full_reg_size(s); 352 TCGv_ptr status = fpstatus_ptr(flavour); 353 354 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 355 vec_full_reg_offset(s, rn), 356 vec_full_reg_offset(s, rm), 357 pred_full_reg_offset(s, pg), 358 status, vsz, vsz, data, fn); 359 } 360 return true; 361 } 362 363 static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 364 arg_rprr_esz *a) 365 { 366 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, 367 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 368 } 369 370 /* Invoke a vector expander on two Zregs and an immediate. 
*/ 371 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 372 int esz, int rd, int rn, uint64_t imm) 373 { 374 if (gvec_fn == NULL) { 375 return false; 376 } 377 if (sve_access_check(s)) { 378 unsigned vsz = vec_full_reg_size(s); 379 gvec_fn(esz, vec_full_reg_offset(s, rd), 380 vec_full_reg_offset(s, rn), imm, vsz, vsz); 381 } 382 return true; 383 } 384 385 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 386 arg_rri_esz *a) 387 { 388 if (a->esz < 0) { 389 /* Invalid tsz encoding -- see tszimm_esz. */ 390 return false; 391 } 392 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm); 393 } 394 395 /* Invoke a vector expander on three Zregs. */ 396 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, 397 int esz, int rd, int rn, int rm) 398 { 399 if (gvec_fn == NULL) { 400 return false; 401 } 402 if (sve_access_check(s)) { 403 unsigned vsz = vec_full_reg_size(s); 404 gvec_fn(esz, vec_full_reg_offset(s, rd), 405 vec_full_reg_offset(s, rn), 406 vec_full_reg_offset(s, rm), vsz, vsz); 407 } 408 return true; 409 } 410 411 static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn, 412 arg_rrr_esz *a) 413 { 414 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); 415 } 416 417 /* Invoke a vector expander on four Zregs. */ 418 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, 419 arg_rrrr_esz *a) 420 { 421 if (gvec_fn == NULL) { 422 return false; 423 } 424 if (sve_access_check(s)) { 425 unsigned vsz = vec_full_reg_size(s); 426 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 427 vec_full_reg_offset(s, a->rn), 428 vec_full_reg_offset(s, a->rm), 429 vec_full_reg_offset(s, a->ra), vsz, vsz); 430 } 431 return true; 432 } 433 434 /* Invoke a vector move on two Zregs. 
*/ 435 static bool do_mov_z(DisasContext *s, int rd, int rn) 436 { 437 if (sve_access_check(s)) { 438 unsigned vsz = vec_full_reg_size(s); 439 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd), 440 vec_full_reg_offset(s, rn), vsz, vsz); 441 } 442 return true; 443 } 444 445 /* Initialize a Zreg with replications of a 64-bit immediate. */ 446 static void do_dupi_z(DisasContext *s, int rd, uint64_t word) 447 { 448 unsigned vsz = vec_full_reg_size(s); 449 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); 450 } 451 452 /* Invoke a vector expander on three Pregs. */ 453 static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, 454 int rd, int rn, int rm) 455 { 456 if (sve_access_check(s)) { 457 unsigned psz = pred_gvec_reg_size(s); 458 gvec_fn(MO_64, pred_full_reg_offset(s, rd), 459 pred_full_reg_offset(s, rn), 460 pred_full_reg_offset(s, rm), psz, psz); 461 } 462 return true; 463 } 464 465 /* Invoke a vector move on two Pregs. */ 466 static bool do_mov_p(DisasContext *s, int rd, int rn) 467 { 468 if (sve_access_check(s)) { 469 unsigned psz = pred_gvec_reg_size(s); 470 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), 471 pred_full_reg_offset(s, rn), psz, psz); 472 } 473 return true; 474 } 475 476 /* Set the cpu flags as per a return from an SVE helper. */ 477 static void do_pred_flags(TCGv_i32 t) 478 { 479 tcg_gen_mov_i32(cpu_NF, t); 480 tcg_gen_andi_i32(cpu_ZF, t, 2); 481 tcg_gen_andi_i32(cpu_CF, t, 1); 482 tcg_gen_movi_i32(cpu_VF, 0); 483 } 484 485 /* Subroutines computing the ARM PredTest psuedofunction. 
*/ 486 static void do_predtest1(TCGv_i64 d, TCGv_i64 g) 487 { 488 TCGv_i32 t = tcg_temp_new_i32(); 489 490 gen_helper_sve_predtest1(t, d, g); 491 do_pred_flags(t); 492 } 493 494 static void do_predtest(DisasContext *s, int dofs, int gofs, int words) 495 { 496 TCGv_ptr dptr = tcg_temp_new_ptr(); 497 TCGv_ptr gptr = tcg_temp_new_ptr(); 498 TCGv_i32 t = tcg_temp_new_i32(); 499 500 tcg_gen_addi_ptr(dptr, cpu_env, dofs); 501 tcg_gen_addi_ptr(gptr, cpu_env, gofs); 502 503 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words)); 504 505 do_pred_flags(t); 506 } 507 508 /* For each element size, the bits within a predicate word that are active. */ 509 const uint64_t pred_esz_masks[5] = { 510 0xffffffffffffffffull, 0x5555555555555555ull, 511 0x1111111111111111ull, 0x0101010101010101ull, 512 0x0001000100010001ull, 513 }; 514 515 static bool trans_INVALID(DisasContext *s, arg_INVALID *a) 516 { 517 unallocated_encoding(s); 518 return true; 519 } 520 521 /* 522 *** SVE Logical - Unpredicated Group 523 */ 524 525 TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a) 526 TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) 527 TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) 528 TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) 529 530 static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 531 { 532 TCGv_i64 t = tcg_temp_new_i64(); 533 uint64_t mask = dup_const(MO_8, 0xff >> sh); 534 535 tcg_gen_xor_i64(t, n, m); 536 tcg_gen_shri_i64(d, t, sh); 537 tcg_gen_shli_i64(t, t, 8 - sh); 538 tcg_gen_andi_i64(d, d, mask); 539 tcg_gen_andi_i64(t, t, ~mask); 540 tcg_gen_or_i64(d, d, t); 541 } 542 543 static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 544 { 545 TCGv_i64 t = tcg_temp_new_i64(); 546 uint64_t mask = dup_const(MO_16, 0xffff >> sh); 547 548 tcg_gen_xor_i64(t, n, m); 549 tcg_gen_shri_i64(d, t, sh); 550 tcg_gen_shli_i64(t, t, 16 - sh); 551 
tcg_gen_andi_i64(d, d, mask); 552 tcg_gen_andi_i64(t, t, ~mask); 553 tcg_gen_or_i64(d, d, t); 554 } 555 556 static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) 557 { 558 tcg_gen_xor_i32(d, n, m); 559 tcg_gen_rotri_i32(d, d, sh); 560 } 561 562 static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 563 { 564 tcg_gen_xor_i64(d, n, m); 565 tcg_gen_rotri_i64(d, d, sh); 566 } 567 568 static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 569 TCGv_vec m, int64_t sh) 570 { 571 tcg_gen_xor_vec(vece, d, n, m); 572 tcg_gen_rotri_vec(vece, d, d, sh); 573 } 574 575 void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 576 uint32_t rm_ofs, int64_t shift, 577 uint32_t opr_sz, uint32_t max_sz) 578 { 579 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; 580 static const GVecGen3i ops[4] = { 581 { .fni8 = gen_xar8_i64, 582 .fniv = gen_xar_vec, 583 .fno = gen_helper_sve2_xar_b, 584 .opt_opc = vecop, 585 .vece = MO_8 }, 586 { .fni8 = gen_xar16_i64, 587 .fniv = gen_xar_vec, 588 .fno = gen_helper_sve2_xar_h, 589 .opt_opc = vecop, 590 .vece = MO_16 }, 591 { .fni4 = gen_xar_i32, 592 .fniv = gen_xar_vec, 593 .fno = gen_helper_sve2_xar_s, 594 .opt_opc = vecop, 595 .vece = MO_32 }, 596 { .fni8 = gen_xar_i64, 597 .fniv = gen_xar_vec, 598 .fno = gen_helper_gvec_xar_d, 599 .opt_opc = vecop, 600 .vece = MO_64 } 601 }; 602 int esize = 8 << vece; 603 604 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ 605 tcg_debug_assert(shift >= 0); 606 tcg_debug_assert(shift <= esize); 607 shift &= esize - 1; 608 609 if (shift == 0) { 610 /* xar with no rotate devolves to xor. 
*/ 611 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); 612 } else { 613 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 614 shift, &ops[vece]); 615 } 616 } 617 618 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) 619 { 620 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { 621 return false; 622 } 623 if (sve_access_check(s)) { 624 unsigned vsz = vec_full_reg_size(s); 625 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd), 626 vec_full_reg_offset(s, a->rn), 627 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz); 628 } 629 return true; 630 } 631 632 static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 633 { 634 tcg_gen_xor_i64(d, n, m); 635 tcg_gen_xor_i64(d, d, k); 636 } 637 638 static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 639 TCGv_vec m, TCGv_vec k) 640 { 641 tcg_gen_xor_vec(vece, d, n, m); 642 tcg_gen_xor_vec(vece, d, d, k); 643 } 644 645 static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 646 uint32_t a, uint32_t oprsz, uint32_t maxsz) 647 { 648 static const GVecGen4 op = { 649 .fni8 = gen_eor3_i64, 650 .fniv = gen_eor3_vec, 651 .fno = gen_helper_sve2_eor3, 652 .vece = MO_64, 653 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 654 }; 655 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 656 } 657 658 TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a) 659 660 static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 661 { 662 tcg_gen_andc_i64(d, m, k); 663 tcg_gen_xor_i64(d, d, n); 664 } 665 666 static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 667 TCGv_vec m, TCGv_vec k) 668 { 669 tcg_gen_andc_vec(vece, d, m, k); 670 tcg_gen_xor_vec(vece, d, d, n); 671 } 672 673 static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 674 uint32_t a, uint32_t oprsz, uint32_t maxsz) 675 { 676 static const GVecGen4 op = { 677 .fni8 = gen_bcax_i64, 678 .fniv = gen_bcax_vec, 679 .fno = gen_helper_sve2_bcax, 680 .vece = MO_64, 681 .prefer_i64 = 
TCG_TARGET_REG_BITS == 64, 682 }; 683 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 684 } 685 686 TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a) 687 688 static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 689 uint32_t a, uint32_t oprsz, uint32_t maxsz) 690 { 691 /* BSL differs from the generic bitsel in argument ordering. */ 692 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz); 693 } 694 695 TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a) 696 697 static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 698 { 699 tcg_gen_andc_i64(n, k, n); 700 tcg_gen_andc_i64(m, m, k); 701 tcg_gen_or_i64(d, n, m); 702 } 703 704 static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 705 TCGv_vec m, TCGv_vec k) 706 { 707 if (TCG_TARGET_HAS_bitsel_vec) { 708 tcg_gen_not_vec(vece, n, n); 709 tcg_gen_bitsel_vec(vece, d, k, n, m); 710 } else { 711 tcg_gen_andc_vec(vece, n, k, n); 712 tcg_gen_andc_vec(vece, m, m, k); 713 tcg_gen_or_vec(vece, d, n, m); 714 } 715 } 716 717 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 718 uint32_t a, uint32_t oprsz, uint32_t maxsz) 719 { 720 static const GVecGen4 op = { 721 .fni8 = gen_bsl1n_i64, 722 .fniv = gen_bsl1n_vec, 723 .fno = gen_helper_sve2_bsl1n, 724 .vece = MO_64, 725 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 726 }; 727 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 728 } 729 730 TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a) 731 732 static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 733 { 734 /* 735 * Z[dn] = (n & k) | (~m & ~k) 736 * = | ~(m | k) 737 */ 738 tcg_gen_and_i64(n, n, k); 739 if (TCG_TARGET_HAS_orc_i64) { 740 tcg_gen_or_i64(m, m, k); 741 tcg_gen_orc_i64(d, n, m); 742 } else { 743 tcg_gen_nor_i64(m, m, k); 744 tcg_gen_or_i64(d, n, m); 745 } 746 } 747 748 static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 749 TCGv_vec m, TCGv_vec k) 750 { 751 if (TCG_TARGET_HAS_bitsel_vec) { 752 
tcg_gen_not_vec(vece, m, m); 753 tcg_gen_bitsel_vec(vece, d, k, n, m); 754 } else { 755 tcg_gen_and_vec(vece, n, n, k); 756 tcg_gen_or_vec(vece, m, m, k); 757 tcg_gen_orc_vec(vece, d, n, m); 758 } 759 } 760 761 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 762 uint32_t a, uint32_t oprsz, uint32_t maxsz) 763 { 764 static const GVecGen4 op = { 765 .fni8 = gen_bsl2n_i64, 766 .fniv = gen_bsl2n_vec, 767 .fno = gen_helper_sve2_bsl2n, 768 .vece = MO_64, 769 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 770 }; 771 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 772 } 773 774 TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a) 775 776 static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 777 { 778 tcg_gen_and_i64(n, n, k); 779 tcg_gen_andc_i64(m, m, k); 780 tcg_gen_nor_i64(d, n, m); 781 } 782 783 static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 784 TCGv_vec m, TCGv_vec k) 785 { 786 tcg_gen_bitsel_vec(vece, d, k, n, m); 787 tcg_gen_not_vec(vece, d, d); 788 } 789 790 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 791 uint32_t a, uint32_t oprsz, uint32_t maxsz) 792 { 793 static const GVecGen4 op = { 794 .fni8 = gen_nbsl_i64, 795 .fniv = gen_nbsl_vec, 796 .fno = gen_helper_sve2_nbsl, 797 .vece = MO_64, 798 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 799 }; 800 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 801 } 802 803 TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a) 804 805 /* 806 *** SVE Integer Arithmetic - Unpredicated Group 807 */ 808 809 TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a) 810 TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a) 811 TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a) 812 TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a) 813 TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a) 814 TRANS_FEAT(UQSUB_zzz, aa64_sve, 
gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a) 815 816 /* 817 *** SVE Integer Arithmetic - Binary Predicated Group 818 */ 819 820 /* Select active elememnts from Zn and inactive elements from Zm, 821 * storing the result in Zd. 822 */ 823 static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) 824 { 825 static gen_helper_gvec_4 * const fns[4] = { 826 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, 827 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d 828 }; 829 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); 830 } 831 832 #define DO_ZPZZ(NAME, FEAT, name) \ 833 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \ 834 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \ 835 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \ 836 }; \ 837 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \ 838 name##_zpzz_fns[a->esz], a, 0) 839 840 DO_ZPZZ(AND_zpzz, aa64_sve, sve_and) 841 DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor) 842 DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr) 843 DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic) 844 845 DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add) 846 DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub) 847 848 DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax) 849 DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax) 850 DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin) 851 DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin) 852 DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd) 853 DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd) 854 855 DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul) 856 DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh) 857 DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh) 858 859 DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr) 860 DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr) 861 DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl) 862 863 static gen_helper_gvec_4 * const sdiv_fns[4] = { 864 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d 865 }; 866 TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0) 867 868 static gen_helper_gvec_4 * const udiv_fns[4] = { 869 NULL, NULL, gen_helper_sve_udiv_zpzz_s, 
gen_helper_sve_udiv_zpzz_d 870 }; 871 TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0) 872 873 TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz) 874 875 /* 876 *** SVE Integer Arithmetic - Unary Predicated Group 877 */ 878 879 #define DO_ZPZ(NAME, FEAT, name) \ 880 static gen_helper_gvec_3 * const name##_fns[4] = { \ 881 gen_helper_##name##_b, gen_helper_##name##_h, \ 882 gen_helper_##name##_s, gen_helper_##name##_d, \ 883 }; \ 884 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0) 885 886 DO_ZPZ(CLS, aa64_sve, sve_cls) 887 DO_ZPZ(CLZ, aa64_sve, sve_clz) 888 DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz) 889 DO_ZPZ(CNOT, aa64_sve, sve_cnot) 890 DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz) 891 DO_ZPZ(ABS, aa64_sve, sve_abs) 892 DO_ZPZ(NEG, aa64_sve, sve_neg) 893 DO_ZPZ(RBIT, aa64_sve, sve_rbit) 894 895 static gen_helper_gvec_3 * const fabs_fns[4] = { 896 NULL, gen_helper_sve_fabs_h, 897 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, 898 }; 899 TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) 900 901 static gen_helper_gvec_3 * const fneg_fns[4] = { 902 NULL, gen_helper_sve_fneg_h, 903 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, 904 }; 905 TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) 906 907 static gen_helper_gvec_3 * const sxtb_fns[4] = { 908 NULL, gen_helper_sve_sxtb_h, 909 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d, 910 }; 911 TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0) 912 913 static gen_helper_gvec_3 * const uxtb_fns[4] = { 914 NULL, gen_helper_sve_uxtb_h, 915 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d, 916 }; 917 TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0) 918 919 static gen_helper_gvec_3 * const sxth_fns[4] = { 920 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d 921 }; 922 TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0) 923 924 static gen_helper_gvec_3 
* const uxth_fns[4] = { 925 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d 926 }; 927 TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0) 928 929 TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, 930 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0) 931 TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, 932 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0) 933 934 /* 935 *** SVE Integer Reduction Group 936 */ 937 938 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32); 939 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a, 940 gen_helper_gvec_reduc *fn) 941 { 942 unsigned vsz = vec_full_reg_size(s); 943 TCGv_ptr t_zn, t_pg; 944 TCGv_i32 desc; 945 TCGv_i64 temp; 946 947 if (fn == NULL) { 948 return false; 949 } 950 if (!sve_access_check(s)) { 951 return true; 952 } 953 954 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 955 temp = tcg_temp_new_i64(); 956 t_zn = tcg_temp_new_ptr(); 957 t_pg = tcg_temp_new_ptr(); 958 959 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 960 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 961 fn(temp, t_zn, t_pg, desc); 962 963 write_fp_dreg(s, a->rd, temp); 964 return true; 965 } 966 967 #define DO_VPZ(NAME, name) \ 968 static gen_helper_gvec_reduc * const name##_fns[4] = { \ 969 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ 970 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 971 }; \ 972 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz]) 973 974 DO_VPZ(ORV, orv) 975 DO_VPZ(ANDV, andv) 976 DO_VPZ(EORV, eorv) 977 978 DO_VPZ(UADDV, uaddv) 979 DO_VPZ(SMAXV, smaxv) 980 DO_VPZ(UMAXV, umaxv) 981 DO_VPZ(SMINV, sminv) 982 DO_VPZ(UMINV, uminv) 983 984 static gen_helper_gvec_reduc * const saddv_fns[4] = { 985 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h, 986 gen_helper_sve_saddv_s, NULL 987 }; 988 TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz]) 989 990 #undef DO_VPZ 991 992 /* 993 *** SVE Shift by Immediate - 
Predicated Group 994 */ 995 996 /* 997 * Copy Zn into Zd, storing zeros into inactive elements. 998 * If invert, store zeros into the active elements. 999 */ 1000 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, 1001 int esz, bool invert) 1002 { 1003 static gen_helper_gvec_3 * const fns[4] = { 1004 gen_helper_sve_movz_b, gen_helper_sve_movz_h, 1005 gen_helper_sve_movz_s, gen_helper_sve_movz_d, 1006 }; 1007 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); 1008 } 1009 1010 static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr, 1011 gen_helper_gvec_3 * const fns[4]) 1012 { 1013 int max; 1014 1015 if (a->esz < 0) { 1016 /* Invalid tsz encoding -- see tszimm_esz. */ 1017 return false; 1018 } 1019 1020 /* 1021 * Shift by element size is architecturally valid. 1022 * For arithmetic right-shift, it's the same as by one less. 1023 * For logical shifts and ASRD, it is a zeroing operation. 1024 */ 1025 max = 8 << a->esz; 1026 if (a->imm >= max) { 1027 if (asr) { 1028 a->imm = max - 1; 1029 } else { 1030 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); 1031 } 1032 } 1033 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a); 1034 } 1035 1036 static gen_helper_gvec_3 * const asr_zpzi_fns[4] = { 1037 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h, 1038 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d, 1039 }; 1040 TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns) 1041 1042 static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = { 1043 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h, 1044 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d, 1045 }; 1046 TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns) 1047 1048 static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = { 1049 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h, 1050 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d, 1051 }; 1052 TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns) 1053 1054 static 
gen_helper_gvec_3 * const asrd_fns[4] = { 1055 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h, 1056 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d, 1057 }; 1058 TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns) 1059 1060 static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = { 1061 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h, 1062 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d, 1063 }; 1064 TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1065 a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a) 1066 1067 static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = { 1068 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h, 1069 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d, 1070 }; 1071 TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1072 a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a) 1073 1074 static gen_helper_gvec_3 * const srshr_fns[4] = { 1075 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h, 1076 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d, 1077 }; 1078 TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1079 a->esz < 0 ? NULL : srshr_fns[a->esz], a) 1080 1081 static gen_helper_gvec_3 * const urshr_fns[4] = { 1082 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h, 1083 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d, 1084 }; 1085 TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1086 a->esz < 0 ? NULL : urshr_fns[a->esz], a) 1087 1088 static gen_helper_gvec_3 * const sqshlu_fns[4] = { 1089 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h, 1090 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d, 1091 }; 1092 TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi, 1093 a->esz < 0 ? 
NULL : sqshlu_fns[a->esz], a) 1094 1095 /* 1096 *** SVE Bitwise Shift - Predicated Group 1097 */ 1098 1099 #define DO_ZPZW(NAME, name) \ 1100 static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \ 1101 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \ 1102 gen_helper_sve_##name##_zpzw_s, NULL \ 1103 }; \ 1104 TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \ 1105 a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0) 1106 1107 DO_ZPZW(ASR, asr) 1108 DO_ZPZW(LSR, lsr) 1109 DO_ZPZW(LSL, lsl) 1110 1111 #undef DO_ZPZW 1112 1113 /* 1114 *** SVE Bitwise Shift - Unpredicated Group 1115 */ 1116 1117 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr, 1118 void (*gvec_fn)(unsigned, uint32_t, uint32_t, 1119 int64_t, uint32_t, uint32_t)) 1120 { 1121 if (a->esz < 0) { 1122 /* Invalid tsz encoding -- see tszimm_esz. */ 1123 return false; 1124 } 1125 if (sve_access_check(s)) { 1126 unsigned vsz = vec_full_reg_size(s); 1127 /* Shift by element size is architecturally valid. For 1128 arithmetic right-shift, it's the same as by one less. 1129 Otherwise it is a zeroing operation. 
*/ 1130 if (a->imm >= 8 << a->esz) { 1131 if (asr) { 1132 a->imm = (8 << a->esz) - 1; 1133 } else { 1134 do_dupi_z(s, a->rd, 0); 1135 return true; 1136 } 1137 } 1138 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 1139 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 1140 } 1141 return true; 1142 } 1143 1144 TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari) 1145 TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri) 1146 TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli) 1147 1148 #define DO_ZZW(NAME, name) \ 1149 static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \ 1150 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \ 1151 gen_helper_sve_##name##_zzw_s, NULL \ 1152 }; \ 1153 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \ 1154 name##_zzw_fns[a->esz], a, 0) 1155 1156 DO_ZZW(ASR_zzw, asr) 1157 DO_ZZW(LSR_zzw, lsr) 1158 DO_ZZW(LSL_zzw, lsl) 1159 1160 #undef DO_ZZW 1161 1162 /* 1163 *** SVE Integer Multiply-Add Group 1164 */ 1165 1166 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a, 1167 gen_helper_gvec_5 *fn) 1168 { 1169 if (sve_access_check(s)) { 1170 unsigned vsz = vec_full_reg_size(s); 1171 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd), 1172 vec_full_reg_offset(s, a->ra), 1173 vec_full_reg_offset(s, a->rn), 1174 vec_full_reg_offset(s, a->rm), 1175 pred_full_reg_offset(s, a->pg), 1176 vsz, vsz, 0, fn); 1177 } 1178 return true; 1179 } 1180 1181 static gen_helper_gvec_5 * const mla_fns[4] = { 1182 gen_helper_sve_mla_b, gen_helper_sve_mla_h, 1183 gen_helper_sve_mla_s, gen_helper_sve_mla_d, 1184 }; 1185 TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz]) 1186 1187 static gen_helper_gvec_5 * const mls_fns[4] = { 1188 gen_helper_sve_mls_b, gen_helper_sve_mls_h, 1189 gen_helper_sve_mls_s, gen_helper_sve_mls_d, 1190 }; 1191 TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz]) 1192 1193 /* 1194 *** SVE Index Generation Group 1195 */ 1196 1197 static bool 
do_index(DisasContext *s, int esz, int rd, 1198 TCGv_i64 start, TCGv_i64 incr) 1199 { 1200 unsigned vsz; 1201 TCGv_i32 desc; 1202 TCGv_ptr t_zd; 1203 1204 if (!sve_access_check(s)) { 1205 return true; 1206 } 1207 1208 vsz = vec_full_reg_size(s); 1209 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1210 t_zd = tcg_temp_new_ptr(); 1211 1212 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); 1213 if (esz == 3) { 1214 gen_helper_sve_index_d(t_zd, start, incr, desc); 1215 } else { 1216 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 1217 static index_fn * const fns[3] = { 1218 gen_helper_sve_index_b, 1219 gen_helper_sve_index_h, 1220 gen_helper_sve_index_s, 1221 }; 1222 TCGv_i32 s32 = tcg_temp_new_i32(); 1223 TCGv_i32 i32 = tcg_temp_new_i32(); 1224 1225 tcg_gen_extrl_i64_i32(s32, start); 1226 tcg_gen_extrl_i64_i32(i32, incr); 1227 fns[esz](t_zd, s32, i32, desc); 1228 } 1229 return true; 1230 } 1231 1232 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, 1233 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) 1234 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, 1235 tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) 1236 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, 1237 cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) 1238 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, 1239 cpu_reg(s, a->rn), cpu_reg(s, a->rm)) 1240 1241 /* 1242 *** SVE Stack Allocation Group 1243 */ 1244 1245 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) 1246 { 1247 if (!dc_isar_feature(aa64_sve, s)) { 1248 return false; 1249 } 1250 if (sve_access_check(s)) { 1251 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1252 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1253 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); 1254 } 1255 return true; 1256 } 1257 1258 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) 1259 { 1260 if (!dc_isar_feature(aa64_sme, s)) { 1261 return false; 1262 } 1263 if (sme_enabled_check(s)) { 1264 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 
1265 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1266 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); 1267 } 1268 return true; 1269 } 1270 1271 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) 1272 { 1273 if (!dc_isar_feature(aa64_sve, s)) { 1274 return false; 1275 } 1276 if (sve_access_check(s)) { 1277 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1278 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1279 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); 1280 } 1281 return true; 1282 } 1283 1284 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) 1285 { 1286 if (!dc_isar_feature(aa64_sme, s)) { 1287 return false; 1288 } 1289 if (sme_enabled_check(s)) { 1290 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1291 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1292 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); 1293 } 1294 return true; 1295 } 1296 1297 static bool trans_RDVL(DisasContext *s, arg_RDVL *a) 1298 { 1299 if (!dc_isar_feature(aa64_sve, s)) { 1300 return false; 1301 } 1302 if (sve_access_check(s)) { 1303 TCGv_i64 reg = cpu_reg(s, a->rd); 1304 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); 1305 } 1306 return true; 1307 } 1308 1309 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) 1310 { 1311 if (!dc_isar_feature(aa64_sme, s)) { 1312 return false; 1313 } 1314 if (sme_enabled_check(s)) { 1315 TCGv_i64 reg = cpu_reg(s, a->rd); 1316 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); 1317 } 1318 return true; 1319 } 1320 1321 /* 1322 *** SVE Compute Vector Address Group 1323 */ 1324 1325 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) 1326 { 1327 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); 1328 } 1329 1330 TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) 1331 TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) 1332 TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) 1333 TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, 
gen_helper_sve_adr_u32) 1334 1335 /* 1336 *** SVE Integer Misc - Unpredicated Group 1337 */ 1338 1339 static gen_helper_gvec_2 * const fexpa_fns[4] = { 1340 NULL, gen_helper_sve_fexpa_h, 1341 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, 1342 }; 1343 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, 1344 fexpa_fns[a->esz], a->rd, a->rn, 0) 1345 1346 static gen_helper_gvec_3 * const ftssel_fns[4] = { 1347 NULL, gen_helper_sve_ftssel_h, 1348 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, 1349 }; 1350 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, 1351 ftssel_fns[a->esz], a, 0) 1352 1353 /* 1354 *** SVE Predicate Logical Operations Group 1355 */ 1356 1357 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, 1358 const GVecGen4 *gvec_op) 1359 { 1360 if (!sve_access_check(s)) { 1361 return true; 1362 } 1363 1364 unsigned psz = pred_gvec_reg_size(s); 1365 int dofs = pred_full_reg_offset(s, a->rd); 1366 int nofs = pred_full_reg_offset(s, a->rn); 1367 int mofs = pred_full_reg_offset(s, a->rm); 1368 int gofs = pred_full_reg_offset(s, a->pg); 1369 1370 if (!a->s) { 1371 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1372 return true; 1373 } 1374 1375 if (psz == 8) { 1376 /* Do the operation and the flags generation in temps. */ 1377 TCGv_i64 pd = tcg_temp_new_i64(); 1378 TCGv_i64 pn = tcg_temp_new_i64(); 1379 TCGv_i64 pm = tcg_temp_new_i64(); 1380 TCGv_i64 pg = tcg_temp_new_i64(); 1381 1382 tcg_gen_ld_i64(pn, cpu_env, nofs); 1383 tcg_gen_ld_i64(pm, cpu_env, mofs); 1384 tcg_gen_ld_i64(pg, cpu_env, gofs); 1385 1386 gvec_op->fni8(pd, pn, pm, pg); 1387 tcg_gen_st_i64(pd, cpu_env, dofs); 1388 1389 do_predtest1(pd, pg); 1390 } else { 1391 /* The operation and flags generation is large. The computation 1392 * of the flags depends on the original contents of the guarding 1393 * predicate. If the destination overwrites the guarding predicate, 1394 * then the easiest way to get this right is to save a copy. 
1395 */ 1396 int tofs = gofs; 1397 if (a->rd == a->pg) { 1398 tofs = offsetof(CPUARMState, vfp.preg_tmp); 1399 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz); 1400 } 1401 1402 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1403 do_predtest(s, dofs, tofs, psz / 8); 1404 } 1405 return true; 1406 } 1407 1408 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1409 { 1410 tcg_gen_and_i64(pd, pn, pm); 1411 tcg_gen_and_i64(pd, pd, pg); 1412 } 1413 1414 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1415 TCGv_vec pm, TCGv_vec pg) 1416 { 1417 tcg_gen_and_vec(vece, pd, pn, pm); 1418 tcg_gen_and_vec(vece, pd, pd, pg); 1419 } 1420 1421 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) 1422 { 1423 static const GVecGen4 op = { 1424 .fni8 = gen_and_pg_i64, 1425 .fniv = gen_and_pg_vec, 1426 .fno = gen_helper_sve_and_pppp, 1427 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1428 }; 1429 1430 if (!dc_isar_feature(aa64_sve, s)) { 1431 return false; 1432 } 1433 if (!a->s) { 1434 if (a->rn == a->rm) { 1435 if (a->pg == a->rn) { 1436 return do_mov_p(s, a->rd, a->rn); 1437 } 1438 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); 1439 } else if (a->pg == a->rn || a->pg == a->rm) { 1440 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); 1441 } 1442 } 1443 return do_pppp_flags(s, a, &op); 1444 } 1445 1446 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1447 { 1448 tcg_gen_andc_i64(pd, pn, pm); 1449 tcg_gen_and_i64(pd, pd, pg); 1450 } 1451 1452 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1453 TCGv_vec pm, TCGv_vec pg) 1454 { 1455 tcg_gen_andc_vec(vece, pd, pn, pm); 1456 tcg_gen_and_vec(vece, pd, pd, pg); 1457 } 1458 1459 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) 1460 { 1461 static const GVecGen4 op = { 1462 .fni8 = gen_bic_pg_i64, 1463 .fniv = gen_bic_pg_vec, 1464 .fno = gen_helper_sve_bic_pppp, 1465 .prefer_i64 = 
TCG_TARGET_REG_BITS == 64, 1466 }; 1467 1468 if (!dc_isar_feature(aa64_sve, s)) { 1469 return false; 1470 } 1471 if (!a->s && a->pg == a->rn) { 1472 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); 1473 } 1474 return do_pppp_flags(s, a, &op); 1475 } 1476 1477 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1478 { 1479 tcg_gen_xor_i64(pd, pn, pm); 1480 tcg_gen_and_i64(pd, pd, pg); 1481 } 1482 1483 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1484 TCGv_vec pm, TCGv_vec pg) 1485 { 1486 tcg_gen_xor_vec(vece, pd, pn, pm); 1487 tcg_gen_and_vec(vece, pd, pd, pg); 1488 } 1489 1490 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) 1491 { 1492 static const GVecGen4 op = { 1493 .fni8 = gen_eor_pg_i64, 1494 .fniv = gen_eor_pg_vec, 1495 .fno = gen_helper_sve_eor_pppp, 1496 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1497 }; 1498 1499 if (!dc_isar_feature(aa64_sve, s)) { 1500 return false; 1501 } 1502 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ 1503 if (!a->s && a->pg == a->rm) { 1504 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); 1505 } 1506 return do_pppp_flags(s, a, &op); 1507 } 1508 1509 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) 1510 { 1511 if (a->s || !dc_isar_feature(aa64_sve, s)) { 1512 return false; 1513 } 1514 if (sve_access_check(s)) { 1515 unsigned psz = pred_gvec_reg_size(s); 1516 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), 1517 pred_full_reg_offset(s, a->pg), 1518 pred_full_reg_offset(s, a->rn), 1519 pred_full_reg_offset(s, a->rm), psz, psz); 1520 } 1521 return true; 1522 } 1523 1524 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1525 { 1526 tcg_gen_or_i64(pd, pn, pm); 1527 tcg_gen_and_i64(pd, pd, pg); 1528 } 1529 1530 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1531 TCGv_vec pm, TCGv_vec pg) 1532 { 1533 tcg_gen_or_vec(vece, pd, pn, pm); 1534 tcg_gen_and_vec(vece, pd, 
pd, pg); 1535 } 1536 1537 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) 1538 { 1539 static const GVecGen4 op = { 1540 .fni8 = gen_orr_pg_i64, 1541 .fniv = gen_orr_pg_vec, 1542 .fno = gen_helper_sve_orr_pppp, 1543 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1544 }; 1545 1546 if (!dc_isar_feature(aa64_sve, s)) { 1547 return false; 1548 } 1549 if (!a->s && a->pg == a->rn && a->rn == a->rm) { 1550 return do_mov_p(s, a->rd, a->rn); 1551 } 1552 return do_pppp_flags(s, a, &op); 1553 } 1554 1555 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1556 { 1557 tcg_gen_orc_i64(pd, pn, pm); 1558 tcg_gen_and_i64(pd, pd, pg); 1559 } 1560 1561 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1562 TCGv_vec pm, TCGv_vec pg) 1563 { 1564 tcg_gen_orc_vec(vece, pd, pn, pm); 1565 tcg_gen_and_vec(vece, pd, pd, pg); 1566 } 1567 1568 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) 1569 { 1570 static const GVecGen4 op = { 1571 .fni8 = gen_orn_pg_i64, 1572 .fniv = gen_orn_pg_vec, 1573 .fno = gen_helper_sve_orn_pppp, 1574 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1575 }; 1576 1577 if (!dc_isar_feature(aa64_sve, s)) { 1578 return false; 1579 } 1580 return do_pppp_flags(s, a, &op); 1581 } 1582 1583 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1584 { 1585 tcg_gen_or_i64(pd, pn, pm); 1586 tcg_gen_andc_i64(pd, pg, pd); 1587 } 1588 1589 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1590 TCGv_vec pm, TCGv_vec pg) 1591 { 1592 tcg_gen_or_vec(vece, pd, pn, pm); 1593 tcg_gen_andc_vec(vece, pd, pg, pd); 1594 } 1595 1596 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) 1597 { 1598 static const GVecGen4 op = { 1599 .fni8 = gen_nor_pg_i64, 1600 .fniv = gen_nor_pg_vec, 1601 .fno = gen_helper_sve_nor_pppp, 1602 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1603 }; 1604 1605 if (!dc_isar_feature(aa64_sve, s)) { 1606 return false; 1607 } 1608 return do_pppp_flags(s, a, &op); 1609 } 1610 
1611 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1612 { 1613 tcg_gen_and_i64(pd, pn, pm); 1614 tcg_gen_andc_i64(pd, pg, pd); 1615 } 1616 1617 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1618 TCGv_vec pm, TCGv_vec pg) 1619 { 1620 tcg_gen_and_vec(vece, pd, pn, pm); 1621 tcg_gen_andc_vec(vece, pd, pg, pd); 1622 } 1623 1624 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) 1625 { 1626 static const GVecGen4 op = { 1627 .fni8 = gen_nand_pg_i64, 1628 .fniv = gen_nand_pg_vec, 1629 .fno = gen_helper_sve_nand_pppp, 1630 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1631 }; 1632 1633 if (!dc_isar_feature(aa64_sve, s)) { 1634 return false; 1635 } 1636 return do_pppp_flags(s, a, &op); 1637 } 1638 1639 /* 1640 *** SVE Predicate Misc Group 1641 */ 1642 1643 static bool trans_PTEST(DisasContext *s, arg_PTEST *a) 1644 { 1645 if (!dc_isar_feature(aa64_sve, s)) { 1646 return false; 1647 } 1648 if (sve_access_check(s)) { 1649 int nofs = pred_full_reg_offset(s, a->rn); 1650 int gofs = pred_full_reg_offset(s, a->pg); 1651 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8); 1652 1653 if (words == 1) { 1654 TCGv_i64 pn = tcg_temp_new_i64(); 1655 TCGv_i64 pg = tcg_temp_new_i64(); 1656 1657 tcg_gen_ld_i64(pn, cpu_env, nofs); 1658 tcg_gen_ld_i64(pg, cpu_env, gofs); 1659 do_predtest1(pn, pg); 1660 } else { 1661 do_predtest(s, nofs, gofs, words); 1662 } 1663 } 1664 return true; 1665 } 1666 1667 /* See the ARM pseudocode DecodePredCount. 
*/ 1668 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz) 1669 { 1670 unsigned elements = fullsz >> esz; 1671 unsigned bound; 1672 1673 switch (pattern) { 1674 case 0x0: /* POW2 */ 1675 return pow2floor(elements); 1676 case 0x1: /* VL1 */ 1677 case 0x2: /* VL2 */ 1678 case 0x3: /* VL3 */ 1679 case 0x4: /* VL4 */ 1680 case 0x5: /* VL5 */ 1681 case 0x6: /* VL6 */ 1682 case 0x7: /* VL7 */ 1683 case 0x8: /* VL8 */ 1684 bound = pattern; 1685 break; 1686 case 0x9: /* VL16 */ 1687 case 0xa: /* VL32 */ 1688 case 0xb: /* VL64 */ 1689 case 0xc: /* VL128 */ 1690 case 0xd: /* VL256 */ 1691 bound = 16 << (pattern - 9); 1692 break; 1693 case 0x1d: /* MUL4 */ 1694 return elements - elements % 4; 1695 case 0x1e: /* MUL3 */ 1696 return elements - elements % 3; 1697 case 0x1f: /* ALL */ 1698 return elements; 1699 default: /* #uimm5 */ 1700 return 0; 1701 } 1702 return elements >= bound ? bound : 0; 1703 } 1704 1705 /* This handles all of the predicate initialization instructions, 1706 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32 1707 * so that decode_pred_count returns 0. For SETFFR, we will have 1708 * set RD == 16 == FFR. 1709 */ 1710 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) 1711 { 1712 if (!sve_access_check(s)) { 1713 return true; 1714 } 1715 1716 unsigned fullsz = vec_full_reg_size(s); 1717 unsigned ofs = pred_full_reg_offset(s, rd); 1718 unsigned numelem, setsz, i; 1719 uint64_t word, lastword; 1720 TCGv_i64 t; 1721 1722 numelem = decode_pred_count(fullsz, pat, esz); 1723 1724 /* Determine what we must store into each bit, and how many. 
*/ 1725 if (numelem == 0) { 1726 lastword = word = 0; 1727 setsz = fullsz; 1728 } else { 1729 setsz = numelem << esz; 1730 lastword = word = pred_esz_masks[esz]; 1731 if (setsz % 64) { 1732 lastword &= MAKE_64BIT_MASK(0, setsz % 64); 1733 } 1734 } 1735 1736 t = tcg_temp_new_i64(); 1737 if (fullsz <= 64) { 1738 tcg_gen_movi_i64(t, lastword); 1739 tcg_gen_st_i64(t, cpu_env, ofs); 1740 goto done; 1741 } 1742 1743 if (word == lastword) { 1744 unsigned maxsz = size_for_gvec(fullsz / 8); 1745 unsigned oprsz = size_for_gvec(setsz / 8); 1746 1747 if (oprsz * 8 == setsz) { 1748 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word); 1749 goto done; 1750 } 1751 } 1752 1753 setsz /= 8; 1754 fullsz /= 8; 1755 1756 tcg_gen_movi_i64(t, word); 1757 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) { 1758 tcg_gen_st_i64(t, cpu_env, ofs + i); 1759 } 1760 if (lastword != word) { 1761 tcg_gen_movi_i64(t, lastword); 1762 tcg_gen_st_i64(t, cpu_env, ofs + i); 1763 i += 8; 1764 } 1765 if (i < fullsz) { 1766 tcg_gen_movi_i64(t, 0); 1767 for (; i < fullsz; i += 8) { 1768 tcg_gen_st_i64(t, cpu_env, ofs + i); 1769 } 1770 } 1771 1772 done: 1773 /* PTRUES */ 1774 if (setflag) { 1775 tcg_gen_movi_i32(cpu_NF, -(word != 0)); 1776 tcg_gen_movi_i32(cpu_CF, word == 0); 1777 tcg_gen_movi_i32(cpu_VF, 0); 1778 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1779 } 1780 return true; 1781 } 1782 1783 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) 1784 1785 /* Note pat == 31 is #all, to set all elements. */ 1786 TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, 1787 do_predset, 0, FFR_PRED_NUM, 31, false) 1788 1789 /* Note pat == 32 is #unimp, to set no elements. */ 1790 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) 1791 1792 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) 1793 { 1794 /* The path through do_pppp_flags is complicated enough to want to avoid 1795 * duplication. Frob the arguments into the form of a predicated AND. 
1796 */ 1797 arg_rprr_s alt_a = { 1798 .rd = a->rd, .pg = a->pg, .s = a->s, 1799 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM, 1800 }; 1801 1802 s->is_nonstreaming = true; 1803 return trans_AND_pppp(s, &alt_a); 1804 } 1805 1806 TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) 1807 TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) 1808 1809 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, 1810 void (*gen_fn)(TCGv_i32, TCGv_ptr, 1811 TCGv_ptr, TCGv_i32)) 1812 { 1813 if (!sve_access_check(s)) { 1814 return true; 1815 } 1816 1817 TCGv_ptr t_pd = tcg_temp_new_ptr(); 1818 TCGv_ptr t_pg = tcg_temp_new_ptr(); 1819 TCGv_i32 t; 1820 unsigned desc = 0; 1821 1822 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 1823 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 1824 1825 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd)); 1826 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn)); 1827 t = tcg_temp_new_i32(); 1828 1829 gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc)); 1830 1831 do_pred_flags(t); 1832 return true; 1833 } 1834 1835 TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst) 1836 TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext) 1837 1838 /* 1839 *** SVE Element Count Group 1840 */ 1841 1842 /* Perform an inline saturating addition of a 32-bit value within 1843 * a 64-bit register. The second operand is known to be positive, 1844 * which halves the comparisions we must perform to bound the result. 1845 */ 1846 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1847 { 1848 int64_t ibound; 1849 1850 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 1851 if (u) { 1852 tcg_gen_ext32u_i64(reg, reg); 1853 } else { 1854 tcg_gen_ext32s_i64(reg, reg); 1855 } 1856 if (d) { 1857 tcg_gen_sub_i64(reg, reg, val); 1858 ibound = (u ? 
0 : INT32_MIN); 1859 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound)); 1860 } else { 1861 tcg_gen_add_i64(reg, reg, val); 1862 ibound = (u ? UINT32_MAX : INT32_MAX); 1863 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound)); 1864 } 1865 } 1866 1867 /* Similarly with 64-bit values. */ 1868 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) 1869 { 1870 TCGv_i64 t0 = tcg_temp_new_i64(); 1871 TCGv_i64 t2; 1872 1873 if (u) { 1874 if (d) { 1875 tcg_gen_sub_i64(t0, reg, val); 1876 t2 = tcg_constant_i64(0); 1877 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0); 1878 } else { 1879 tcg_gen_add_i64(t0, reg, val); 1880 t2 = tcg_constant_i64(-1); 1881 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0); 1882 } 1883 } else { 1884 TCGv_i64 t1 = tcg_temp_new_i64(); 1885 if (d) { 1886 /* Detect signed overflow for subtraction. */ 1887 tcg_gen_xor_i64(t0, reg, val); 1888 tcg_gen_sub_i64(t1, reg, val); 1889 tcg_gen_xor_i64(reg, reg, t1); 1890 tcg_gen_and_i64(t0, t0, reg); 1891 1892 /* Bound the result. */ 1893 tcg_gen_movi_i64(reg, INT64_MIN); 1894 t2 = tcg_constant_i64(0); 1895 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1); 1896 } else { 1897 /* Detect signed overflow for addition. */ 1898 tcg_gen_xor_i64(t0, reg, val); 1899 tcg_gen_add_i64(reg, reg, val); 1900 tcg_gen_xor_i64(t1, reg, val); 1901 tcg_gen_andc_i64(t0, t1, t0); 1902 1903 /* Bound the result. */ 1904 tcg_gen_movi_i64(t1, INT64_MAX); 1905 t2 = tcg_constant_i64(0); 1906 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); 1907 } 1908 } 1909 } 1910 1911 /* Similarly with a vector and a scalar operand. 
*/ 1912 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1913 TCGv_i64 val, bool u, bool d) 1914 { 1915 unsigned vsz = vec_full_reg_size(s); 1916 TCGv_ptr dptr, nptr; 1917 TCGv_i32 t32, desc; 1918 TCGv_i64 t64; 1919 1920 dptr = tcg_temp_new_ptr(); 1921 nptr = tcg_temp_new_ptr(); 1922 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd)); 1923 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn)); 1924 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1925 1926 switch (esz) { 1927 case MO_8: 1928 t32 = tcg_temp_new_i32(); 1929 tcg_gen_extrl_i64_i32(t32, val); 1930 if (d) { 1931 tcg_gen_neg_i32(t32, t32); 1932 } 1933 if (u) { 1934 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1935 } else { 1936 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1937 } 1938 break; 1939 1940 case MO_16: 1941 t32 = tcg_temp_new_i32(); 1942 tcg_gen_extrl_i64_i32(t32, val); 1943 if (d) { 1944 tcg_gen_neg_i32(t32, t32); 1945 } 1946 if (u) { 1947 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1948 } else { 1949 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1950 } 1951 break; 1952 1953 case MO_32: 1954 t64 = tcg_temp_new_i64(); 1955 if (d) { 1956 tcg_gen_neg_i64(t64, val); 1957 } else { 1958 tcg_gen_mov_i64(t64, val); 1959 } 1960 if (u) { 1961 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1962 } else { 1963 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1964 } 1965 break; 1966 1967 case MO_64: 1968 if (u) { 1969 if (d) { 1970 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1971 } else { 1972 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1973 } 1974 } else if (d) { 1975 t64 = tcg_temp_new_i64(); 1976 tcg_gen_neg_i64(t64, val); 1977 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1978 } else { 1979 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1980 } 1981 break; 1982 1983 default: 1984 g_assert_not_reached(); 1985 } 1986 } 1987 1988 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1989 { 1990 if (!dc_isar_feature(aa64_sve, s)) { 1991 return 
false; 1992 } 1993 if (sve_access_check(s)) { 1994 unsigned fullsz = vec_full_reg_size(s); 1995 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 1996 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 1997 } 1998 return true; 1999 } 2000 2001 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 2002 { 2003 if (!dc_isar_feature(aa64_sve, s)) { 2004 return false; 2005 } 2006 if (sve_access_check(s)) { 2007 unsigned fullsz = vec_full_reg_size(s); 2008 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2009 int inc = numelem * a->imm * (a->d ? -1 : 1); 2010 TCGv_i64 reg = cpu_reg(s, a->rd); 2011 2012 tcg_gen_addi_i64(reg, reg, inc); 2013 } 2014 return true; 2015 } 2016 2017 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 2018 { 2019 if (!dc_isar_feature(aa64_sve, s)) { 2020 return false; 2021 } 2022 if (!sve_access_check(s)) { 2023 return true; 2024 } 2025 2026 unsigned fullsz = vec_full_reg_size(s); 2027 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2028 int inc = numelem * a->imm; 2029 TCGv_i64 reg = cpu_reg(s, a->rd); 2030 2031 /* Use normal 64-bit arithmetic to detect 32-bit overflow. 
*/ 2032 if (inc == 0) { 2033 if (a->u) { 2034 tcg_gen_ext32u_i64(reg, reg); 2035 } else { 2036 tcg_gen_ext32s_i64(reg, reg); 2037 } 2038 } else { 2039 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 2040 } 2041 return true; 2042 } 2043 2044 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2045 { 2046 if (!dc_isar_feature(aa64_sve, s)) { 2047 return false; 2048 } 2049 if (!sve_access_check(s)) { 2050 return true; 2051 } 2052 2053 unsigned fullsz = vec_full_reg_size(s); 2054 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2055 int inc = numelem * a->imm; 2056 TCGv_i64 reg = cpu_reg(s, a->rd); 2057 2058 if (inc != 0) { 2059 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2060 } 2061 return true; 2062 } 2063 2064 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2065 { 2066 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2067 return false; 2068 } 2069 2070 unsigned fullsz = vec_full_reg_size(s); 2071 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2072 int inc = numelem * a->imm; 2073 2074 if (inc != 0) { 2075 if (sve_access_check(s)) { 2076 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2077 vec_full_reg_offset(s, a->rn), 2078 tcg_constant_i64(a->d ? 
-inc : inc), 2079 fullsz, fullsz); 2080 } 2081 } else { 2082 do_mov_z(s, a->rd, a->rn); 2083 } 2084 return true; 2085 } 2086 2087 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2088 { 2089 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2090 return false; 2091 } 2092 2093 unsigned fullsz = vec_full_reg_size(s); 2094 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2095 int inc = numelem * a->imm; 2096 2097 if (inc != 0) { 2098 if (sve_access_check(s)) { 2099 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2100 tcg_constant_i64(inc), a->u, a->d); 2101 } 2102 } else { 2103 do_mov_z(s, a->rd, a->rn); 2104 } 2105 return true; 2106 } 2107 2108 /* 2109 *** SVE Bitwise Immediate Group 2110 */ 2111 2112 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2113 { 2114 uint64_t imm; 2115 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2116 extract32(a->dbm, 0, 6), 2117 extract32(a->dbm, 6, 6))) { 2118 return false; 2119 } 2120 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2121 } 2122 2123 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2124 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2125 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2126 2127 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2128 { 2129 uint64_t imm; 2130 2131 if (!dc_isar_feature(aa64_sve, s)) { 2132 return false; 2133 } 2134 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2135 extract32(a->dbm, 0, 6), 2136 extract32(a->dbm, 6, 6))) { 2137 return false; 2138 } 2139 if (sve_access_check(s)) { 2140 do_dupi_z(s, a->rd, imm); 2141 } 2142 return true; 2143 } 2144 2145 /* 2146 *** SVE Integer Wide Immediate - Predicated Group 2147 */ 2148 2149 /* Implement all merging copies. This is used for CPY (immediate), 2150 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 
2151 */ 2152 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2153 TCGv_i64 val) 2154 { 2155 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2156 static gen_cpy * const fns[4] = { 2157 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2158 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2159 }; 2160 unsigned vsz = vec_full_reg_size(s); 2161 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2162 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2163 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2164 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2165 2166 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); 2167 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn)); 2168 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 2169 2170 fns[esz](t_zd, t_zn, t_pg, val, desc); 2171 } 2172 2173 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2174 { 2175 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2176 return false; 2177 } 2178 if (sve_access_check(s)) { 2179 /* Decode the VFP immediate. 
*/ 2180 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2181 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2182 } 2183 return true; 2184 } 2185 2186 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2187 { 2188 if (!dc_isar_feature(aa64_sve, s)) { 2189 return false; 2190 } 2191 if (sve_access_check(s)) { 2192 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2193 } 2194 return true; 2195 } 2196 2197 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2198 { 2199 static gen_helper_gvec_2i * const fns[4] = { 2200 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2201 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2202 }; 2203 2204 if (!dc_isar_feature(aa64_sve, s)) { 2205 return false; 2206 } 2207 if (sve_access_check(s)) { 2208 unsigned vsz = vec_full_reg_size(s); 2209 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2210 pred_full_reg_offset(s, a->pg), 2211 tcg_constant_i64(a->imm), 2212 vsz, vsz, 0, fns[a->esz]); 2213 } 2214 return true; 2215 } 2216 2217 /* 2218 *** SVE Permute Extract Group 2219 */ 2220 2221 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2222 { 2223 if (!sve_access_check(s)) { 2224 return true; 2225 } 2226 2227 unsigned vsz = vec_full_reg_size(s); 2228 unsigned n_ofs = imm >= vsz ? 0 : imm; 2229 unsigned n_siz = vsz - n_ofs; 2230 unsigned d = vec_full_reg_offset(s, rd); 2231 unsigned n = vec_full_reg_offset(s, rn); 2232 unsigned m = vec_full_reg_offset(s, rm); 2233 2234 /* Use host vector move insns if we have appropriate sizes 2235 * and no unfortunate overlap. 
2236 */ 2237 if (m != d 2238 && n_ofs == size_for_gvec(n_ofs) 2239 && n_siz == size_for_gvec(n_siz) 2240 && (d != n || n_siz <= n_ofs)) { 2241 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2242 if (n_ofs != 0) { 2243 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2244 } 2245 } else { 2246 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2247 } 2248 return true; 2249 } 2250 2251 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2252 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2253 2254 /* 2255 *** SVE Permute - Unpredicated Group 2256 */ 2257 2258 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2259 { 2260 if (!dc_isar_feature(aa64_sve, s)) { 2261 return false; 2262 } 2263 if (sve_access_check(s)) { 2264 unsigned vsz = vec_full_reg_size(s); 2265 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2266 vsz, vsz, cpu_reg_sp(s, a->rn)); 2267 } 2268 return true; 2269 } 2270 2271 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2272 { 2273 if (!dc_isar_feature(aa64_sve, s)) { 2274 return false; 2275 } 2276 if ((a->imm & 0x1f) == 0) { 2277 return false; 2278 } 2279 if (sve_access_check(s)) { 2280 unsigned vsz = vec_full_reg_size(s); 2281 unsigned dofs = vec_full_reg_offset(s, a->rd); 2282 unsigned esz, index; 2283 2284 esz = ctz32(a->imm); 2285 index = a->imm >> (esz + 1); 2286 2287 if ((index << esz) < vsz) { 2288 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2289 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2290 } else { 2291 /* 2292 * While dup_mem handles 128-bit elements, dup_imm does not. 2293 * Thankfully element size doesn't matter for splatting zero. 
2294 */ 2295 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2296 } 2297 } 2298 return true; 2299 } 2300 2301 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2302 { 2303 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2304 static gen_insr * const fns[4] = { 2305 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2306 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2307 }; 2308 unsigned vsz = vec_full_reg_size(s); 2309 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2310 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2311 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2312 2313 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd)); 2314 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2315 2316 fns[a->esz](t_zd, t_zn, val, desc); 2317 } 2318 2319 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2320 { 2321 if (!dc_isar_feature(aa64_sve, s)) { 2322 return false; 2323 } 2324 if (sve_access_check(s)) { 2325 TCGv_i64 t = tcg_temp_new_i64(); 2326 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2327 do_insr_i64(s, a, t); 2328 } 2329 return true; 2330 } 2331 2332 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2333 { 2334 if (!dc_isar_feature(aa64_sve, s)) { 2335 return false; 2336 } 2337 if (sve_access_check(s)) { 2338 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2339 } 2340 return true; 2341 } 2342 2343 static gen_helper_gvec_2 * const rev_fns[4] = { 2344 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2345 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2346 }; 2347 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2348 2349 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2350 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2351 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2352 }; 2353 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0) 2354 2355 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2356 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2357 
gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2358 }; 2359 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2360 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2361 2362 static gen_helper_gvec_3 * const tbx_fns[4] = { 2363 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2364 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2365 }; 2366 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2367 2368 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2369 { 2370 static gen_helper_gvec_2 * const fns[4][2] = { 2371 { NULL, NULL }, 2372 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2373 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2374 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2375 }; 2376 2377 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2378 return false; 2379 } 2380 if (sve_access_check(s)) { 2381 unsigned vsz = vec_full_reg_size(s); 2382 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2383 vec_full_reg_offset(s, a->rn) 2384 + (a->h ? 
vsz / 2 : 0), 2385 vsz, vsz, 0, fns[a->esz][a->u]); 2386 } 2387 return true; 2388 } 2389 2390 /* 2391 *** SVE Permute - Predicates Group 2392 */ 2393 2394 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2395 gen_helper_gvec_3 *fn) 2396 { 2397 if (!sve_access_check(s)) { 2398 return true; 2399 } 2400 2401 unsigned vsz = pred_full_reg_size(s); 2402 2403 TCGv_ptr t_d = tcg_temp_new_ptr(); 2404 TCGv_ptr t_n = tcg_temp_new_ptr(); 2405 TCGv_ptr t_m = tcg_temp_new_ptr(); 2406 uint32_t desc = 0; 2407 2408 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2409 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2410 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2411 2412 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2413 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2414 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm)); 2415 2416 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2417 return true; 2418 } 2419 2420 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2421 gen_helper_gvec_2 *fn) 2422 { 2423 if (!sve_access_check(s)) { 2424 return true; 2425 } 2426 2427 unsigned vsz = pred_full_reg_size(s); 2428 TCGv_ptr t_d = tcg_temp_new_ptr(); 2429 TCGv_ptr t_n = tcg_temp_new_ptr(); 2430 uint32_t desc = 0; 2431 2432 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2433 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2434 2435 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2436 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2437 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2438 2439 fn(t_d, t_n, tcg_constant_i32(desc)); 2440 return true; 2441 } 2442 2443 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2444 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2445 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2446 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 
2447 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2448 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2449 2450 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2451 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2452 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2453 2454 /* 2455 *** SVE Permute - Interleaving Group 2456 */ 2457 2458 static gen_helper_gvec_3 * const zip_fns[4] = { 2459 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2460 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2461 }; 2462 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2463 zip_fns[a->esz], a, 0) 2464 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2465 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2466 2467 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2468 gen_helper_sve2_zip_q, a, 0) 2469 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2470 gen_helper_sve2_zip_q, a, 2471 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2472 2473 static gen_helper_gvec_3 * const uzp_fns[4] = { 2474 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2475 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2476 }; 2477 2478 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2479 uzp_fns[a->esz], a, 0) 2480 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2481 uzp_fns[a->esz], a, 1 << a->esz) 2482 2483 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2484 gen_helper_sve2_uzp_q, a, 0) 2485 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2486 gen_helper_sve2_uzp_q, a, 16) 2487 2488 static gen_helper_gvec_3 * const trn_fns[4] = { 2489 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2490 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2491 }; 2492 2493 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2494 trn_fns[a->esz], a, 0) 2495 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2496 trn_fns[a->esz], a, 1 << a->esz) 2497 2498 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, 
gen_gvec_ool_arg_zzz, 2499 gen_helper_sve2_trn_q, a, 0) 2500 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2501 gen_helper_sve2_trn_q, a, 16) 2502 2503 /* 2504 *** SVE Permute Vector - Predicated Group 2505 */ 2506 2507 static gen_helper_gvec_3 * const compact_fns[4] = { 2508 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2509 }; 2510 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2511 compact_fns[a->esz], a, 0) 2512 2513 /* Call the helper that computes the ARM LastActiveElement pseudocode 2514 * function, scaled by the element size. This includes the not found 2515 * indication; e.g. not found for esz=3 is -8. 2516 */ 2517 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2518 { 2519 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2520 * round up, as we do elsewhere, because we need the exact size. 2521 */ 2522 TCGv_ptr t_p = tcg_temp_new_ptr(); 2523 unsigned desc = 0; 2524 2525 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2526 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2527 2528 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg)); 2529 2530 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2531 } 2532 2533 /* Increment LAST to the offset of the next element in the vector, 2534 * wrapping around to 0. 2535 */ 2536 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2537 { 2538 unsigned vsz = vec_full_reg_size(s); 2539 2540 tcg_gen_addi_i32(last, last, 1 << esz); 2541 if (is_power_of_2(vsz)) { 2542 tcg_gen_andi_i32(last, last, vsz - 1); 2543 } else { 2544 TCGv_i32 max = tcg_constant_i32(vsz); 2545 TCGv_i32 zero = tcg_constant_i32(0); 2546 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2547 } 2548 } 2549 2550 /* If LAST < 0, set LAST to the offset of the last element in the vector. 
*/ 2551 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2552 { 2553 unsigned vsz = vec_full_reg_size(s); 2554 2555 if (is_power_of_2(vsz)) { 2556 tcg_gen_andi_i32(last, last, vsz - 1); 2557 } else { 2558 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2559 TCGv_i32 zero = tcg_constant_i32(0); 2560 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2561 } 2562 } 2563 2564 /* Load an unsigned element of ESZ from BASE+OFS. */ 2565 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2566 { 2567 TCGv_i64 r = tcg_temp_new_i64(); 2568 2569 switch (esz) { 2570 case 0: 2571 tcg_gen_ld8u_i64(r, base, ofs); 2572 break; 2573 case 1: 2574 tcg_gen_ld16u_i64(r, base, ofs); 2575 break; 2576 case 2: 2577 tcg_gen_ld32u_i64(r, base, ofs); 2578 break; 2579 case 3: 2580 tcg_gen_ld_i64(r, base, ofs); 2581 break; 2582 default: 2583 g_assert_not_reached(); 2584 } 2585 return r; 2586 } 2587 2588 /* Load an unsigned element of ESZ from RM[LAST]. */ 2589 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2590 int rm, int esz) 2591 { 2592 TCGv_ptr p = tcg_temp_new_ptr(); 2593 2594 /* Convert offset into vector into offset into ENV. 2595 * The final adjustment for the vector register base 2596 * is added via constant offset to the load. 2597 */ 2598 #if HOST_BIG_ENDIAN 2599 /* Adjust for element ordering. See vec_reg_offset. */ 2600 if (esz < 3) { 2601 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2602 } 2603 #endif 2604 tcg_gen_ext_i32_ptr(p, last); 2605 tcg_gen_add_ptr(p, p, cpu_env); 2606 2607 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2608 } 2609 2610 /* Compute CLAST for a Zreg. 
*/ 2611 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2612 { 2613 TCGv_i32 last; 2614 TCGLabel *over; 2615 TCGv_i64 ele; 2616 unsigned vsz, esz = a->esz; 2617 2618 if (!sve_access_check(s)) { 2619 return true; 2620 } 2621 2622 last = tcg_temp_new_i32(); 2623 over = gen_new_label(); 2624 2625 find_last_active(s, last, esz, a->pg); 2626 2627 /* There is of course no movcond for a 2048-bit vector, 2628 * so we must branch over the actual store. 2629 */ 2630 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2631 2632 if (!before) { 2633 incr_last_active(s, last, esz); 2634 } 2635 2636 ele = load_last_active(s, last, a->rm, esz); 2637 2638 vsz = vec_full_reg_size(s); 2639 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2640 2641 /* If this insn used MOVPRFX, we may need a second move. */ 2642 if (a->rd != a->rn) { 2643 TCGLabel *done = gen_new_label(); 2644 tcg_gen_br(done); 2645 2646 gen_set_label(over); 2647 do_mov_z(s, a->rd, a->rn); 2648 2649 gen_set_label(done); 2650 } else { 2651 gen_set_label(over); 2652 } 2653 return true; 2654 } 2655 2656 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2657 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2658 2659 /* Compute CLAST for a scalar. */ 2660 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2661 bool before, TCGv_i64 reg_val) 2662 { 2663 TCGv_i32 last = tcg_temp_new_i32(); 2664 TCGv_i64 ele, cmp; 2665 2666 find_last_active(s, last, esz, pg); 2667 2668 /* Extend the original value of last prior to incrementing. */ 2669 cmp = tcg_temp_new_i64(); 2670 tcg_gen_ext_i32_i64(cmp, last); 2671 2672 if (!before) { 2673 incr_last_active(s, last, esz); 2674 } 2675 2676 /* The conceit here is that while last < 0 indicates not found, after 2677 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address 2678 * from which we can load garbage. We then discard the garbage with 2679 * a conditional move. 
2680 */ 2681 ele = load_last_active(s, last, rm, esz); 2682 2683 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2684 ele, reg_val); 2685 } 2686 2687 /* Compute CLAST for a Vreg. */ 2688 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2689 { 2690 if (sve_access_check(s)) { 2691 int esz = a->esz; 2692 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2693 TCGv_i64 reg = load_esz(cpu_env, ofs, esz); 2694 2695 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2696 write_fp_dreg(s, a->rd, reg); 2697 } 2698 return true; 2699 } 2700 2701 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2702 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2703 2704 /* Compute CLAST for a Xreg. */ 2705 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2706 { 2707 TCGv_i64 reg; 2708 2709 if (!sve_access_check(s)) { 2710 return true; 2711 } 2712 2713 reg = cpu_reg(s, a->rd); 2714 switch (a->esz) { 2715 case 0: 2716 tcg_gen_ext8u_i64(reg, reg); 2717 break; 2718 case 1: 2719 tcg_gen_ext16u_i64(reg, reg); 2720 break; 2721 case 2: 2722 tcg_gen_ext32u_i64(reg, reg); 2723 break; 2724 case 3: 2725 break; 2726 default: 2727 g_assert_not_reached(); 2728 } 2729 2730 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2731 return true; 2732 } 2733 2734 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2735 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2736 2737 /* Compute LAST for a scalar. */ 2738 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2739 int pg, int rm, bool before) 2740 { 2741 TCGv_i32 last = tcg_temp_new_i32(); 2742 2743 find_last_active(s, last, esz, pg); 2744 if (before) { 2745 wrap_last_active(s, last, esz); 2746 } else { 2747 incr_last_active(s, last, esz); 2748 } 2749 2750 return load_last_active(s, last, rm, esz); 2751 } 2752 2753 /* Compute LAST for a Vreg. 
*/ 2754 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2755 { 2756 if (sve_access_check(s)) { 2757 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2758 write_fp_dreg(s, a->rd, val); 2759 } 2760 return true; 2761 } 2762 2763 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2764 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2765 2766 /* Compute LAST for a Xreg. */ 2767 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2768 { 2769 if (sve_access_check(s)) { 2770 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2771 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2772 } 2773 return true; 2774 } 2775 2776 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2777 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2778 2779 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2780 { 2781 if (!dc_isar_feature(aa64_sve, s)) { 2782 return false; 2783 } 2784 if (sve_access_check(s)) { 2785 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2786 } 2787 return true; 2788 } 2789 2790 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2791 { 2792 if (!dc_isar_feature(aa64_sve, s)) { 2793 return false; 2794 } 2795 if (sve_access_check(s)) { 2796 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2797 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz); 2798 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2799 } 2800 return true; 2801 } 2802 2803 static gen_helper_gvec_3 * const revb_fns[4] = { 2804 NULL, gen_helper_sve_revb_h, 2805 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2806 }; 2807 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2808 2809 static gen_helper_gvec_3 * const revh_fns[4] = { 2810 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2811 }; 2812 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2813 2814 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2815 a->esz == 3 ? 
gen_helper_sve_revw_d : NULL, a, 0) 2816 2817 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2818 2819 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2820 gen_helper_sve_splice, a, a->esz) 2821 2822 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2823 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2824 2825 /* 2826 *** SVE Integer Compare - Vectors Group 2827 */ 2828 2829 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2830 gen_helper_gvec_flags_4 *gen_fn) 2831 { 2832 TCGv_ptr pd, zn, zm, pg; 2833 unsigned vsz; 2834 TCGv_i32 t; 2835 2836 if (gen_fn == NULL) { 2837 return false; 2838 } 2839 if (!sve_access_check(s)) { 2840 return true; 2841 } 2842 2843 vsz = vec_full_reg_size(s); 2844 t = tcg_temp_new_i32(); 2845 pd = tcg_temp_new_ptr(); 2846 zn = tcg_temp_new_ptr(); 2847 zm = tcg_temp_new_ptr(); 2848 pg = tcg_temp_new_ptr(); 2849 2850 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 2851 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2852 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm)); 2853 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 2854 2855 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2856 2857 do_pred_flags(t); 2858 return true; 2859 } 2860 2861 #define DO_PPZZ(NAME, name) \ 2862 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2863 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2864 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2865 }; \ 2866 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2867 a, name##_ppzz_fns[a->esz]) 2868 2869 DO_PPZZ(CMPEQ, cmpeq) 2870 DO_PPZZ(CMPNE, cmpne) 2871 DO_PPZZ(CMPGT, cmpgt) 2872 DO_PPZZ(CMPGE, cmpge) 2873 DO_PPZZ(CMPHI, cmphi) 2874 DO_PPZZ(CMPHS, cmphs) 2875 2876 #undef DO_PPZZ 2877 2878 #define DO_PPZW(NAME, name) \ 2879 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2880 
gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2881 gen_helper_sve_##name##_ppzw_s, NULL \ 2882 }; \ 2883 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2884 a, name##_ppzw_fns[a->esz]) 2885 2886 DO_PPZW(CMPEQ, cmpeq) 2887 DO_PPZW(CMPNE, cmpne) 2888 DO_PPZW(CMPGT, cmpgt) 2889 DO_PPZW(CMPGE, cmpge) 2890 DO_PPZW(CMPHI, cmphi) 2891 DO_PPZW(CMPHS, cmphs) 2892 DO_PPZW(CMPLT, cmplt) 2893 DO_PPZW(CMPLE, cmple) 2894 DO_PPZW(CMPLO, cmplo) 2895 DO_PPZW(CMPLS, cmpls) 2896 2897 #undef DO_PPZW 2898 2899 /* 2900 *** SVE Integer Compare - Immediate Groups 2901 */ 2902 2903 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2904 gen_helper_gvec_flags_3 *gen_fn) 2905 { 2906 TCGv_ptr pd, zn, pg; 2907 unsigned vsz; 2908 TCGv_i32 t; 2909 2910 if (gen_fn == NULL) { 2911 return false; 2912 } 2913 if (!sve_access_check(s)) { 2914 return true; 2915 } 2916 2917 vsz = vec_full_reg_size(s); 2918 t = tcg_temp_new_i32(); 2919 pd = tcg_temp_new_ptr(); 2920 zn = tcg_temp_new_ptr(); 2921 pg = tcg_temp_new_ptr(); 2922 2923 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 2924 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2925 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 2926 2927 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 2928 2929 do_pred_flags(t); 2930 return true; 2931 } 2932 2933 #define DO_PPZI(NAME, name) \ 2934 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 2935 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 2936 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 2937 }; \ 2938 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 2939 name##_ppzi_fns[a->esz]) 2940 2941 DO_PPZI(CMPEQ, cmpeq) 2942 DO_PPZI(CMPNE, cmpne) 2943 DO_PPZI(CMPGT, cmpgt) 2944 DO_PPZI(CMPGE, cmpge) 2945 DO_PPZI(CMPHI, cmphi) 2946 DO_PPZI(CMPHS, cmphs) 2947 DO_PPZI(CMPLT, cmplt) 2948 DO_PPZI(CMPLE, cmple) 2949 DO_PPZI(CMPLO, cmplo) 2950 DO_PPZI(CMPLS, cmpls) 2951 
2952 #undef DO_PPZI 2953 2954 /* 2955 *** SVE Partition Break Group 2956 */ 2957 2958 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2959 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2960 { 2961 if (!sve_access_check(s)) { 2962 return true; 2963 } 2964 2965 unsigned vsz = pred_full_reg_size(s); 2966 2967 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2968 TCGv_ptr d = tcg_temp_new_ptr(); 2969 TCGv_ptr n = tcg_temp_new_ptr(); 2970 TCGv_ptr m = tcg_temp_new_ptr(); 2971 TCGv_ptr g = tcg_temp_new_ptr(); 2972 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2973 2974 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 2975 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 2976 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm)); 2977 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 2978 2979 if (a->s) { 2980 TCGv_i32 t = tcg_temp_new_i32(); 2981 fn_s(t, d, n, m, g, desc); 2982 do_pred_flags(t); 2983 } else { 2984 fn(d, n, m, g, desc); 2985 } 2986 return true; 2987 } 2988 2989 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2990 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 2991 { 2992 if (!sve_access_check(s)) { 2993 return true; 2994 } 2995 2996 unsigned vsz = pred_full_reg_size(s); 2997 2998 /* Predicate sizes may be smaller and cannot use simd_desc. 
*/ 2999 TCGv_ptr d = tcg_temp_new_ptr(); 3000 TCGv_ptr n = tcg_temp_new_ptr(); 3001 TCGv_ptr g = tcg_temp_new_ptr(); 3002 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3003 3004 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 3005 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 3006 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 3007 3008 if (a->s) { 3009 TCGv_i32 t = tcg_temp_new_i32(); 3010 fn_s(t, d, n, g, desc); 3011 do_pred_flags(t); 3012 } else { 3013 fn(d, n, g, desc); 3014 } 3015 return true; 3016 } 3017 3018 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3019 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3020 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3021 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3022 3023 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3024 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3025 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3026 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3027 3028 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3029 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3030 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3031 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3032 3033 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3034 gen_helper_sve_brkn, gen_helper_sve_brkns) 3035 3036 /* 3037 *** SVE Predicate Count Group 3038 */ 3039 3040 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3041 { 3042 unsigned psz = pred_full_reg_size(s); 3043 3044 if (psz <= 8) { 3045 uint64_t psz_mask; 3046 3047 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn)); 3048 if (pn != pg) { 3049 TCGv_i64 g = tcg_temp_new_i64(); 3050 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg)); 3051 tcg_gen_and_i64(val, val, g); 3052 } 3053 3054 /* Reduce the pred_esz_masks value simply to reduce the 3055 * size of the code generated here. 
3056 */ 3057 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3058 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3059 3060 tcg_gen_ctpop_i64(val, val); 3061 } else { 3062 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3063 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3064 unsigned desc = 0; 3065 3066 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3067 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3068 3069 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn)); 3070 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3071 3072 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3073 } 3074 } 3075 3076 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3077 { 3078 if (!dc_isar_feature(aa64_sve, s)) { 3079 return false; 3080 } 3081 if (sve_access_check(s)) { 3082 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3083 } 3084 return true; 3085 } 3086 3087 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3088 { 3089 if (!dc_isar_feature(aa64_sve, s)) { 3090 return false; 3091 } 3092 if (sve_access_check(s)) { 3093 TCGv_i64 reg = cpu_reg(s, a->rd); 3094 TCGv_i64 val = tcg_temp_new_i64(); 3095 3096 do_cntp(s, val, a->esz, a->pg, a->pg); 3097 if (a->d) { 3098 tcg_gen_sub_i64(reg, reg, val); 3099 } else { 3100 tcg_gen_add_i64(reg, reg, val); 3101 } 3102 } 3103 return true; 3104 } 3105 3106 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3107 { 3108 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3109 return false; 3110 } 3111 if (sve_access_check(s)) { 3112 unsigned vsz = vec_full_reg_size(s); 3113 TCGv_i64 val = tcg_temp_new_i64(); 3114 GVecGen2sFn *gvec_fn = a->d ? 
tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3115 3116 do_cntp(s, val, a->esz, a->pg, a->pg); 3117 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3118 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3119 } 3120 return true; 3121 } 3122 3123 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3124 { 3125 if (!dc_isar_feature(aa64_sve, s)) { 3126 return false; 3127 } 3128 if (sve_access_check(s)) { 3129 TCGv_i64 reg = cpu_reg(s, a->rd); 3130 TCGv_i64 val = tcg_temp_new_i64(); 3131 3132 do_cntp(s, val, a->esz, a->pg, a->pg); 3133 do_sat_addsub_32(reg, val, a->u, a->d); 3134 } 3135 return true; 3136 } 3137 3138 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3139 { 3140 if (!dc_isar_feature(aa64_sve, s)) { 3141 return false; 3142 } 3143 if (sve_access_check(s)) { 3144 TCGv_i64 reg = cpu_reg(s, a->rd); 3145 TCGv_i64 val = tcg_temp_new_i64(); 3146 3147 do_cntp(s, val, a->esz, a->pg, a->pg); 3148 do_sat_addsub_64(reg, val, a->u, a->d); 3149 } 3150 return true; 3151 } 3152 3153 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3154 { 3155 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3156 return false; 3157 } 3158 if (sve_access_check(s)) { 3159 TCGv_i64 val = tcg_temp_new_i64(); 3160 do_cntp(s, val, a->esz, a->pg, a->pg); 3161 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3162 } 3163 return true; 3164 } 3165 3166 /* 3167 *** SVE Integer Compare Scalars Group 3168 */ 3169 3170 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3171 { 3172 if (!dc_isar_feature(aa64_sve, s)) { 3173 return false; 3174 } 3175 if (!sve_access_check(s)) { 3176 return true; 3177 } 3178 3179 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3180 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3181 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3182 TCGv_i64 cmp = tcg_temp_new_i64(); 3183 3184 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3185 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3186 3187 /* VF = !NF & !CF. 
*/ 3188 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3189 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3190 3191 /* Both NF and VF actually look at bit 31. */ 3192 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3193 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3194 return true; 3195 } 3196 3197 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3198 { 3199 TCGv_i64 op0, op1, t0, t1, tmax; 3200 TCGv_i32 t2; 3201 TCGv_ptr ptr; 3202 unsigned vsz = vec_full_reg_size(s); 3203 unsigned desc = 0; 3204 TCGCond cond; 3205 uint64_t maxval; 3206 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3207 bool eq = a->eq == a->lt; 3208 3209 /* The greater-than conditions are all SVE2. */ 3210 if (a->lt 3211 ? !dc_isar_feature(aa64_sve, s) 3212 : !dc_isar_feature(aa64_sve2, s)) { 3213 return false; 3214 } 3215 if (!sve_access_check(s)) { 3216 return true; 3217 } 3218 3219 op0 = read_cpu_reg(s, a->rn, 1); 3220 op1 = read_cpu_reg(s, a->rm, 1); 3221 3222 if (!a->sf) { 3223 if (a->u) { 3224 tcg_gen_ext32u_i64(op0, op0); 3225 tcg_gen_ext32u_i64(op1, op1); 3226 } else { 3227 tcg_gen_ext32s_i64(op0, op0); 3228 tcg_gen_ext32s_i64(op1, op1); 3229 } 3230 } 3231 3232 /* For the helper, compress the different conditions into a computation 3233 * of how many iterations for which the condition is true. 3234 */ 3235 t0 = tcg_temp_new_i64(); 3236 t1 = tcg_temp_new_i64(); 3237 3238 if (a->lt) { 3239 tcg_gen_sub_i64(t0, op1, op0); 3240 if (a->u) { 3241 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3242 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3243 } else { 3244 maxval = a->sf ? INT64_MAX : INT32_MAX; 3245 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3246 } 3247 } else { 3248 tcg_gen_sub_i64(t0, op0, op1); 3249 if (a->u) { 3250 maxval = 0; 3251 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3252 } else { 3253 maxval = a->sf ? INT64_MIN : INT32_MIN; 3254 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3255 } 3256 } 3257 3258 tmax = tcg_constant_i64(vsz >> a->esz); 3259 if (eq) { 3260 /* Equality means one more iteration. 
*/ 3261 tcg_gen_addi_i64(t0, t0, 1); 3262 3263 /* 3264 * For the less-than while, if op1 is maxval (and the only time 3265 * the addition above could overflow), then we produce an all-true 3266 * predicate by setting the count to the vector length. This is 3267 * because the pseudocode is described as an increment + compare 3268 * loop, and the maximum integer would always compare true. 3269 * Similarly, the greater-than while has the same issue with the 3270 * minimum integer due to the decrement + compare loop. 3271 */ 3272 tcg_gen_movi_i64(t1, maxval); 3273 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3274 } 3275 3276 /* Bound to the maximum. */ 3277 tcg_gen_umin_i64(t0, t0, tmax); 3278 3279 /* Set the count to zero if the condition is false. */ 3280 tcg_gen_movi_i64(t1, 0); 3281 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3282 3283 /* Since we're bounded, pass as a 32-bit type. */ 3284 t2 = tcg_temp_new_i32(); 3285 tcg_gen_extrl_i64_i32(t2, t0); 3286 3287 /* Scale elements to bits. 
*/ 3288 tcg_gen_shli_i32(t2, t2, a->esz); 3289 3290 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3291 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3292 3293 ptr = tcg_temp_new_ptr(); 3294 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3295 3296 if (a->lt) { 3297 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3298 } else { 3299 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3300 } 3301 do_pred_flags(t2); 3302 return true; 3303 } 3304 3305 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3306 { 3307 TCGv_i64 op0, op1, diff, t1, tmax; 3308 TCGv_i32 t2; 3309 TCGv_ptr ptr; 3310 unsigned vsz = vec_full_reg_size(s); 3311 unsigned desc = 0; 3312 3313 if (!dc_isar_feature(aa64_sve2, s)) { 3314 return false; 3315 } 3316 if (!sve_access_check(s)) { 3317 return true; 3318 } 3319 3320 op0 = read_cpu_reg(s, a->rn, 1); 3321 op1 = read_cpu_reg(s, a->rm, 1); 3322 3323 tmax = tcg_constant_i64(vsz); 3324 diff = tcg_temp_new_i64(); 3325 3326 if (a->rw) { 3327 /* WHILERW */ 3328 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3329 t1 = tcg_temp_new_i64(); 3330 tcg_gen_sub_i64(diff, op0, op1); 3331 tcg_gen_sub_i64(t1, op1, op0); 3332 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3333 /* Round down to a multiple of ESIZE. */ 3334 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3335 /* If op1 == op0, diff == 0, and the condition is always true. */ 3336 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3337 } else { 3338 /* WHILEWR */ 3339 tcg_gen_sub_i64(diff, op1, op0); 3340 /* Round down to a multiple of ESIZE. */ 3341 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3342 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3343 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3344 } 3345 3346 /* Bound to the maximum. */ 3347 tcg_gen_umin_i64(diff, diff, tmax); 3348 3349 /* Since we're bounded, pass as a 32-bit type. 
*/ 3350 t2 = tcg_temp_new_i32(); 3351 tcg_gen_extrl_i64_i32(t2, diff); 3352 3353 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3354 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3355 3356 ptr = tcg_temp_new_ptr(); 3357 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3358 3359 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3360 do_pred_flags(t2); 3361 return true; 3362 } 3363 3364 /* 3365 *** SVE Integer Wide Immediate - Unpredicated Group 3366 */ 3367 3368 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3369 { 3370 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3371 return false; 3372 } 3373 if (sve_access_check(s)) { 3374 unsigned vsz = vec_full_reg_size(s); 3375 int dofs = vec_full_reg_offset(s, a->rd); 3376 uint64_t imm; 3377 3378 /* Decode the VFP immediate. */ 3379 imm = vfp_expand_imm(a->esz, a->imm); 3380 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3381 } 3382 return true; 3383 } 3384 3385 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3386 { 3387 if (!dc_isar_feature(aa64_sve, s)) { 3388 return false; 3389 } 3390 if (sve_access_check(s)) { 3391 unsigned vsz = vec_full_reg_size(s); 3392 int dofs = vec_full_reg_offset(s, a->rd); 3393 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3394 } 3395 return true; 3396 } 3397 3398 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3399 3400 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3401 { 3402 a->imm = -a->imm; 3403 return trans_ADD_zzi(s, a); 3404 } 3405 3406 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3407 { 3408 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3409 static const GVecGen2s op[4] = { 3410 { .fni8 = tcg_gen_vec_sub8_i64, 3411 .fniv = tcg_gen_sub_vec, 3412 .fno = gen_helper_sve_subri_b, 3413 .opt_opc = vecop_list, 3414 .vece = MO_8, 3415 .scalar_first = true }, 3416 { .fni8 = tcg_gen_vec_sub16_i64, 3417 .fniv = tcg_gen_sub_vec, 3418 .fno = gen_helper_sve_subri_h, 3419 
.opt_opc = vecop_list, 3420 .vece = MO_16, 3421 .scalar_first = true }, 3422 { .fni4 = tcg_gen_sub_i32, 3423 .fniv = tcg_gen_sub_vec, 3424 .fno = gen_helper_sve_subri_s, 3425 .opt_opc = vecop_list, 3426 .vece = MO_32, 3427 .scalar_first = true }, 3428 { .fni8 = tcg_gen_sub_i64, 3429 .fniv = tcg_gen_sub_vec, 3430 .fno = gen_helper_sve_subri_d, 3431 .opt_opc = vecop_list, 3432 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3433 .vece = MO_64, 3434 .scalar_first = true } 3435 }; 3436 3437 if (!dc_isar_feature(aa64_sve, s)) { 3438 return false; 3439 } 3440 if (sve_access_check(s)) { 3441 unsigned vsz = vec_full_reg_size(s); 3442 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3443 vec_full_reg_offset(s, a->rn), 3444 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3445 } 3446 return true; 3447 } 3448 3449 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3450 3451 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3452 { 3453 if (sve_access_check(s)) { 3454 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3455 tcg_constant_i64(a->imm), u, d); 3456 } 3457 return true; 3458 } 3459 3460 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3461 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3462 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3463 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3464 3465 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3466 { 3467 if (sve_access_check(s)) { 3468 unsigned vsz = vec_full_reg_size(s); 3469 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3470 vec_full_reg_offset(s, a->rn), 3471 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3472 } 3473 return true; 3474 } 3475 3476 #define DO_ZZI(NAME, name) \ 3477 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3478 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3479 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3480 }; \ 3481 TRANS_FEAT(NAME##_zzi, 
aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3482 3483 DO_ZZI(SMAX, smax) 3484 DO_ZZI(UMAX, umax) 3485 DO_ZZI(SMIN, smin) 3486 DO_ZZI(UMIN, umin) 3487 3488 #undef DO_ZZI 3489 3490 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3491 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3492 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3493 }; 3494 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3495 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3496 3497 /* 3498 * SVE Multiply - Indexed 3499 */ 3500 3501 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3502 gen_helper_gvec_sdot_idx_b, a) 3503 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3504 gen_helper_gvec_sdot_idx_h, a) 3505 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3506 gen_helper_gvec_udot_idx_b, a) 3507 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3508 gen_helper_gvec_udot_idx_h, a) 3509 3510 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3511 gen_helper_gvec_sudot_idx_b, a) 3512 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3513 gen_helper_gvec_usdot_idx_b, a) 3514 3515 #define DO_SVE2_RRX(NAME, FUNC) \ 3516 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3517 a->rd, a->rn, a->rm, a->index) 3518 3519 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3520 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3521 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3522 3523 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3524 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3525 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3526 3527 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3528 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3529 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3530 3531 #undef DO_SVE2_RRX 3532 3533 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3534 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3535 a->rd, a->rn, a->rm, 
(a->index << 1) | TOP) 3536 3537 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3538 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3539 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3540 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3541 3542 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3543 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3544 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3545 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3546 3547 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3548 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3549 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3550 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3551 3552 #undef DO_SVE2_RRX_TB 3553 3554 #define DO_SVE2_RRXR(NAME, FUNC) \ 3555 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3556 3557 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3558 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s) 3559 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3560 3561 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3562 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3563 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3564 3565 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3566 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3567 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3568 3569 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3570 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3571 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3572 3573 #undef DO_SVE2_RRXR 3574 3575 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3576 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3577 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3578 
3579 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3580 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3581 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3582 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3583 3584 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3585 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3586 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3587 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3588 3589 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3590 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3591 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3592 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3593 3594 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3595 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3596 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3597 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3598 3599 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3600 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3601 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3602 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3603 3604 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3605 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3606 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3607 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3608 3609 #undef DO_SVE2_RRXR_TB 3610 3611 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3612 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3613 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3614 3615 
DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3616 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3617 3618 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3619 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3620 3621 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3622 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3623 3624 #undef DO_SVE2_RRXR_ROT 3625 3626 /* 3627 *** SVE Floating Point Multiply-Add Indexed Group 3628 */ 3629 3630 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3631 { 3632 static gen_helper_gvec_4_ptr * const fns[4] = { 3633 NULL, 3634 gen_helper_gvec_fmla_idx_h, 3635 gen_helper_gvec_fmla_idx_s, 3636 gen_helper_gvec_fmla_idx_d, 3637 }; 3638 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3639 (a->index << 1) | sub, 3640 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3641 } 3642 3643 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3644 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3645 3646 /* 3647 *** SVE Floating Point Multiply Indexed Group 3648 */ 3649 3650 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3651 NULL, gen_helper_gvec_fmul_idx_h, 3652 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3653 }; 3654 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3655 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3656 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3657 3658 /* 3659 *** SVE Floating Point Fast Reduction Group 3660 */ 3661 3662 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3663 TCGv_ptr, TCGv_i32); 3664 3665 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3666 gen_helper_fp_reduce *fn) 3667 { 3668 unsigned vsz, p2vsz; 3669 TCGv_i32 t_desc; 3670 TCGv_ptr t_zn, t_pg, status; 3671 TCGv_i64 temp; 3672 3673 if (fn == NULL) { 3674 return false; 3675 } 3676 if (!sve_access_check(s)) { 3677 return true; 3678 } 3679 3680 vsz = vec_full_reg_size(s); 3681 p2vsz = pow2ceil(vsz); 3682 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3683 temp = tcg_temp_new_i64(); 3684 t_zn = tcg_temp_new_ptr(); 3685 t_pg = tcg_temp_new_ptr(); 3686 3687 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 3688 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3689 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3690 3691 fn(temp, t_zn, t_pg, status, t_desc); 3692 3693 write_fp_dreg(s, a->rd, temp); 3694 return true; 3695 } 3696 3697 #define DO_VPZ(NAME, name) \ 3698 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3699 NULL, gen_helper_sve_##name##_h, \ 3700 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3701 }; \ 3702 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3703 3704 DO_VPZ(FADDV, faddv) 3705 DO_VPZ(FMINNMV, fminnmv) 3706 DO_VPZ(FMAXNMV, fmaxnmv) 3707 DO_VPZ(FMINV, fminv) 3708 DO_VPZ(FMAXV, fmaxv) 3709 3710 #undef DO_VPZ 3711 3712 /* 3713 *** SVE Floating Point Unary Operations - Unpredicated Group 3714 */ 3715 3716 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3717 NULL, gen_helper_gvec_frecpe_h, 3718 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3719 }; 3720 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3721 3722 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3723 NULL, gen_helper_gvec_frsqrte_h, 3724 gen_helper_gvec_frsqrte_s, 
gen_helper_gvec_frsqrte_d, 3725 }; 3726 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3727 3728 /* 3729 *** SVE Floating Point Compare with Zero Group 3730 */ 3731 3732 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3733 gen_helper_gvec_3_ptr *fn) 3734 { 3735 if (fn == NULL) { 3736 return false; 3737 } 3738 if (sve_access_check(s)) { 3739 unsigned vsz = vec_full_reg_size(s); 3740 TCGv_ptr status = 3741 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3742 3743 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 3744 vec_full_reg_offset(s, a->rn), 3745 pred_full_reg_offset(s, a->pg), 3746 status, vsz, vsz, 0, fn); 3747 } 3748 return true; 3749 } 3750 3751 #define DO_PPZ(NAME, name) \ 3752 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 3753 NULL, gen_helper_sve_##name##_h, \ 3754 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3755 }; \ 3756 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 3757 3758 DO_PPZ(FCMGE_ppz0, fcmge0) 3759 DO_PPZ(FCMGT_ppz0, fcmgt0) 3760 DO_PPZ(FCMLE_ppz0, fcmle0) 3761 DO_PPZ(FCMLT_ppz0, fcmlt0) 3762 DO_PPZ(FCMEQ_ppz0, fcmeq0) 3763 DO_PPZ(FCMNE_ppz0, fcmne0) 3764 3765 #undef DO_PPZ 3766 3767 /* 3768 *** SVE floating-point trig multiply-add coefficient 3769 */ 3770 3771 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 3772 NULL, gen_helper_sve_ftmad_h, 3773 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 3774 }; 3775 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 3776 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, 3777 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3778 3779 /* 3780 *** SVE Floating Point Accumulating Reduction Group 3781 */ 3782 3783 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 3784 { 3785 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 3786 TCGv_ptr, TCGv_ptr, TCGv_i32); 3787 static fadda_fn * const fns[3] = { 3788 gen_helper_sve_fadda_h, 3789 gen_helper_sve_fadda_s, 3790 gen_helper_sve_fadda_d, 3791 }; 3792 unsigned vsz = vec_full_reg_size(s); 3793 TCGv_ptr t_rm, t_pg, t_fpst; 3794 TCGv_i64 t_val; 3795 TCGv_i32 t_desc; 3796 3797 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3798 return false; 3799 } 3800 s->is_nonstreaming = true; 3801 if (!sve_access_check(s)) { 3802 return true; 3803 } 3804 3805 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 3806 t_rm = tcg_temp_new_ptr(); 3807 t_pg = tcg_temp_new_ptr(); 3808 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm)); 3809 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3810 t_fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3811 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3812 3813 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 3814 3815 write_fp_dreg(s, a->rd, t_val); 3816 return true; 3817 } 3818 3819 /* 3820 *** SVE Floating Point Arithmetic - Unpredicated Group 3821 */ 3822 3823 #define DO_FP3(NAME, name) \ 3824 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 3825 NULL, gen_helper_gvec_##name##_h, \ 3826 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 3827 }; \ 3828 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 3829 3830 DO_FP3(FADD_zzz, fadd) 3831 DO_FP3(FSUB_zzz, fsub) 3832 DO_FP3(FMUL_zzz, fmul) 3833 DO_FP3(FRECPS, recps) 3834 DO_FP3(FRSQRTS, rsqrts) 3835 3836 #undef DO_FP3 3837 3838 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 3839 NULL, gen_helper_gvec_ftsmul_h, 3840 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 3841 }; 3842 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 3843 ftsmul_fns[a->esz], a, 0) 3844 3845 /* 3846 *** SVE Floating Point Arithmetic - Predicated Group 3847 */ 3848 3849 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 3850 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 3851 NULL, gen_helper_##name##_h, \ 3852 gen_helper_##name##_s, gen_helper_##name##_d \ 3853 }; \ 3854 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 3855 3856 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 3857 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 3858 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 3859 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) 3860 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) 3861 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 3862 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 3863 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) 3864 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 3865 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 3866 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 3867 3868 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, 
TCGv_ptr, 3869 TCGv_i64, TCGv_ptr, TCGv_i32); 3870 3871 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 3872 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 3873 { 3874 unsigned vsz = vec_full_reg_size(s); 3875 TCGv_ptr t_zd, t_zn, t_pg, status; 3876 TCGv_i32 desc; 3877 3878 t_zd = tcg_temp_new_ptr(); 3879 t_zn = tcg_temp_new_ptr(); 3880 t_pg = tcg_temp_new_ptr(); 3881 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd)); 3882 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn)); 3883 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3884 3885 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 3886 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3887 fn(t_zd, t_zn, t_pg, scalar, status, desc); 3888 } 3889 3890 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 3891 gen_helper_sve_fp2scalar *fn) 3892 { 3893 if (fn == NULL) { 3894 return false; 3895 } 3896 if (sve_access_check(s)) { 3897 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 3898 tcg_constant_i64(imm), fn); 3899 } 3900 return true; 3901 } 3902 3903 #define DO_FP_IMM(NAME, name, const0, const1) \ 3904 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 3905 NULL, gen_helper_sve_##name##_h, \ 3906 gen_helper_sve_##name##_s, \ 3907 gen_helper_sve_##name##_d \ 3908 }; \ 3909 static uint64_t const name##_const[4][2] = { \ 3910 { -1, -1 }, \ 3911 { float16_##const0, float16_##const1 }, \ 3912 { float32_##const0, float32_##const1 }, \ 3913 { float64_##const0, float64_##const1 }, \ 3914 }; \ 3915 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 3916 name##_const[a->esz][a->imm], name##_fns[a->esz]) 3917 3918 DO_FP_IMM(FADD, fadds, half, one) 3919 DO_FP_IMM(FSUB, fsubs, half, one) 3920 DO_FP_IMM(FMUL, fmuls, half, two) 3921 DO_FP_IMM(FSUBR, fsubrs, half, one) 3922 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 3923 DO_FP_IMM(FMINNM, fminnms, zero, one) 3924 DO_FP_IMM(FMAX, fmaxs, zero, one) 3925 DO_FP_IMM(FMIN, fmins, zero, 
one) 3926 3927 #undef DO_FP_IMM 3928 3929 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 3930 gen_helper_gvec_4_ptr *fn) 3931 { 3932 if (fn == NULL) { 3933 return false; 3934 } 3935 if (sve_access_check(s)) { 3936 unsigned vsz = vec_full_reg_size(s); 3937 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3938 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 3939 vec_full_reg_offset(s, a->rn), 3940 vec_full_reg_offset(s, a->rm), 3941 pred_full_reg_offset(s, a->pg), 3942 status, vsz, vsz, 0, fn); 3943 } 3944 return true; 3945 } 3946 3947 #define DO_FPCMP(NAME, name) \ 3948 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 3949 NULL, gen_helper_sve_##name##_h, \ 3950 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3951 }; \ 3952 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 3953 3954 DO_FPCMP(FCMGE, fcmge) 3955 DO_FPCMP(FCMGT, fcmgt) 3956 DO_FPCMP(FCMEQ, fcmeq) 3957 DO_FPCMP(FCMNE, fcmne) 3958 DO_FPCMP(FCMUO, fcmuo) 3959 DO_FPCMP(FACGE, facge) 3960 DO_FPCMP(FACGT, facgt) 3961 3962 #undef DO_FPCMP 3963 3964 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 3965 NULL, gen_helper_sve_fcadd_h, 3966 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 3967 }; 3968 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 3969 a->rd, a->rn, a->rm, a->pg, a->rot, 3970 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3971 3972 #define DO_FMLA(NAME, name) \ 3973 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 3974 NULL, gen_helper_sve_##name##_h, \ 3975 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3976 }; \ 3977 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 3978 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 3979 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3980 3981 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 3982 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 3983 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 3984 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 3985 3986 #undef DO_FMLA 3987 3988 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 3989 NULL, gen_helper_sve_fcmla_zpzzz_h, 3990 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 3991 }; 3992 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 3993 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 3994 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3995 3996 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 3997 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 3998 }; 3999 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 4000 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 4001 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4002 4003 /* 4004 *** SVE Floating Point Unary Operations Predicated Group 4005 */ 4006 4007 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4008 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 4009 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4010 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 4011 4012 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 4013 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 4014 4015 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4016 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 4017 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4018 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 4019 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4020 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 4021 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4022 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 4023 4024 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4025 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 4026 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4027 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 4028 TRANS_FEAT(FCVTZS_hs, aa64_sve, 
gen_gvec_fpst_arg_zpz, 4029 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 4030 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4031 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 4032 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4033 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 4034 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4035 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 4036 4037 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4038 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 4039 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4040 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 4041 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4042 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 4043 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4044 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 4045 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4046 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 4047 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4048 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 4049 4050 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4051 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 4052 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4053 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 4054 4055 static gen_helper_gvec_3_ptr * const frint_fns[] = { 4056 NULL, 4057 gen_helper_sve_frint_h, 4058 gen_helper_sve_frint_s, 4059 gen_helper_sve_frint_d 4060 }; 4061 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 4062 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4063 4064 static gen_helper_gvec_3_ptr * const frintx_fns[] = { 4065 NULL, 4066 gen_helper_sve_frintx_h, 4067 gen_helper_sve_frintx_s, 4068 gen_helper_sve_frintx_d 4069 }; 4070 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], 4071 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4072 4073 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 4074 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) 4075 { 4076 unsigned vsz; 4077 TCGv_i32 tmode; 4078 TCGv_ptr status; 4079 4080 if (fn == NULL) { 4081 return false; 4082 } 4083 if (!sve_access_check(s)) { 4084 return true; 4085 } 4086 4087 vsz = vec_full_reg_size(s); 4088 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4089 tmode = gen_set_rmode(mode, status); 4090 4091 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4092 vec_full_reg_offset(s, a->rn), 4093 pred_full_reg_offset(s, a->pg), 4094 status, vsz, vsz, 0, fn); 4095 4096 gen_restore_rmode(tmode, status); 4097 return true; 4098 } 4099 4100 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, 4101 FPROUNDING_TIEEVEN, frint_fns[a->esz]) 4102 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, 4103 FPROUNDING_POSINF, frint_fns[a->esz]) 4104 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, 4105 FPROUNDING_NEGINF, frint_fns[a->esz]) 4106 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, 4107 FPROUNDING_ZERO, frint_fns[a->esz]) 4108 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, 4109 FPROUNDING_TIEAWAY, frint_fns[a->esz]) 4110 4111 static gen_helper_gvec_3_ptr * const frecpx_fns[] = { 4112 NULL, gen_helper_sve_frecpx_h, 4113 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, 4114 }; 4115 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], 4116 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4117 4118 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { 4119 NULL, gen_helper_sve_fsqrt_h, 4120 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, 4121 }; 4122 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], 4123 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 4124 4125 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4126 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 4127 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4128 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 4129 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4130 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 4131 4132 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4133 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 4134 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4135 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 4136 4137 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4138 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 4139 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4140 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4141 4142 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4143 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4144 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4145 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4146 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4147 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4148 4149 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4150 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4151 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4152 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4153 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4154 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4155 4156 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4157 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4158 4159 /* 4160 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4161 */ 4162 4163 /* Subroutine loading a vector register at VOFS of LEN bytes. 4164 * The load should begin at the address Rn + IMM. 
4165 */ 4166 4167 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4168 int len, int rn, int imm) 4169 { 4170 int len_align = QEMU_ALIGN_DOWN(len, 16); 4171 int len_remain = len % 16; 4172 int nparts = len / 16 + ctpop8(len_remain); 4173 int midx = get_mem_index(s); 4174 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4175 TCGv_i128 t16; 4176 4177 dirty_addr = tcg_temp_new_i64(); 4178 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4179 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4180 4181 /* 4182 * Note that unpredicated load/store of vector/predicate registers 4183 * are defined as a stream of bytes, which equates to little-endian 4184 * operations on larger quantities. 4185 * Attempt to keep code expansion to a minimum by limiting the 4186 * amount of unrolling done. 4187 */ 4188 if (nparts <= 4) { 4189 int i; 4190 4191 t0 = tcg_temp_new_i64(); 4192 t1 = tcg_temp_new_i64(); 4193 t16 = tcg_temp_new_i128(); 4194 4195 for (i = 0; i < len_align; i += 16) { 4196 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4197 MO_LE | MO_128 | MO_ATOM_NONE); 4198 tcg_gen_extr_i128_i64(t0, t1, t16); 4199 tcg_gen_st_i64(t0, base, vofs + i); 4200 tcg_gen_st_i64(t1, base, vofs + i + 8); 4201 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4202 } 4203 } else { 4204 TCGLabel *loop = gen_new_label(); 4205 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4206 4207 tcg_gen_movi_ptr(i, 0); 4208 gen_set_label(loop); 4209 4210 t16 = tcg_temp_new_i128(); 4211 tcg_gen_qemu_ld_i128(t16, clean_addr, midx, 4212 MO_LE | MO_128 | MO_ATOM_NONE); 4213 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4214 4215 tp = tcg_temp_new_ptr(); 4216 tcg_gen_add_ptr(tp, base, i); 4217 tcg_gen_addi_ptr(i, i, 16); 4218 4219 t0 = tcg_temp_new_i64(); 4220 t1 = tcg_temp_new_i64(); 4221 tcg_gen_extr_i128_i64(t0, t1, t16); 4222 4223 tcg_gen_st_i64(t0, tp, vofs); 4224 tcg_gen_st_i64(t1, tp, vofs + 8); 4225 4226 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4227 } 4228 4229 /* 4230 * Predicate 
register loads can be any multiple of 2. 4231 * Note that we still store the entire 64-bit unit into cpu_env. 4232 */ 4233 if (len_remain >= 8) { 4234 t0 = tcg_temp_new_i64(); 4235 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4236 tcg_gen_st_i64(t0, base, vofs + len_align); 4237 len_remain -= 8; 4238 len_align += 8; 4239 if (len_remain) { 4240 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4241 } 4242 } 4243 if (len_remain) { 4244 t0 = tcg_temp_new_i64(); 4245 switch (len_remain) { 4246 case 2: 4247 case 4: 4248 case 8: 4249 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4250 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4251 break; 4252 4253 case 6: 4254 t1 = tcg_temp_new_i64(); 4255 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4256 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4257 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4258 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4259 break; 4260 4261 default: 4262 g_assert_not_reached(); 4263 } 4264 tcg_gen_st_i64(t0, base, vofs + len_align); 4265 } 4266 } 4267 4268 /* Similarly for stores. */ 4269 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4270 int len, int rn, int imm) 4271 { 4272 int len_align = QEMU_ALIGN_DOWN(len, 16); 4273 int len_remain = len % 16; 4274 int nparts = len / 16 + ctpop8(len_remain); 4275 int midx = get_mem_index(s); 4276 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4277 TCGv_i128 t16; 4278 4279 dirty_addr = tcg_temp_new_i64(); 4280 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4281 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); 4282 4283 /* Note that unpredicated load/store of vector/predicate registers 4284 * are defined as a stream of bytes, which equates to little-endian 4285 * operations on larger quantities. There is no nice way to force 4286 * a little-endian store for aarch64_be-linux-user out of line. 
4287 * 4288 * Attempt to keep code expansion to a minimum by limiting the 4289 * amount of unrolling done. 4290 */ 4291 if (nparts <= 4) { 4292 int i; 4293 4294 t0 = tcg_temp_new_i64(); 4295 t1 = tcg_temp_new_i64(); 4296 t16 = tcg_temp_new_i128(); 4297 for (i = 0; i < len_align; i += 8) { 4298 tcg_gen_ld_i64(t0, base, vofs + i); 4299 tcg_gen_ld_i64(t1, base, vofs + i + 8); 4300 tcg_gen_concat_i64_i128(t16, t0, t1); 4301 tcg_gen_qemu_st_i128(t16, clean_addr, midx, 4302 MO_LE | MO_128 | MO_ATOM_NONE); 4303 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4304 } 4305 } else { 4306 TCGLabel *loop = gen_new_label(); 4307 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4308 4309 tcg_gen_movi_ptr(i, 0); 4310 gen_set_label(loop); 4311 4312 t0 = tcg_temp_new_i64(); 4313 t1 = tcg_temp_new_i64(); 4314 tp = tcg_temp_new_ptr(); 4315 tcg_gen_add_ptr(tp, base, i); 4316 tcg_gen_ld_i64(t0, tp, vofs); 4317 tcg_gen_ld_i64(t1, tp, vofs + 8); 4318 tcg_gen_addi_ptr(i, i, 16); 4319 4320 t16 = tcg_temp_new_i128(); 4321 tcg_gen_concat_i64_i128(t16, t0, t1); 4322 4323 tcg_gen_qemu_st_i128(t16, clean_addr, midx, MO_LEUQ); 4324 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4325 4326 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4327 } 4328 4329 /* Predicate register stores can be any multiple of 2. 
*/ 4330 if (len_remain >= 8) { 4331 t0 = tcg_temp_new_i64(); 4332 tcg_gen_st_i64(t0, base, vofs + len_align); 4333 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); 4334 len_remain -= 8; 4335 len_align += 8; 4336 if (len_remain) { 4337 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4338 } 4339 } 4340 if (len_remain) { 4341 t0 = tcg_temp_new_i64(); 4342 tcg_gen_ld_i64(t0, base, vofs + len_align); 4343 4344 switch (len_remain) { 4345 case 2: 4346 case 4: 4347 case 8: 4348 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4349 MO_LE | ctz32(len_remain) | MO_ATOM_NONE); 4350 break; 4351 4352 case 6: 4353 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); 4354 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4355 tcg_gen_shri_i64(t0, t0, 32); 4356 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); 4357 break; 4358 4359 default: 4360 g_assert_not_reached(); 4361 } 4362 } 4363 } 4364 4365 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4366 { 4367 if (!dc_isar_feature(aa64_sve, s)) { 4368 return false; 4369 } 4370 if (sve_access_check(s)) { 4371 int size = vec_full_reg_size(s); 4372 int off = vec_full_reg_offset(s, a->rd); 4373 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4374 } 4375 return true; 4376 } 4377 4378 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4379 { 4380 if (!dc_isar_feature(aa64_sve, s)) { 4381 return false; 4382 } 4383 if (sve_access_check(s)) { 4384 int size = pred_full_reg_size(s); 4385 int off = pred_full_reg_offset(s, a->rd); 4386 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4387 } 4388 return true; 4389 } 4390 4391 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4392 { 4393 if (!dc_isar_feature(aa64_sve, s)) { 4394 return false; 4395 } 4396 if (sve_access_check(s)) { 4397 int size = vec_full_reg_size(s); 4398 int off = vec_full_reg_offset(s, a->rd); 4399 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4400 } 4401 return true; 4402 } 4403 4404 static bool 
trans_STR_pri(DisasContext *s, arg_rri *a)
{
    /* STR (predicate): sized and offset for a P register. */
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype.  */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* The log2 memory access size of a dtype: the MO_SIZE bits of its MemOp. */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype.  */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};

/*
 * Common tail for the predicated contiguous load/store helpers.
 *
 * Builds the simd descriptor and calls FN with cpu_env, a pointer to
 * predicate register PG, ADDR, and the descriptor as a constant.
 *
 * When s->mte_active[0] is set, the MTEDESC fields (MIDX, TBI, TCMA,
 * WRITE, SIZEM1) are packed above SVE_MTEDESC_SHIFT, with SIZEM1 set to
 * (mte_n << msz) - 1.  Otherwise ADDR is passed through
 * clean_data_tbi() and no MTE fields are set.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        addr = clean_data_tbi(s, addr);
    }

    /* The register number occupies the low bits of the data field. */
    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
}

/* Indexed by [mte][be][dtype][nreg] */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive,
big-endian */ 4500 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4501 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4502 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4503 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4504 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4505 4506 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4507 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4508 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4509 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4510 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4511 4512 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4513 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4514 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4515 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4516 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4517 4518 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4519 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4520 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4521 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4522 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4523 4524 { /* mte active, little-endian */ 4525 { { gen_helper_sve_ld1bb_r_mte, 4526 gen_helper_sve_ld2bb_r_mte, 4527 gen_helper_sve_ld3bb_r_mte, 4528 gen_helper_sve_ld4bb_r_mte }, 4529 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4530 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4531 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4532 4533 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4534 { gen_helper_sve_ld1hh_le_r_mte, 4535 gen_helper_sve_ld2hh_le_r_mte, 4536 gen_helper_sve_ld3hh_le_r_mte, 4537 gen_helper_sve_ld4hh_le_r_mte }, 4538 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4539 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4540 4541 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4542 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4543 { gen_helper_sve_ld1ss_le_r_mte, 4544 
gen_helper_sve_ld2ss_le_r_mte, 4545 gen_helper_sve_ld3ss_le_r_mte, 4546 gen_helper_sve_ld4ss_le_r_mte }, 4547 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4548 4549 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4550 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4551 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4552 { gen_helper_sve_ld1dd_le_r_mte, 4553 gen_helper_sve_ld2dd_le_r_mte, 4554 gen_helper_sve_ld3dd_le_r_mte, 4555 gen_helper_sve_ld4dd_le_r_mte } }, 4556 4557 /* mte active, big-endian */ 4558 { { gen_helper_sve_ld1bb_r_mte, 4559 gen_helper_sve_ld2bb_r_mte, 4560 gen_helper_sve_ld3bb_r_mte, 4561 gen_helper_sve_ld4bb_r_mte }, 4562 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4563 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4564 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4565 4566 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4567 { gen_helper_sve_ld1hh_be_r_mte, 4568 gen_helper_sve_ld2hh_be_r_mte, 4569 gen_helper_sve_ld3hh_be_r_mte, 4570 gen_helper_sve_ld4hh_be_r_mte }, 4571 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4572 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4573 4574 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4575 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4576 { gen_helper_sve_ld1ss_be_r_mte, 4577 gen_helper_sve_ld2ss_be_r_mte, 4578 gen_helper_sve_ld3ss_be_r_mte, 4579 gen_helper_sve_ld4ss_be_r_mte }, 4580 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4581 4582 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4583 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4584 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4585 { gen_helper_sve_ld1dd_be_r_mte, 4586 gen_helper_sve_ld2dd_be_r_mte, 4587 gen_helper_sve_ld3dd_be_r_mte, 4588 gen_helper_sve_ld4dd_be_r_mte } } }, 4589 }; 4590 4591 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4592 TCGv_i64 addr, int dtype, int nreg) 4593 { 4594 gen_helper_gvec_mem *fn 4595 = 
ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4596 4597 /* 4598 * While there are holes in the table, they are not 4599 * accessible via the instruction encoding. 4600 */ 4601 assert(fn != NULL); 4602 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); 4603 } 4604 4605 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4606 { 4607 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4608 return false; 4609 } 4610 if (sve_access_check(s)) { 4611 TCGv_i64 addr = tcg_temp_new_i64(); 4612 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4613 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4614 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4615 } 4616 return true; 4617 } 4618 4619 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4620 { 4621 if (!dc_isar_feature(aa64_sve, s)) { 4622 return false; 4623 } 4624 if (sve_access_check(s)) { 4625 int vsz = vec_full_reg_size(s); 4626 int elements = vsz >> dtype_esz[a->dtype]; 4627 TCGv_i64 addr = tcg_temp_new_i64(); 4628 4629 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4630 (a->imm * elements * (a->nreg + 1)) 4631 << dtype_msz(a->dtype)); 4632 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4633 } 4634 return true; 4635 } 4636 4637 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4638 { 4639 static gen_helper_gvec_mem * const fns[2][2][16] = { 4640 { /* mte inactive, little-endian */ 4641 { gen_helper_sve_ldff1bb_r, 4642 gen_helper_sve_ldff1bhu_r, 4643 gen_helper_sve_ldff1bsu_r, 4644 gen_helper_sve_ldff1bdu_r, 4645 4646 gen_helper_sve_ldff1sds_le_r, 4647 gen_helper_sve_ldff1hh_le_r, 4648 gen_helper_sve_ldff1hsu_le_r, 4649 gen_helper_sve_ldff1hdu_le_r, 4650 4651 gen_helper_sve_ldff1hds_le_r, 4652 gen_helper_sve_ldff1hss_le_r, 4653 gen_helper_sve_ldff1ss_le_r, 4654 gen_helper_sve_ldff1sdu_le_r, 4655 4656 gen_helper_sve_ldff1bds_r, 4657 gen_helper_sve_ldff1bss_r, 4658 gen_helper_sve_ldff1bhs_r, 4659 gen_helper_sve_ldff1dd_le_r }, 4660 4661 /* mte 
inactive, big-endian */ 4662 { gen_helper_sve_ldff1bb_r, 4663 gen_helper_sve_ldff1bhu_r, 4664 gen_helper_sve_ldff1bsu_r, 4665 gen_helper_sve_ldff1bdu_r, 4666 4667 gen_helper_sve_ldff1sds_be_r, 4668 gen_helper_sve_ldff1hh_be_r, 4669 gen_helper_sve_ldff1hsu_be_r, 4670 gen_helper_sve_ldff1hdu_be_r, 4671 4672 gen_helper_sve_ldff1hds_be_r, 4673 gen_helper_sve_ldff1hss_be_r, 4674 gen_helper_sve_ldff1ss_be_r, 4675 gen_helper_sve_ldff1sdu_be_r, 4676 4677 gen_helper_sve_ldff1bds_r, 4678 gen_helper_sve_ldff1bss_r, 4679 gen_helper_sve_ldff1bhs_r, 4680 gen_helper_sve_ldff1dd_be_r } }, 4681 4682 { /* mte active, little-endian */ 4683 { gen_helper_sve_ldff1bb_r_mte, 4684 gen_helper_sve_ldff1bhu_r_mte, 4685 gen_helper_sve_ldff1bsu_r_mte, 4686 gen_helper_sve_ldff1bdu_r_mte, 4687 4688 gen_helper_sve_ldff1sds_le_r_mte, 4689 gen_helper_sve_ldff1hh_le_r_mte, 4690 gen_helper_sve_ldff1hsu_le_r_mte, 4691 gen_helper_sve_ldff1hdu_le_r_mte, 4692 4693 gen_helper_sve_ldff1hds_le_r_mte, 4694 gen_helper_sve_ldff1hss_le_r_mte, 4695 gen_helper_sve_ldff1ss_le_r_mte, 4696 gen_helper_sve_ldff1sdu_le_r_mte, 4697 4698 gen_helper_sve_ldff1bds_r_mte, 4699 gen_helper_sve_ldff1bss_r_mte, 4700 gen_helper_sve_ldff1bhs_r_mte, 4701 gen_helper_sve_ldff1dd_le_r_mte }, 4702 4703 /* mte active, big-endian */ 4704 { gen_helper_sve_ldff1bb_r_mte, 4705 gen_helper_sve_ldff1bhu_r_mte, 4706 gen_helper_sve_ldff1bsu_r_mte, 4707 gen_helper_sve_ldff1bdu_r_mte, 4708 4709 gen_helper_sve_ldff1sds_be_r_mte, 4710 gen_helper_sve_ldff1hh_be_r_mte, 4711 gen_helper_sve_ldff1hsu_be_r_mte, 4712 gen_helper_sve_ldff1hdu_be_r_mte, 4713 4714 gen_helper_sve_ldff1hds_be_r_mte, 4715 gen_helper_sve_ldff1hss_be_r_mte, 4716 gen_helper_sve_ldff1ss_be_r_mte, 4717 gen_helper_sve_ldff1sdu_be_r_mte, 4718 4719 gen_helper_sve_ldff1bds_r_mte, 4720 gen_helper_sve_ldff1bss_r_mte, 4721 gen_helper_sve_ldff1bhs_r_mte, 4722 gen_helper_sve_ldff1dd_be_r_mte } }, 4723 }; 4724 4725 if (!dc_isar_feature(aa64_sve, s)) { 4726 return false; 4727 } 4728 
s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = tcg_temp_new_i64();
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

/*
 * LDNF1 (scalar plus immediate).  The helper is selected by
 * [mte active][big-endian][dtype]; the immediate is scaled by the
 * number of bytes one register's worth of elements occupies in memory.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = tcg_temp_new_i64();

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

/*
 * Load one quadword (16 bytes) under predicate PG into ZT using the
 * LD1 helper with a 16-byte descriptor, then replicate it across the
 * rest of the vector.
 *
 * NOTE(review): the descriptor built here carries only ZT.  When
 * s->mte_active[0] selects an _mte helper, no MTEDESC fields are packed
 * and, in the non-MTE case, ADDR is not passed through
 * clean_data_tbi(), unlike do_mem_zpa() -- confirm this is intended.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* From here on the helper reads the predicate from preg_tmp. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}

/* LD1RQ (scalar plus scalar): address is Xn|SP + (Xm << msz). */
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = tcg_temp_new_i64();
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

/* LD1RQ (scalar plus immediate): the immediate counts 16-byte units. */
static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = tcg_temp_new_i64();
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

/*
 * Load one octaword (32 bytes) under predicate PG into ZT and replicate
 * it across the vector in units of 32 bytes, zeroing any tail.
 * Raises unallocated_encoding() when the vector is shorter than 32 bytes.
 *
 * NOTE(review): as in do_ldrq(), the descriptor carries only ZT even
 * when an _mte helper is selected -- confirm this is intended.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers.  */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    /* vsz now becomes the number of trailing bytes to clear. */
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}

/* LD1RO (scalar plus scalar); gated on the aa64_sve_f64mm feature. */
static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (a->rm == 31) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = tcg_temp_new_i64();
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldro(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

/* LD1RO (scalar plus immediate): the immediate counts 32-byte units. */
static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = tcg_temp_new_i64();
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
        do_ldro(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

/* Load and broadcast element.  */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;
    MemOp memop;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
5027 */ 5028 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 5029 temp = tcg_temp_new_i64(); 5030 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg)); 5031 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 5032 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 5033 } else { 5034 TCGv_i32 t32 = tcg_temp_new_i32(); 5035 find_last_active(s, t32, esz, a->pg); 5036 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5037 } 5038 5039 /* Load the data. */ 5040 temp = tcg_temp_new_i64(); 5041 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5042 5043 memop = finalize_memop(s, dtype_mop[a->dtype]); 5044 clean_addr = gen_mte_check1(s, temp, false, true, memop); 5045 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); 5046 5047 /* Broadcast to *all* elements. */ 5048 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5049 vsz, vsz, temp); 5050 5051 /* Zero the inactive elements. */ 5052 gen_set_label(over); 5053 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5054 } 5055 5056 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5057 int msz, int esz, int nreg) 5058 { 5059 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 5060 { { { gen_helper_sve_st1bb_r, 5061 gen_helper_sve_st1bh_r, 5062 gen_helper_sve_st1bs_r, 5063 gen_helper_sve_st1bd_r }, 5064 { NULL, 5065 gen_helper_sve_st1hh_le_r, 5066 gen_helper_sve_st1hs_le_r, 5067 gen_helper_sve_st1hd_le_r }, 5068 { NULL, NULL, 5069 gen_helper_sve_st1ss_le_r, 5070 gen_helper_sve_st1sd_le_r }, 5071 { NULL, NULL, NULL, 5072 gen_helper_sve_st1dd_le_r } }, 5073 { { gen_helper_sve_st1bb_r, 5074 gen_helper_sve_st1bh_r, 5075 gen_helper_sve_st1bs_r, 5076 gen_helper_sve_st1bd_r }, 5077 { NULL, 5078 gen_helper_sve_st1hh_be_r, 5079 gen_helper_sve_st1hs_be_r, 5080 gen_helper_sve_st1hd_be_r }, 5081 { NULL, NULL, 5082 gen_helper_sve_st1ss_be_r, 5083 gen_helper_sve_st1sd_be_r }, 5084 { NULL, NULL, NULL, 5085 gen_helper_sve_st1dd_be_r } } }, 5086 5087 { { { 
gen_helper_sve_st1bb_r_mte, 5088 gen_helper_sve_st1bh_r_mte, 5089 gen_helper_sve_st1bs_r_mte, 5090 gen_helper_sve_st1bd_r_mte }, 5091 { NULL, 5092 gen_helper_sve_st1hh_le_r_mte, 5093 gen_helper_sve_st1hs_le_r_mte, 5094 gen_helper_sve_st1hd_le_r_mte }, 5095 { NULL, NULL, 5096 gen_helper_sve_st1ss_le_r_mte, 5097 gen_helper_sve_st1sd_le_r_mte }, 5098 { NULL, NULL, NULL, 5099 gen_helper_sve_st1dd_le_r_mte } }, 5100 { { gen_helper_sve_st1bb_r_mte, 5101 gen_helper_sve_st1bh_r_mte, 5102 gen_helper_sve_st1bs_r_mte, 5103 gen_helper_sve_st1bd_r_mte }, 5104 { NULL, 5105 gen_helper_sve_st1hh_be_r_mte, 5106 gen_helper_sve_st1hs_be_r_mte, 5107 gen_helper_sve_st1hd_be_r_mte }, 5108 { NULL, NULL, 5109 gen_helper_sve_st1ss_be_r_mte, 5110 gen_helper_sve_st1sd_be_r_mte }, 5111 { NULL, NULL, NULL, 5112 gen_helper_sve_st1dd_be_r_mte } } }, 5113 }; 5114 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5115 { { { gen_helper_sve_st2bb_r, 5116 gen_helper_sve_st2hh_le_r, 5117 gen_helper_sve_st2ss_le_r, 5118 gen_helper_sve_st2dd_le_r }, 5119 { gen_helper_sve_st3bb_r, 5120 gen_helper_sve_st3hh_le_r, 5121 gen_helper_sve_st3ss_le_r, 5122 gen_helper_sve_st3dd_le_r }, 5123 { gen_helper_sve_st4bb_r, 5124 gen_helper_sve_st4hh_le_r, 5125 gen_helper_sve_st4ss_le_r, 5126 gen_helper_sve_st4dd_le_r } }, 5127 { { gen_helper_sve_st2bb_r, 5128 gen_helper_sve_st2hh_be_r, 5129 gen_helper_sve_st2ss_be_r, 5130 gen_helper_sve_st2dd_be_r }, 5131 { gen_helper_sve_st3bb_r, 5132 gen_helper_sve_st3hh_be_r, 5133 gen_helper_sve_st3ss_be_r, 5134 gen_helper_sve_st3dd_be_r }, 5135 { gen_helper_sve_st4bb_r, 5136 gen_helper_sve_st4hh_be_r, 5137 gen_helper_sve_st4ss_be_r, 5138 gen_helper_sve_st4dd_be_r } } }, 5139 { { { gen_helper_sve_st2bb_r_mte, 5140 gen_helper_sve_st2hh_le_r_mte, 5141 gen_helper_sve_st2ss_le_r_mte, 5142 gen_helper_sve_st2dd_le_r_mte }, 5143 { gen_helper_sve_st3bb_r_mte, 5144 gen_helper_sve_st3hh_le_r_mte, 5145 gen_helper_sve_st3ss_le_r_mte, 5146 gen_helper_sve_st3dd_le_r_mte }, 5147 { 
gen_helper_sve_st4bb_r_mte, 5148 gen_helper_sve_st4hh_le_r_mte, 5149 gen_helper_sve_st4ss_le_r_mte, 5150 gen_helper_sve_st4dd_le_r_mte } }, 5151 { { gen_helper_sve_st2bb_r_mte, 5152 gen_helper_sve_st2hh_be_r_mte, 5153 gen_helper_sve_st2ss_be_r_mte, 5154 gen_helper_sve_st2dd_be_r_mte }, 5155 { gen_helper_sve_st3bb_r_mte, 5156 gen_helper_sve_st3hh_be_r_mte, 5157 gen_helper_sve_st3ss_be_r_mte, 5158 gen_helper_sve_st3dd_be_r_mte }, 5159 { gen_helper_sve_st4bb_r_mte, 5160 gen_helper_sve_st4hh_be_r_mte, 5161 gen_helper_sve_st4ss_be_r_mte, 5162 gen_helper_sve_st4dd_be_r_mte } } }, 5163 }; 5164 gen_helper_gvec_mem *fn; 5165 int be = s->be_data == MO_BE; 5166 5167 if (nreg == 0) { 5168 /* ST1 */ 5169 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5170 nreg = 1; 5171 } else { 5172 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5173 assert(msz == esz); 5174 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5175 } 5176 assert(fn != NULL); 5177 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); 5178 } 5179 5180 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5181 { 5182 if (!dc_isar_feature(aa64_sve, s)) { 5183 return false; 5184 } 5185 if (a->rm == 31 || a->msz > a->esz) { 5186 return false; 5187 } 5188 if (sve_access_check(s)) { 5189 TCGv_i64 addr = tcg_temp_new_i64(); 5190 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5191 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5192 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5193 } 5194 return true; 5195 } 5196 5197 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5198 { 5199 if (!dc_isar_feature(aa64_sve, s)) { 5200 return false; 5201 } 5202 if (a->msz > a->esz) { 5203 return false; 5204 } 5205 if (sve_access_check(s)) { 5206 int vsz = vec_full_reg_size(s); 5207 int elements = vsz >> a->esz; 5208 TCGv_i64 addr = tcg_temp_new_i64(); 5209 5210 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5211 (a->imm * elements * (a->nreg + 1)) << a->msz); 5212 
do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5213 } 5214 return true; 5215 } 5216 5217 /* 5218 *** SVE gather loads / scatter stores 5219 */ 5220 5221 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5222 int scale, TCGv_i64 scalar, int msz, bool is_write, 5223 gen_helper_gvec_mem_scatter *fn) 5224 { 5225 unsigned vsz = vec_full_reg_size(s); 5226 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5227 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5228 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5229 int desc = 0; 5230 5231 if (s->mte_active[0]) { 5232 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 5233 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 5234 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 5235 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 5236 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); 5237 desc <<= SVE_MTEDESC_SHIFT; 5238 } 5239 desc = simd_desc(vsz, vsz, desc | scale); 5240 5241 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 5242 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); 5243 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); 5244 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5245 } 5246 5247 /* Indexed by [mte][be][ff][xs][u][msz]. 
*/
/*
 * Gather-load helpers for 32-bit elements.
 *
 * Within each [xs] group the first row (u == 0) holds the "ss" helpers
 * and the second row (u == 1) the "su" helpers.  The u == 0, msz == 2
 * slot is NULL: there is no helper for that combination, and the
 * translators assert that the selected entry is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.
*/ 5365 static gen_helper_gvec_mem_scatter * const 5366 gather_load_fn64[2][2][2][3][2][4] = { 5367 { /* MTE Inactive */ 5368 { /* Little-endian */ 5369 { { { gen_helper_sve_ldbds_zsu, 5370 gen_helper_sve_ldhds_le_zsu, 5371 gen_helper_sve_ldsds_le_zsu, 5372 NULL, }, 5373 { gen_helper_sve_ldbdu_zsu, 5374 gen_helper_sve_ldhdu_le_zsu, 5375 gen_helper_sve_ldsdu_le_zsu, 5376 gen_helper_sve_lddd_le_zsu, } }, 5377 { { gen_helper_sve_ldbds_zss, 5378 gen_helper_sve_ldhds_le_zss, 5379 gen_helper_sve_ldsds_le_zss, 5380 NULL, }, 5381 { gen_helper_sve_ldbdu_zss, 5382 gen_helper_sve_ldhdu_le_zss, 5383 gen_helper_sve_ldsdu_le_zss, 5384 gen_helper_sve_lddd_le_zss, } }, 5385 { { gen_helper_sve_ldbds_zd, 5386 gen_helper_sve_ldhds_le_zd, 5387 gen_helper_sve_ldsds_le_zd, 5388 NULL, }, 5389 { gen_helper_sve_ldbdu_zd, 5390 gen_helper_sve_ldhdu_le_zd, 5391 gen_helper_sve_ldsdu_le_zd, 5392 gen_helper_sve_lddd_le_zd, } } }, 5393 5394 /* First-fault */ 5395 { { { gen_helper_sve_ldffbds_zsu, 5396 gen_helper_sve_ldffhds_le_zsu, 5397 gen_helper_sve_ldffsds_le_zsu, 5398 NULL, }, 5399 { gen_helper_sve_ldffbdu_zsu, 5400 gen_helper_sve_ldffhdu_le_zsu, 5401 gen_helper_sve_ldffsdu_le_zsu, 5402 gen_helper_sve_ldffdd_le_zsu, } }, 5403 { { gen_helper_sve_ldffbds_zss, 5404 gen_helper_sve_ldffhds_le_zss, 5405 gen_helper_sve_ldffsds_le_zss, 5406 NULL, }, 5407 { gen_helper_sve_ldffbdu_zss, 5408 gen_helper_sve_ldffhdu_le_zss, 5409 gen_helper_sve_ldffsdu_le_zss, 5410 gen_helper_sve_ldffdd_le_zss, } }, 5411 { { gen_helper_sve_ldffbds_zd, 5412 gen_helper_sve_ldffhds_le_zd, 5413 gen_helper_sve_ldffsds_le_zd, 5414 NULL, }, 5415 { gen_helper_sve_ldffbdu_zd, 5416 gen_helper_sve_ldffhdu_le_zd, 5417 gen_helper_sve_ldffsdu_le_zd, 5418 gen_helper_sve_ldffdd_le_zd, } } } }, 5419 { /* Big-endian */ 5420 { { { gen_helper_sve_ldbds_zsu, 5421 gen_helper_sve_ldhds_be_zsu, 5422 gen_helper_sve_ldsds_be_zsu, 5423 NULL, }, 5424 { gen_helper_sve_ldbdu_zsu, 5425 gen_helper_sve_ldhdu_be_zsu, 5426 gen_helper_sve_ldsdu_be_zsu, 5427 
gen_helper_sve_lddd_be_zsu, } }, 5428 { { gen_helper_sve_ldbds_zss, 5429 gen_helper_sve_ldhds_be_zss, 5430 gen_helper_sve_ldsds_be_zss, 5431 NULL, }, 5432 { gen_helper_sve_ldbdu_zss, 5433 gen_helper_sve_ldhdu_be_zss, 5434 gen_helper_sve_ldsdu_be_zss, 5435 gen_helper_sve_lddd_be_zss, } }, 5436 { { gen_helper_sve_ldbds_zd, 5437 gen_helper_sve_ldhds_be_zd, 5438 gen_helper_sve_ldsds_be_zd, 5439 NULL, }, 5440 { gen_helper_sve_ldbdu_zd, 5441 gen_helper_sve_ldhdu_be_zd, 5442 gen_helper_sve_ldsdu_be_zd, 5443 gen_helper_sve_lddd_be_zd, } } }, 5444 5445 /* First-fault */ 5446 { { { gen_helper_sve_ldffbds_zsu, 5447 gen_helper_sve_ldffhds_be_zsu, 5448 gen_helper_sve_ldffsds_be_zsu, 5449 NULL, }, 5450 { gen_helper_sve_ldffbdu_zsu, 5451 gen_helper_sve_ldffhdu_be_zsu, 5452 gen_helper_sve_ldffsdu_be_zsu, 5453 gen_helper_sve_ldffdd_be_zsu, } }, 5454 { { gen_helper_sve_ldffbds_zss, 5455 gen_helper_sve_ldffhds_be_zss, 5456 gen_helper_sve_ldffsds_be_zss, 5457 NULL, }, 5458 { gen_helper_sve_ldffbdu_zss, 5459 gen_helper_sve_ldffhdu_be_zss, 5460 gen_helper_sve_ldffsdu_be_zss, 5461 gen_helper_sve_ldffdd_be_zss, } }, 5462 { { gen_helper_sve_ldffbds_zd, 5463 gen_helper_sve_ldffhds_be_zd, 5464 gen_helper_sve_ldffsds_be_zd, 5465 NULL, }, 5466 { gen_helper_sve_ldffbdu_zd, 5467 gen_helper_sve_ldffhdu_be_zd, 5468 gen_helper_sve_ldffsdu_be_zd, 5469 gen_helper_sve_ldffdd_be_zd, } } } } }, 5470 { /* MTE Active */ 5471 { /* Little-endian */ 5472 { { { gen_helper_sve_ldbds_zsu_mte, 5473 gen_helper_sve_ldhds_le_zsu_mte, 5474 gen_helper_sve_ldsds_le_zsu_mte, 5475 NULL, }, 5476 { gen_helper_sve_ldbdu_zsu_mte, 5477 gen_helper_sve_ldhdu_le_zsu_mte, 5478 gen_helper_sve_ldsdu_le_zsu_mte, 5479 gen_helper_sve_lddd_le_zsu_mte, } }, 5480 { { gen_helper_sve_ldbds_zss_mte, 5481 gen_helper_sve_ldhds_le_zss_mte, 5482 gen_helper_sve_ldsds_le_zss_mte, 5483 NULL, }, 5484 { gen_helper_sve_ldbdu_zss_mte, 5485 gen_helper_sve_ldhdu_le_zss_mte, 5486 gen_helper_sve_ldsdu_le_zss_mte, 5487 gen_helper_sve_lddd_le_zss_mte, } }, 
5488 { { gen_helper_sve_ldbds_zd_mte, 5489 gen_helper_sve_ldhds_le_zd_mte, 5490 gen_helper_sve_ldsds_le_zd_mte, 5491 NULL, }, 5492 { gen_helper_sve_ldbdu_zd_mte, 5493 gen_helper_sve_ldhdu_le_zd_mte, 5494 gen_helper_sve_ldsdu_le_zd_mte, 5495 gen_helper_sve_lddd_le_zd_mte, } } }, 5496 5497 /* First-fault */ 5498 { { { gen_helper_sve_ldffbds_zsu_mte, 5499 gen_helper_sve_ldffhds_le_zsu_mte, 5500 gen_helper_sve_ldffsds_le_zsu_mte, 5501 NULL, }, 5502 { gen_helper_sve_ldffbdu_zsu_mte, 5503 gen_helper_sve_ldffhdu_le_zsu_mte, 5504 gen_helper_sve_ldffsdu_le_zsu_mte, 5505 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5506 { { gen_helper_sve_ldffbds_zss_mte, 5507 gen_helper_sve_ldffhds_le_zss_mte, 5508 gen_helper_sve_ldffsds_le_zss_mte, 5509 NULL, }, 5510 { gen_helper_sve_ldffbdu_zss_mte, 5511 gen_helper_sve_ldffhdu_le_zss_mte, 5512 gen_helper_sve_ldffsdu_le_zss_mte, 5513 gen_helper_sve_ldffdd_le_zss_mte, } }, 5514 { { gen_helper_sve_ldffbds_zd_mte, 5515 gen_helper_sve_ldffhds_le_zd_mte, 5516 gen_helper_sve_ldffsds_le_zd_mte, 5517 NULL, }, 5518 { gen_helper_sve_ldffbdu_zd_mte, 5519 gen_helper_sve_ldffhdu_le_zd_mte, 5520 gen_helper_sve_ldffsdu_le_zd_mte, 5521 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5522 { /* Big-endian */ 5523 { { { gen_helper_sve_ldbds_zsu_mte, 5524 gen_helper_sve_ldhds_be_zsu_mte, 5525 gen_helper_sve_ldsds_be_zsu_mte, 5526 NULL, }, 5527 { gen_helper_sve_ldbdu_zsu_mte, 5528 gen_helper_sve_ldhdu_be_zsu_mte, 5529 gen_helper_sve_ldsdu_be_zsu_mte, 5530 gen_helper_sve_lddd_be_zsu_mte, } }, 5531 { { gen_helper_sve_ldbds_zss_mte, 5532 gen_helper_sve_ldhds_be_zss_mte, 5533 gen_helper_sve_ldsds_be_zss_mte, 5534 NULL, }, 5535 { gen_helper_sve_ldbdu_zss_mte, 5536 gen_helper_sve_ldhdu_be_zss_mte, 5537 gen_helper_sve_ldsdu_be_zss_mte, 5538 gen_helper_sve_lddd_be_zss_mte, } }, 5539 { { gen_helper_sve_ldbds_zd_mte, 5540 gen_helper_sve_ldhds_be_zd_mte, 5541 gen_helper_sve_ldsds_be_zd_mte, 5542 NULL, }, 5543 { gen_helper_sve_ldbdu_zd_mte, 5544 gen_helper_sve_ldhdu_be_zd_mte, 5545 
gen_helper_sve_ldsdu_be_zd_mte, 5546 gen_helper_sve_lddd_be_zd_mte, } } }, 5547 5548 /* First-fault */ 5549 { { { gen_helper_sve_ldffbds_zsu_mte, 5550 gen_helper_sve_ldffhds_be_zsu_mte, 5551 gen_helper_sve_ldffsds_be_zsu_mte, 5552 NULL, }, 5553 { gen_helper_sve_ldffbdu_zsu_mte, 5554 gen_helper_sve_ldffhdu_be_zsu_mte, 5555 gen_helper_sve_ldffsdu_be_zsu_mte, 5556 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5557 { { gen_helper_sve_ldffbds_zss_mte, 5558 gen_helper_sve_ldffhds_be_zss_mte, 5559 gen_helper_sve_ldffsds_be_zss_mte, 5560 NULL, }, 5561 { gen_helper_sve_ldffbdu_zss_mte, 5562 gen_helper_sve_ldffhdu_be_zss_mte, 5563 gen_helper_sve_ldffsdu_be_zss_mte, 5564 gen_helper_sve_ldffdd_be_zss_mte, } }, 5565 { { gen_helper_sve_ldffbds_zd_mte, 5566 gen_helper_sve_ldffhds_be_zd_mte, 5567 gen_helper_sve_ldffsds_be_zd_mte, 5568 NULL, }, 5569 { gen_helper_sve_ldffbdu_zd_mte, 5570 gen_helper_sve_ldffhdu_be_zd_mte, 5571 gen_helper_sve_ldffsdu_be_zd_mte, 5572 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 5573 }; 5574 5575 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 5576 { 5577 gen_helper_gvec_mem_scatter *fn = NULL; 5578 bool be = s->be_data == MO_BE; 5579 bool mte = s->mte_active[0]; 5580 5581 if (!dc_isar_feature(aa64_sve, s)) { 5582 return false; 5583 } 5584 s->is_nonstreaming = true; 5585 if (!sve_access_check(s)) { 5586 return true; 5587 } 5588 5589 switch (a->esz) { 5590 case MO_32: 5591 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 5592 break; 5593 case MO_64: 5594 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 5595 break; 5596 } 5597 assert(fn != NULL); 5598 5599 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5600 cpu_reg_sp(s, a->rn), a->msz, false, fn); 5601 return true; 5602 } 5603 5604 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 5605 { 5606 gen_helper_gvec_mem_scatter *fn = NULL; 5607 bool be = s->be_data == MO_BE; 5608 bool mte = s->mte_active[0]; 5609 5610 if (a->esz < a->msz || (a->esz == a->msz && 
!a->u)) { 5611 return false; 5612 } 5613 if (!dc_isar_feature(aa64_sve, s)) { 5614 return false; 5615 } 5616 s->is_nonstreaming = true; 5617 if (!sve_access_check(s)) { 5618 return true; 5619 } 5620 5621 switch (a->esz) { 5622 case MO_32: 5623 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 5624 break; 5625 case MO_64: 5626 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 5627 break; 5628 } 5629 assert(fn != NULL); 5630 5631 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 5632 * by loading the immediate into the scalar parameter. 5633 */ 5634 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5635 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 5636 return true; 5637 } 5638 5639 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 5640 { 5641 gen_helper_gvec_mem_scatter *fn = NULL; 5642 bool be = s->be_data == MO_BE; 5643 bool mte = s->mte_active[0]; 5644 5645 if (a->esz < a->msz + !a->u) { 5646 return false; 5647 } 5648 if (!dc_isar_feature(aa64_sve2, s)) { 5649 return false; 5650 } 5651 s->is_nonstreaming = true; 5652 if (!sve_access_check(s)) { 5653 return true; 5654 } 5655 5656 switch (a->esz) { 5657 case MO_32: 5658 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 5659 break; 5660 case MO_64: 5661 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 5662 break; 5663 } 5664 assert(fn != NULL); 5665 5666 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5667 cpu_reg(s, a->rm), a->msz, false, fn); 5668 return true; 5669 } 5670 5671 /* Indexed by [mte][be][xs][msz]. 
*/
/* Scatter-store helpers for 32-bit elements; xs selects zsu (0) or zss (1). */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.
*/ 5706 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { 5707 { /* MTE Inactive */ 5708 { /* Little-endian */ 5709 { gen_helper_sve_stbd_zsu, 5710 gen_helper_sve_sthd_le_zsu, 5711 gen_helper_sve_stsd_le_zsu, 5712 gen_helper_sve_stdd_le_zsu, }, 5713 { gen_helper_sve_stbd_zss, 5714 gen_helper_sve_sthd_le_zss, 5715 gen_helper_sve_stsd_le_zss, 5716 gen_helper_sve_stdd_le_zss, }, 5717 { gen_helper_sve_stbd_zd, 5718 gen_helper_sve_sthd_le_zd, 5719 gen_helper_sve_stsd_le_zd, 5720 gen_helper_sve_stdd_le_zd, } }, 5721 { /* Big-endian */ 5722 { gen_helper_sve_stbd_zsu, 5723 gen_helper_sve_sthd_be_zsu, 5724 gen_helper_sve_stsd_be_zsu, 5725 gen_helper_sve_stdd_be_zsu, }, 5726 { gen_helper_sve_stbd_zss, 5727 gen_helper_sve_sthd_be_zss, 5728 gen_helper_sve_stsd_be_zss, 5729 gen_helper_sve_stdd_be_zss, }, 5730 { gen_helper_sve_stbd_zd, 5731 gen_helper_sve_sthd_be_zd, 5732 gen_helper_sve_stsd_be_zd, 5733 gen_helper_sve_stdd_be_zd, } } }, 5734 { /* MTE Inactive */ 5735 { /* Little-endian */ 5736 { gen_helper_sve_stbd_zsu_mte, 5737 gen_helper_sve_sthd_le_zsu_mte, 5738 gen_helper_sve_stsd_le_zsu_mte, 5739 gen_helper_sve_stdd_le_zsu_mte, }, 5740 { gen_helper_sve_stbd_zss_mte, 5741 gen_helper_sve_sthd_le_zss_mte, 5742 gen_helper_sve_stsd_le_zss_mte, 5743 gen_helper_sve_stdd_le_zss_mte, }, 5744 { gen_helper_sve_stbd_zd_mte, 5745 gen_helper_sve_sthd_le_zd_mte, 5746 gen_helper_sve_stsd_le_zd_mte, 5747 gen_helper_sve_stdd_le_zd_mte, } }, 5748 { /* Big-endian */ 5749 { gen_helper_sve_stbd_zsu_mte, 5750 gen_helper_sve_sthd_be_zsu_mte, 5751 gen_helper_sve_stsd_be_zsu_mte, 5752 gen_helper_sve_stdd_be_zsu_mte, }, 5753 { gen_helper_sve_stbd_zss_mte, 5754 gen_helper_sve_sthd_be_zss_mte, 5755 gen_helper_sve_stsd_be_zss_mte, 5756 gen_helper_sve_stdd_be_zss_mte, }, 5757 { gen_helper_sve_stbd_zd_mte, 5758 gen_helper_sve_sthd_be_zd_mte, 5759 gen_helper_sve_stsd_be_zd_mte, 5760 gen_helper_sve_stdd_be_zd_mte, } } }, 5761 }; 5762 5763 static bool trans_ST1_zprz(DisasContext 
*s, arg_ST1_zprz *a) 5764 { 5765 gen_helper_gvec_mem_scatter *fn; 5766 bool be = s->be_data == MO_BE; 5767 bool mte = s->mte_active[0]; 5768 5769 if (a->esz < a->msz || (a->msz == 0 && a->scale)) { 5770 return false; 5771 } 5772 if (!dc_isar_feature(aa64_sve, s)) { 5773 return false; 5774 } 5775 s->is_nonstreaming = true; 5776 if (!sve_access_check(s)) { 5777 return true; 5778 } 5779 switch (a->esz) { 5780 case MO_32: 5781 fn = scatter_store_fn32[mte][be][a->xs][a->msz]; 5782 break; 5783 case MO_64: 5784 fn = scatter_store_fn64[mte][be][a->xs][a->msz]; 5785 break; 5786 default: 5787 g_assert_not_reached(); 5788 } 5789 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5790 cpu_reg_sp(s, a->rn), a->msz, true, fn); 5791 return true; 5792 } 5793 5794 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) 5795 { 5796 gen_helper_gvec_mem_scatter *fn = NULL; 5797 bool be = s->be_data == MO_BE; 5798 bool mte = s->mte_active[0]; 5799 5800 if (a->esz < a->msz) { 5801 return false; 5802 } 5803 if (!dc_isar_feature(aa64_sve, s)) { 5804 return false; 5805 } 5806 s->is_nonstreaming = true; 5807 if (!sve_access_check(s)) { 5808 return true; 5809 } 5810 5811 switch (a->esz) { 5812 case MO_32: 5813 fn = scatter_store_fn32[mte][be][0][a->msz]; 5814 break; 5815 case MO_64: 5816 fn = scatter_store_fn64[mte][be][2][a->msz]; 5817 break; 5818 } 5819 assert(fn != NULL); 5820 5821 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) 5822 * by loading the immediate into the scalar parameter. 
5823 */ 5824 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5825 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn); 5826 return true; 5827 } 5828 5829 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) 5830 { 5831 gen_helper_gvec_mem_scatter *fn; 5832 bool be = s->be_data == MO_BE; 5833 bool mte = s->mte_active[0]; 5834 5835 if (a->esz < a->msz) { 5836 return false; 5837 } 5838 if (!dc_isar_feature(aa64_sve2, s)) { 5839 return false; 5840 } 5841 s->is_nonstreaming = true; 5842 if (!sve_access_check(s)) { 5843 return true; 5844 } 5845 5846 switch (a->esz) { 5847 case MO_32: 5848 fn = scatter_store_fn32[mte][be][0][a->msz]; 5849 break; 5850 case MO_64: 5851 fn = scatter_store_fn64[mte][be][2][a->msz]; 5852 break; 5853 default: 5854 g_assert_not_reached(); 5855 } 5856 5857 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5858 cpu_reg(s, a->rm), a->msz, true, fn); 5859 return true; 5860 } 5861 5862 /* 5863 * Prefetches 5864 */ 5865 5866 static bool trans_PRF(DisasContext *s, arg_PRF *a) 5867 { 5868 if (!dc_isar_feature(aa64_sve, s)) { 5869 return false; 5870 } 5871 /* Prefetch is a nop within QEMU. */ 5872 (void)sve_access_check(s); 5873 return true; 5874 } 5875 5876 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) 5877 { 5878 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 5879 return false; 5880 } 5881 /* Prefetch is a nop within QEMU. */ 5882 (void)sve_access_check(s); 5883 return true; 5884 } 5885 5886 static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) 5887 { 5888 if (!dc_isar_feature(aa64_sve, s)) { 5889 return false; 5890 } 5891 /* Prefetch is a nop within QEMU. */ 5892 s->is_nonstreaming = true; 5893 (void)sve_access_check(s); 5894 return true; 5895 } 5896 5897 /* 5898 * Move Prefix 5899 * 5900 * TODO: The implementation so far could handle predicated merging movprfx. 5901 * The helper functions as written take an extra source register to 5902 * use in the operation, but the result is only written when predication 5903 * succeeds. 
For unpredicated movprfx, we need to rearrange the helpers 5904 * to allow the final write back to the destination to be unconditional. 5905 * For predicated zeroing movprfx, we need to rearrange the helpers to 5906 * allow the final write back to zero inactives. 5907 * 5908 * In the meantime, just emit the moves. 5909 */ 5910 5911 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 5912 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 5913 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 5914 5915 /* 5916 * SVE2 Integer Multiply - Unpredicated 5917 */ 5918 5919 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 5920 5921 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 5922 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 5923 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 5924 }; 5925 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5926 smulh_zzz_fns[a->esz], a, 0) 5927 5928 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 5929 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 5930 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 5931 }; 5932 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5933 umulh_zzz_fns[a->esz], a, 0) 5934 5935 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5936 gen_helper_gvec_pmul_b, a, 0) 5937 5938 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { 5939 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 5940 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 5941 }; 5942 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5943 sqdmulh_zzz_fns[a->esz], a, 0) 5944 5945 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 5946 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 5947 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 5948 }; 5949 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5950 sqrdmulh_zzz_fns[a->esz], a, 0) 5951 5952 /* 5953 * SVE2 Integer - Predicated 5954 */ 
5955 5956 static gen_helper_gvec_4 * const sadlp_fns[4] = { 5957 NULL, gen_helper_sve2_sadalp_zpzz_h, 5958 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 5959 }; 5960 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5961 sadlp_fns[a->esz], a, 0) 5962 5963 static gen_helper_gvec_4 * const uadlp_fns[4] = { 5964 NULL, gen_helper_sve2_uadalp_zpzz_h, 5965 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 5966 }; 5967 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5968 uadlp_fns[a->esz], a, 0) 5969 5970 /* 5971 * SVE2 integer unary operations (predicated) 5972 */ 5973 5974 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 5975 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 5976 5977 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 5978 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0) 5979 5980 static gen_helper_gvec_3 * const sqabs_fns[4] = { 5981 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 5982 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 5983 }; 5984 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) 5985 5986 static gen_helper_gvec_3 * const sqneg_fns[4] = { 5987 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 5988 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 5989 }; 5990 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) 5991 5992 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) 5993 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) 5994 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) 5995 5996 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) 5997 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) 5998 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) 5999 6000 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) 6001 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) 6002 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) 6003 6004 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) 6005 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) 6006 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) 6007 6008 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) 6009 DO_ZPZZ(SMAXP, aa64_sve2, 
sve2_smaxp) 6010 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) 6011 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) 6012 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) 6013 6014 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) 6015 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) 6016 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) 6017 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) 6018 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) 6019 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) 6020 6021 /* 6022 * SVE2 Widening Integer Arithmetic 6023 */ 6024 6025 static gen_helper_gvec_3 * const saddl_fns[4] = { 6026 NULL, gen_helper_sve2_saddl_h, 6027 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d, 6028 }; 6029 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6030 saddl_fns[a->esz], a, 0) 6031 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6032 saddl_fns[a->esz], a, 3) 6033 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6034 saddl_fns[a->esz], a, 2) 6035 6036 static gen_helper_gvec_3 * const ssubl_fns[4] = { 6037 NULL, gen_helper_sve2_ssubl_h, 6038 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d, 6039 }; 6040 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6041 ssubl_fns[a->esz], a, 0) 6042 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6043 ssubl_fns[a->esz], a, 3) 6044 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6045 ssubl_fns[a->esz], a, 2) 6046 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz, 6047 ssubl_fns[a->esz], a, 1) 6048 6049 static gen_helper_gvec_3 * const sabdl_fns[4] = { 6050 NULL, gen_helper_sve2_sabdl_h, 6051 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d, 6052 }; 6053 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6054 sabdl_fns[a->esz], a, 0) 6055 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6056 sabdl_fns[a->esz], a, 3) 6057 6058 static gen_helper_gvec_3 * const uaddl_fns[4] = { 6059 NULL, gen_helper_sve2_uaddl_h, 6060 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d, 6061 }; 6062 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6063 uaddl_fns[a->esz], a, 0) 
6064 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6065 uaddl_fns[a->esz], a, 3) 6066 6067 static gen_helper_gvec_3 * const usubl_fns[4] = { 6068 NULL, gen_helper_sve2_usubl_h, 6069 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d, 6070 }; 6071 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6072 usubl_fns[a->esz], a, 0) 6073 TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6074 usubl_fns[a->esz], a, 3) 6075 6076 static gen_helper_gvec_3 * const uabdl_fns[4] = { 6077 NULL, gen_helper_sve2_uabdl_h, 6078 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d, 6079 }; 6080 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6081 uabdl_fns[a->esz], a, 0) 6082 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6083 uabdl_fns[a->esz], a, 3) 6084 6085 static gen_helper_gvec_3 * const sqdmull_fns[4] = { 6086 NULL, gen_helper_sve2_sqdmull_zzz_h, 6087 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d, 6088 }; 6089 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6090 sqdmull_fns[a->esz], a, 0) 6091 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6092 sqdmull_fns[a->esz], a, 3) 6093 6094 static gen_helper_gvec_3 * const smull_fns[4] = { 6095 NULL, gen_helper_sve2_smull_zzz_h, 6096 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d, 6097 }; 6098 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6099 smull_fns[a->esz], a, 0) 6100 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6101 smull_fns[a->esz], a, 3) 6102 6103 static gen_helper_gvec_3 * const umull_fns[4] = { 6104 NULL, gen_helper_sve2_umull_zzz_h, 6105 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d, 6106 }; 6107 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6108 umull_fns[a->esz], a, 0) 6109 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6110 umull_fns[a->esz], a, 3) 6111 6112 static gen_helper_gvec_3 * const eoril_fns[4] = { 6113 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6114 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 
6115 }; 6116 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2) 6117 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1) 6118 6119 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6120 { 6121 static gen_helper_gvec_3 * const fns[4] = { 6122 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6123 NULL, gen_helper_sve2_pmull_d, 6124 }; 6125 6126 if (a->esz == 0) { 6127 if (!dc_isar_feature(aa64_sve2_pmull128, s)) { 6128 return false; 6129 } 6130 s->is_nonstreaming = true; 6131 } else if (!dc_isar_feature(aa64_sve, s)) { 6132 return false; 6133 } 6134 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); 6135 } 6136 6137 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false) 6138 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true) 6139 6140 static gen_helper_gvec_3 * const saddw_fns[4] = { 6141 NULL, gen_helper_sve2_saddw_h, 6142 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d, 6143 }; 6144 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0) 6145 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1) 6146 6147 static gen_helper_gvec_3 * const ssubw_fns[4] = { 6148 NULL, gen_helper_sve2_ssubw_h, 6149 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d, 6150 }; 6151 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0) 6152 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1) 6153 6154 static gen_helper_gvec_3 * const uaddw_fns[4] = { 6155 NULL, gen_helper_sve2_uaddw_h, 6156 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d, 6157 }; 6158 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0) 6159 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1) 6160 6161 static gen_helper_gvec_3 * const usubw_fns[4] = { 6162 NULL, gen_helper_sve2_usubw_h, 6163 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d, 6164 }; 6165 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, 
usubw_fns[a->esz], a, 0) 6166 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1) 6167 6168 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6169 { 6170 int top = imm & 1; 6171 int shl = imm >> 1; 6172 int halfbits = 4 << vece; 6173 6174 if (top) { 6175 if (shl == halfbits) { 6176 TCGv_vec t = tcg_temp_new_vec_matching(d); 6177 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6178 tcg_gen_and_vec(vece, d, n, t); 6179 } else { 6180 tcg_gen_sari_vec(vece, d, n, halfbits); 6181 tcg_gen_shli_vec(vece, d, d, shl); 6182 } 6183 } else { 6184 tcg_gen_shli_vec(vece, d, n, halfbits); 6185 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 6186 } 6187 } 6188 6189 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 6190 { 6191 int halfbits = 4 << vece; 6192 int top = imm & 1; 6193 int shl = (imm >> 1); 6194 int shift; 6195 uint64_t mask; 6196 6197 mask = MAKE_64BIT_MASK(0, halfbits); 6198 mask <<= shl; 6199 mask = dup_const(vece, mask); 6200 6201 shift = shl - top * halfbits; 6202 if (shift < 0) { 6203 tcg_gen_shri_i64(d, n, -shift); 6204 } else { 6205 tcg_gen_shli_i64(d, n, shift); 6206 } 6207 tcg_gen_andi_i64(d, d, mask); 6208 } 6209 6210 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6211 { 6212 gen_ushll_i64(MO_16, d, n, imm); 6213 } 6214 6215 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6216 { 6217 gen_ushll_i64(MO_32, d, n, imm); 6218 } 6219 6220 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6221 { 6222 gen_ushll_i64(MO_64, d, n, imm); 6223 } 6224 6225 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6226 { 6227 int halfbits = 4 << vece; 6228 int top = imm & 1; 6229 int shl = imm >> 1; 6230 6231 if (top) { 6232 if (shl == halfbits) { 6233 TCGv_vec t = tcg_temp_new_vec_matching(d); 6234 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6235 tcg_gen_and_vec(vece, d, n, t); 6236 } else { 
6237 tcg_gen_shri_vec(vece, d, n, halfbits); 6238 tcg_gen_shli_vec(vece, d, d, shl); 6239 } 6240 } else { 6241 if (shl == 0) { 6242 TCGv_vec t = tcg_temp_new_vec_matching(d); 6243 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6244 tcg_gen_and_vec(vece, d, n, t); 6245 } else { 6246 tcg_gen_shli_vec(vece, d, n, halfbits); 6247 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 6248 } 6249 } 6250 } 6251 6252 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a, 6253 const GVecGen2i ops[3], bool sel) 6254 { 6255 6256 if (a->esz < 0 || a->esz > 2) { 6257 return false; 6258 } 6259 if (sve_access_check(s)) { 6260 unsigned vsz = vec_full_reg_size(s); 6261 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6262 vec_full_reg_offset(s, a->rn), 6263 vsz, vsz, (a->imm << 1) | sel, 6264 &ops[a->esz]); 6265 } 6266 return true; 6267 } 6268 6269 static const TCGOpcode sshll_list[] = { 6270 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 6271 }; 6272 static const GVecGen2i sshll_ops[3] = { 6273 { .fniv = gen_sshll_vec, 6274 .opt_opc = sshll_list, 6275 .fno = gen_helper_sve2_sshll_h, 6276 .vece = MO_16 }, 6277 { .fniv = gen_sshll_vec, 6278 .opt_opc = sshll_list, 6279 .fno = gen_helper_sve2_sshll_s, 6280 .vece = MO_32 }, 6281 { .fniv = gen_sshll_vec, 6282 .opt_opc = sshll_list, 6283 .fno = gen_helper_sve2_sshll_d, 6284 .vece = MO_64 } 6285 }; 6286 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false) 6287 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true) 6288 6289 static const TCGOpcode ushll_list[] = { 6290 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 6291 }; 6292 static const GVecGen2i ushll_ops[3] = { 6293 { .fni8 = gen_ushll16_i64, 6294 .fniv = gen_ushll_vec, 6295 .opt_opc = ushll_list, 6296 .fno = gen_helper_sve2_ushll_h, 6297 .vece = MO_16 }, 6298 { .fni8 = gen_ushll32_i64, 6299 .fniv = gen_ushll_vec, 6300 .opt_opc = ushll_list, 6301 .fno = gen_helper_sve2_ushll_s, 6302 .vece = MO_32 }, 6303 { .fni8 = gen_ushll64_i64, 6304 .fniv = gen_ushll_vec, 6305 .opt_opc = 
ushll_list, 6306 .fno = gen_helper_sve2_ushll_d, 6307 .vece = MO_64 }, 6308 }; 6309 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false) 6310 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true) 6311 6312 static gen_helper_gvec_3 * const bext_fns[4] = { 6313 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 6314 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 6315 }; 6316 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6317 bext_fns[a->esz], a, 0) 6318 6319 static gen_helper_gvec_3 * const bdep_fns[4] = { 6320 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 6321 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 6322 }; 6323 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6324 bdep_fns[a->esz], a, 0) 6325 6326 static gen_helper_gvec_3 * const bgrp_fns[4] = { 6327 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 6328 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 6329 }; 6330 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6331 bgrp_fns[a->esz], a, 0) 6332 6333 static gen_helper_gvec_3 * const cadd_fns[4] = { 6334 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 6335 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d, 6336 }; 6337 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6338 cadd_fns[a->esz], a, 0) 6339 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6340 cadd_fns[a->esz], a, 1) 6341 6342 static gen_helper_gvec_3 * const sqcadd_fns[4] = { 6343 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 6344 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d, 6345 }; 6346 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6347 sqcadd_fns[a->esz], a, 0) 6348 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6349 sqcadd_fns[a->esz], a, 1) 6350 6351 static gen_helper_gvec_4 * const sabal_fns[4] = { 6352 NULL, gen_helper_sve2_sabal_h, 6353 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d, 6354 }; 6355 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, 
sabal_fns[a->esz], a, 0) 6356 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1) 6357 6358 static gen_helper_gvec_4 * const uabal_fns[4] = { 6359 NULL, gen_helper_sve2_uabal_h, 6360 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d, 6361 }; 6362 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0) 6363 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1) 6364 6365 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 6366 { 6367 static gen_helper_gvec_4 * const fns[2] = { 6368 gen_helper_sve2_adcl_s, 6369 gen_helper_sve2_adcl_d, 6370 }; 6371 /* 6372 * Note that in this case the ESZ field encodes both size and sign. 6373 * Split out 'subtract' into bit 1 of the data field for the helper. 6374 */ 6375 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 6376 } 6377 6378 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 6379 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 6380 6381 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 6382 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 6383 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 6384 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 6385 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 6386 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 6387 6388 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 6389 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 6390 6391 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 6392 const GVecGen2 ops[3]) 6393 { 6394 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 6395 return false; 6396 } 6397 if (sve_access_check(s)) { 6398 unsigned vsz = vec_full_reg_size(s); 6399 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 6400 vec_full_reg_offset(s, a->rn), 6401 vsz, vsz, &ops[a->esz]); 6402 } 6403 return true; 6404 } 6405 6406 
static const TCGOpcode sqxtn_list[] = { 6407 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 6408 }; 6409 6410 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6411 { 6412 TCGv_vec t = tcg_temp_new_vec_matching(d); 6413 int halfbits = 4 << vece; 6414 int64_t mask = (1ull << halfbits) - 1; 6415 int64_t min = -1ull << (halfbits - 1); 6416 int64_t max = -min - 1; 6417 6418 tcg_gen_dupi_vec(vece, t, min); 6419 tcg_gen_smax_vec(vece, d, n, t); 6420 tcg_gen_dupi_vec(vece, t, max); 6421 tcg_gen_smin_vec(vece, d, d, t); 6422 tcg_gen_dupi_vec(vece, t, mask); 6423 tcg_gen_and_vec(vece, d, d, t); 6424 } 6425 6426 static const GVecGen2 sqxtnb_ops[3] = { 6427 { .fniv = gen_sqxtnb_vec, 6428 .opt_opc = sqxtn_list, 6429 .fno = gen_helper_sve2_sqxtnb_h, 6430 .vece = MO_16 }, 6431 { .fniv = gen_sqxtnb_vec, 6432 .opt_opc = sqxtn_list, 6433 .fno = gen_helper_sve2_sqxtnb_s, 6434 .vece = MO_32 }, 6435 { .fniv = gen_sqxtnb_vec, 6436 .opt_opc = sqxtn_list, 6437 .fno = gen_helper_sve2_sqxtnb_d, 6438 .vece = MO_64 }, 6439 }; 6440 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 6441 6442 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6443 { 6444 TCGv_vec t = tcg_temp_new_vec_matching(d); 6445 int halfbits = 4 << vece; 6446 int64_t mask = (1ull << halfbits) - 1; 6447 int64_t min = -1ull << (halfbits - 1); 6448 int64_t max = -min - 1; 6449 6450 tcg_gen_dupi_vec(vece, t, min); 6451 tcg_gen_smax_vec(vece, n, n, t); 6452 tcg_gen_dupi_vec(vece, t, max); 6453 tcg_gen_smin_vec(vece, n, n, t); 6454 tcg_gen_shli_vec(vece, n, n, halfbits); 6455 tcg_gen_dupi_vec(vece, t, mask); 6456 tcg_gen_bitsel_vec(vece, d, t, d, n); 6457 } 6458 6459 static const GVecGen2 sqxtnt_ops[3] = { 6460 { .fniv = gen_sqxtnt_vec, 6461 .opt_opc = sqxtn_list, 6462 .load_dest = true, 6463 .fno = gen_helper_sve2_sqxtnt_h, 6464 .vece = MO_16 }, 6465 { .fniv = gen_sqxtnt_vec, 6466 .opt_opc = sqxtn_list, 6467 .load_dest = true, 6468 .fno = gen_helper_sve2_sqxtnt_s, 6469 
.vece = MO_32 }, 6470 { .fniv = gen_sqxtnt_vec, 6471 .opt_opc = sqxtn_list, 6472 .load_dest = true, 6473 .fno = gen_helper_sve2_sqxtnt_d, 6474 .vece = MO_64 }, 6475 }; 6476 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 6477 6478 static const TCGOpcode uqxtn_list[] = { 6479 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 6480 }; 6481 6482 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6483 { 6484 TCGv_vec t = tcg_temp_new_vec_matching(d); 6485 int halfbits = 4 << vece; 6486 int64_t max = (1ull << halfbits) - 1; 6487 6488 tcg_gen_dupi_vec(vece, t, max); 6489 tcg_gen_umin_vec(vece, d, n, t); 6490 } 6491 6492 static const GVecGen2 uqxtnb_ops[3] = { 6493 { .fniv = gen_uqxtnb_vec, 6494 .opt_opc = uqxtn_list, 6495 .fno = gen_helper_sve2_uqxtnb_h, 6496 .vece = MO_16 }, 6497 { .fniv = gen_uqxtnb_vec, 6498 .opt_opc = uqxtn_list, 6499 .fno = gen_helper_sve2_uqxtnb_s, 6500 .vece = MO_32 }, 6501 { .fniv = gen_uqxtnb_vec, 6502 .opt_opc = uqxtn_list, 6503 .fno = gen_helper_sve2_uqxtnb_d, 6504 .vece = MO_64 }, 6505 }; 6506 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) 6507 6508 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6509 { 6510 TCGv_vec t = tcg_temp_new_vec_matching(d); 6511 int halfbits = 4 << vece; 6512 int64_t max = (1ull << halfbits) - 1; 6513 6514 tcg_gen_dupi_vec(vece, t, max); 6515 tcg_gen_umin_vec(vece, n, n, t); 6516 tcg_gen_shli_vec(vece, n, n, halfbits); 6517 tcg_gen_bitsel_vec(vece, d, t, d, n); 6518 } 6519 6520 static const GVecGen2 uqxtnt_ops[3] = { 6521 { .fniv = gen_uqxtnt_vec, 6522 .opt_opc = uqxtn_list, 6523 .load_dest = true, 6524 .fno = gen_helper_sve2_uqxtnt_h, 6525 .vece = MO_16 }, 6526 { .fniv = gen_uqxtnt_vec, 6527 .opt_opc = uqxtn_list, 6528 .load_dest = true, 6529 .fno = gen_helper_sve2_uqxtnt_s, 6530 .vece = MO_32 }, 6531 { .fniv = gen_uqxtnt_vec, 6532 .opt_opc = uqxtn_list, 6533 .load_dest = true, 6534 .fno = gen_helper_sve2_uqxtnt_d, 6535 .vece = MO_64 }, 6536 }; 6537 
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops) 6538 6539 static const TCGOpcode sqxtun_list[] = { 6540 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 6541 }; 6542 6543 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6544 { 6545 TCGv_vec t = tcg_temp_new_vec_matching(d); 6546 int halfbits = 4 << vece; 6547 int64_t max = (1ull << halfbits) - 1; 6548 6549 tcg_gen_dupi_vec(vece, t, 0); 6550 tcg_gen_smax_vec(vece, d, n, t); 6551 tcg_gen_dupi_vec(vece, t, max); 6552 tcg_gen_umin_vec(vece, d, d, t); 6553 } 6554 6555 static const GVecGen2 sqxtunb_ops[3] = { 6556 { .fniv = gen_sqxtunb_vec, 6557 .opt_opc = sqxtun_list, 6558 .fno = gen_helper_sve2_sqxtunb_h, 6559 .vece = MO_16 }, 6560 { .fniv = gen_sqxtunb_vec, 6561 .opt_opc = sqxtun_list, 6562 .fno = gen_helper_sve2_sqxtunb_s, 6563 .vece = MO_32 }, 6564 { .fniv = gen_sqxtunb_vec, 6565 .opt_opc = sqxtun_list, 6566 .fno = gen_helper_sve2_sqxtunb_d, 6567 .vece = MO_64 }, 6568 }; 6569 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) 6570 6571 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6572 { 6573 TCGv_vec t = tcg_temp_new_vec_matching(d); 6574 int halfbits = 4 << vece; 6575 int64_t max = (1ull << halfbits) - 1; 6576 6577 tcg_gen_dupi_vec(vece, t, 0); 6578 tcg_gen_smax_vec(vece, n, n, t); 6579 tcg_gen_dupi_vec(vece, t, max); 6580 tcg_gen_umin_vec(vece, n, n, t); 6581 tcg_gen_shli_vec(vece, n, n, halfbits); 6582 tcg_gen_bitsel_vec(vece, d, t, d, n); 6583 } 6584 6585 static const GVecGen2 sqxtunt_ops[3] = { 6586 { .fniv = gen_sqxtunt_vec, 6587 .opt_opc = sqxtun_list, 6588 .load_dest = true, 6589 .fno = gen_helper_sve2_sqxtunt_h, 6590 .vece = MO_16 }, 6591 { .fniv = gen_sqxtunt_vec, 6592 .opt_opc = sqxtun_list, 6593 .load_dest = true, 6594 .fno = gen_helper_sve2_sqxtunt_s, 6595 .vece = MO_32 }, 6596 { .fniv = gen_sqxtunt_vec, 6597 .opt_opc = sqxtun_list, 6598 .load_dest = true, 6599 .fno = gen_helper_sve2_sqxtunt_d, 6600 .vece = MO_64 }, 6601 }; 
6602 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops) 6603 6604 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a, 6605 const GVecGen2i ops[3]) 6606 { 6607 if (a->esz < 0 || a->esz > MO_32) { 6608 return false; 6609 } 6610 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 6611 if (sve_access_check(s)) { 6612 unsigned vsz = vec_full_reg_size(s); 6613 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6614 vec_full_reg_offset(s, a->rn), 6615 vsz, vsz, a->imm, &ops[a->esz]); 6616 } 6617 return true; 6618 } 6619 6620 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6621 { 6622 int halfbits = 4 << vece; 6623 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6624 6625 tcg_gen_shri_i64(d, n, shr); 6626 tcg_gen_andi_i64(d, d, mask); 6627 } 6628 6629 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6630 { 6631 gen_shrnb_i64(MO_16, d, n, shr); 6632 } 6633 6634 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6635 { 6636 gen_shrnb_i64(MO_32, d, n, shr); 6637 } 6638 6639 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6640 { 6641 gen_shrnb_i64(MO_64, d, n, shr); 6642 } 6643 6644 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6645 { 6646 TCGv_vec t = tcg_temp_new_vec_matching(d); 6647 int halfbits = 4 << vece; 6648 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6649 6650 tcg_gen_shri_vec(vece, n, n, shr); 6651 tcg_gen_dupi_vec(vece, t, mask); 6652 tcg_gen_and_vec(vece, d, n, t); 6653 } 6654 6655 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; 6656 static const GVecGen2i shrnb_ops[3] = { 6657 { .fni8 = gen_shrnb16_i64, 6658 .fniv = gen_shrnb_vec, 6659 .opt_opc = shrnb_vec_list, 6660 .fno = gen_helper_sve2_shrnb_h, 6661 .vece = MO_16 }, 6662 { .fni8 = gen_shrnb32_i64, 6663 .fniv = gen_shrnb_vec, 6664 .opt_opc = shrnb_vec_list, 6665 .fno = gen_helper_sve2_shrnb_s, 6666 .vece = MO_32 }, 6667 { .fni8 = gen_shrnb64_i64, 6668 .fniv 
= gen_shrnb_vec, 6669 .opt_opc = shrnb_vec_list, 6670 .fno = gen_helper_sve2_shrnb_d, 6671 .vece = MO_64 }, 6672 }; 6673 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops) 6674 6675 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6676 { 6677 int halfbits = 4 << vece; 6678 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6679 6680 tcg_gen_shli_i64(n, n, halfbits - shr); 6681 tcg_gen_andi_i64(n, n, ~mask); 6682 tcg_gen_andi_i64(d, d, mask); 6683 tcg_gen_or_i64(d, d, n); 6684 } 6685 6686 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6687 { 6688 gen_shrnt_i64(MO_16, d, n, shr); 6689 } 6690 6691 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6692 { 6693 gen_shrnt_i64(MO_32, d, n, shr); 6694 } 6695 6696 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6697 { 6698 tcg_gen_shri_i64(n, n, shr); 6699 tcg_gen_deposit_i64(d, d, n, 32, 32); 6700 } 6701 6702 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6703 { 6704 TCGv_vec t = tcg_temp_new_vec_matching(d); 6705 int halfbits = 4 << vece; 6706 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6707 6708 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 6709 tcg_gen_dupi_vec(vece, t, mask); 6710 tcg_gen_bitsel_vec(vece, d, t, d, n); 6711 } 6712 6713 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; 6714 static const GVecGen2i shrnt_ops[3] = { 6715 { .fni8 = gen_shrnt16_i64, 6716 .fniv = gen_shrnt_vec, 6717 .opt_opc = shrnt_vec_list, 6718 .load_dest = true, 6719 .fno = gen_helper_sve2_shrnt_h, 6720 .vece = MO_16 }, 6721 { .fni8 = gen_shrnt32_i64, 6722 .fniv = gen_shrnt_vec, 6723 .opt_opc = shrnt_vec_list, 6724 .load_dest = true, 6725 .fno = gen_helper_sve2_shrnt_s, 6726 .vece = MO_32 }, 6727 { .fni8 = gen_shrnt64_i64, 6728 .fniv = gen_shrnt_vec, 6729 .opt_opc = shrnt_vec_list, 6730 .load_dest = true, 6731 .fno = gen_helper_sve2_shrnt_d, 6732 .vece = MO_64 }, 6733 }; 6734 TRANS_FEAT(SHRNT, 
aa64_sve2, do_shr_narrow, a, shrnt_ops) 6735 6736 static const GVecGen2i rshrnb_ops[3] = { 6737 { .fno = gen_helper_sve2_rshrnb_h }, 6738 { .fno = gen_helper_sve2_rshrnb_s }, 6739 { .fno = gen_helper_sve2_rshrnb_d }, 6740 }; 6741 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops) 6742 6743 static const GVecGen2i rshrnt_ops[3] = { 6744 { .fno = gen_helper_sve2_rshrnt_h }, 6745 { .fno = gen_helper_sve2_rshrnt_s }, 6746 { .fno = gen_helper_sve2_rshrnt_d }, 6747 }; 6748 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) 6749 6750 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 6751 TCGv_vec n, int64_t shr) 6752 { 6753 TCGv_vec t = tcg_temp_new_vec_matching(d); 6754 int halfbits = 4 << vece; 6755 6756 tcg_gen_sari_vec(vece, n, n, shr); 6757 tcg_gen_dupi_vec(vece, t, 0); 6758 tcg_gen_smax_vec(vece, n, n, t); 6759 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6760 tcg_gen_umin_vec(vece, d, n, t); 6761 } 6762 6763 static const TCGOpcode sqshrunb_vec_list[] = { 6764 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6765 }; 6766 static const GVecGen2i sqshrunb_ops[3] = { 6767 { .fniv = gen_sqshrunb_vec, 6768 .opt_opc = sqshrunb_vec_list, 6769 .fno = gen_helper_sve2_sqshrunb_h, 6770 .vece = MO_16 }, 6771 { .fniv = gen_sqshrunb_vec, 6772 .opt_opc = sqshrunb_vec_list, 6773 .fno = gen_helper_sve2_sqshrunb_s, 6774 .vece = MO_32 }, 6775 { .fniv = gen_sqshrunb_vec, 6776 .opt_opc = sqshrunb_vec_list, 6777 .fno = gen_helper_sve2_sqshrunb_d, 6778 .vece = MO_64 }, 6779 }; 6780 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) 6781 6782 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 6783 TCGv_vec n, int64_t shr) 6784 { 6785 TCGv_vec t = tcg_temp_new_vec_matching(d); 6786 int halfbits = 4 << vece; 6787 6788 tcg_gen_sari_vec(vece, n, n, shr); 6789 tcg_gen_dupi_vec(vece, t, 0); 6790 tcg_gen_smax_vec(vece, n, n, t); 6791 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6792 tcg_gen_umin_vec(vece, n, n, t); 6793 
tcg_gen_shli_vec(vece, n, n, halfbits); 6794 tcg_gen_bitsel_vec(vece, d, t, d, n); 6795 } 6796 6797 static const TCGOpcode sqshrunt_vec_list[] = { 6798 INDEX_op_shli_vec, INDEX_op_sari_vec, 6799 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6800 }; 6801 static const GVecGen2i sqshrunt_ops[3] = { 6802 { .fniv = gen_sqshrunt_vec, 6803 .opt_opc = sqshrunt_vec_list, 6804 .load_dest = true, 6805 .fno = gen_helper_sve2_sqshrunt_h, 6806 .vece = MO_16 }, 6807 { .fniv = gen_sqshrunt_vec, 6808 .opt_opc = sqshrunt_vec_list, 6809 .load_dest = true, 6810 .fno = gen_helper_sve2_sqshrunt_s, 6811 .vece = MO_32 }, 6812 { .fniv = gen_sqshrunt_vec, 6813 .opt_opc = sqshrunt_vec_list, 6814 .load_dest = true, 6815 .fno = gen_helper_sve2_sqshrunt_d, 6816 .vece = MO_64 }, 6817 }; 6818 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops) 6819 6820 static const GVecGen2i sqrshrunb_ops[3] = { 6821 { .fno = gen_helper_sve2_sqrshrunb_h }, 6822 { .fno = gen_helper_sve2_sqrshrunb_s }, 6823 { .fno = gen_helper_sve2_sqrshrunb_d }, 6824 }; 6825 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops) 6826 6827 static const GVecGen2i sqrshrunt_ops[3] = { 6828 { .fno = gen_helper_sve2_sqrshrunt_h }, 6829 { .fno = gen_helper_sve2_sqrshrunt_s }, 6830 { .fno = gen_helper_sve2_sqrshrunt_d }, 6831 }; 6832 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops) 6833 6834 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 6835 TCGv_vec n, int64_t shr) 6836 { 6837 TCGv_vec t = tcg_temp_new_vec_matching(d); 6838 int halfbits = 4 << vece; 6839 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6840 int64_t min = -max - 1; 6841 6842 tcg_gen_sari_vec(vece, n, n, shr); 6843 tcg_gen_dupi_vec(vece, t, min); 6844 tcg_gen_smax_vec(vece, n, n, t); 6845 tcg_gen_dupi_vec(vece, t, max); 6846 tcg_gen_smin_vec(vece, n, n, t); 6847 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6848 tcg_gen_and_vec(vece, d, n, t); 6849 } 6850 6851 static const TCGOpcode sqshrnb_vec_list[] = { 6852 
INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6853 }; 6854 static const GVecGen2i sqshrnb_ops[3] = { 6855 { .fniv = gen_sqshrnb_vec, 6856 .opt_opc = sqshrnb_vec_list, 6857 .fno = gen_helper_sve2_sqshrnb_h, 6858 .vece = MO_16 }, 6859 { .fniv = gen_sqshrnb_vec, 6860 .opt_opc = sqshrnb_vec_list, 6861 .fno = gen_helper_sve2_sqshrnb_s, 6862 .vece = MO_32 }, 6863 { .fniv = gen_sqshrnb_vec, 6864 .opt_opc = sqshrnb_vec_list, 6865 .fno = gen_helper_sve2_sqshrnb_d, 6866 .vece = MO_64 }, 6867 }; 6868 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) 6869 6870 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 6871 TCGv_vec n, int64_t shr) 6872 { 6873 TCGv_vec t = tcg_temp_new_vec_matching(d); 6874 int halfbits = 4 << vece; 6875 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6876 int64_t min = -max - 1; 6877 6878 tcg_gen_sari_vec(vece, n, n, shr); 6879 tcg_gen_dupi_vec(vece, t, min); 6880 tcg_gen_smax_vec(vece, n, n, t); 6881 tcg_gen_dupi_vec(vece, t, max); 6882 tcg_gen_smin_vec(vece, n, n, t); 6883 tcg_gen_shli_vec(vece, n, n, halfbits); 6884 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6885 tcg_gen_bitsel_vec(vece, d, t, d, n); 6886 } 6887 6888 static const TCGOpcode sqshrnt_vec_list[] = { 6889 INDEX_op_shli_vec, INDEX_op_sari_vec, 6890 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6891 }; 6892 static const GVecGen2i sqshrnt_ops[3] = { 6893 { .fniv = gen_sqshrnt_vec, 6894 .opt_opc = sqshrnt_vec_list, 6895 .load_dest = true, 6896 .fno = gen_helper_sve2_sqshrnt_h, 6897 .vece = MO_16 }, 6898 { .fniv = gen_sqshrnt_vec, 6899 .opt_opc = sqshrnt_vec_list, 6900 .load_dest = true, 6901 .fno = gen_helper_sve2_sqshrnt_s, 6902 .vece = MO_32 }, 6903 { .fniv = gen_sqshrnt_vec, 6904 .opt_opc = sqshrnt_vec_list, 6905 .load_dest = true, 6906 .fno = gen_helper_sve2_sqshrnt_d, 6907 .vece = MO_64 }, 6908 }; 6909 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops) 6910 6911 static const GVecGen2i sqrshrnb_ops[3] = { 6912 { .fno = 
gen_helper_sve2_sqrshrnb_h }, 6913 { .fno = gen_helper_sve2_sqrshrnb_s }, 6914 { .fno = gen_helper_sve2_sqrshrnb_d }, 6915 }; 6916 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops) 6917 6918 static const GVecGen2i sqrshrnt_ops[3] = { 6919 { .fno = gen_helper_sve2_sqrshrnt_h }, 6920 { .fno = gen_helper_sve2_sqrshrnt_s }, 6921 { .fno = gen_helper_sve2_sqrshrnt_d }, 6922 }; 6923 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) 6924 6925 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 6926 TCGv_vec n, int64_t shr) 6927 { 6928 TCGv_vec t = tcg_temp_new_vec_matching(d); 6929 int halfbits = 4 << vece; 6930 6931 tcg_gen_shri_vec(vece, n, n, shr); 6932 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6933 tcg_gen_umin_vec(vece, d, n, t); 6934 } 6935 6936 static const TCGOpcode uqshrnb_vec_list[] = { 6937 INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6938 }; 6939 static const GVecGen2i uqshrnb_ops[3] = { 6940 { .fniv = gen_uqshrnb_vec, 6941 .opt_opc = uqshrnb_vec_list, 6942 .fno = gen_helper_sve2_uqshrnb_h, 6943 .vece = MO_16 }, 6944 { .fniv = gen_uqshrnb_vec, 6945 .opt_opc = uqshrnb_vec_list, 6946 .fno = gen_helper_sve2_uqshrnb_s, 6947 .vece = MO_32 }, 6948 { .fniv = gen_uqshrnb_vec, 6949 .opt_opc = uqshrnb_vec_list, 6950 .fno = gen_helper_sve2_uqshrnb_d, 6951 .vece = MO_64 }, 6952 }; 6953 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) 6954 6955 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, 6956 TCGv_vec n, int64_t shr) 6957 { 6958 TCGv_vec t = tcg_temp_new_vec_matching(d); 6959 int halfbits = 4 << vece; 6960 6961 tcg_gen_shri_vec(vece, n, n, shr); 6962 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6963 tcg_gen_umin_vec(vece, n, n, t); 6964 tcg_gen_shli_vec(vece, n, n, halfbits); 6965 tcg_gen_bitsel_vec(vece, d, t, d, n); 6966 } 6967 6968 static const TCGOpcode uqshrnt_vec_list[] = { 6969 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6970 }; 6971 static const GVecGen2i uqshrnt_ops[3] = { 
6972 { .fniv = gen_uqshrnt_vec, 6973 .opt_opc = uqshrnt_vec_list, 6974 .load_dest = true, 6975 .fno = gen_helper_sve2_uqshrnt_h, 6976 .vece = MO_16 }, 6977 { .fniv = gen_uqshrnt_vec, 6978 .opt_opc = uqshrnt_vec_list, 6979 .load_dest = true, 6980 .fno = gen_helper_sve2_uqshrnt_s, 6981 .vece = MO_32 }, 6982 { .fniv = gen_uqshrnt_vec, 6983 .opt_opc = uqshrnt_vec_list, 6984 .load_dest = true, 6985 .fno = gen_helper_sve2_uqshrnt_d, 6986 .vece = MO_64 }, 6987 }; 6988 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops) 6989 6990 static const GVecGen2i uqrshrnb_ops[3] = { 6991 { .fno = gen_helper_sve2_uqrshrnb_h }, 6992 { .fno = gen_helper_sve2_uqrshrnb_s }, 6993 { .fno = gen_helper_sve2_uqrshrnb_d }, 6994 }; 6995 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops) 6996 6997 static const GVecGen2i uqrshrnt_ops[3] = { 6998 { .fno = gen_helper_sve2_uqrshrnt_h }, 6999 { .fno = gen_helper_sve2_uqrshrnt_s }, 7000 { .fno = gen_helper_sve2_uqrshrnt_d }, 7001 }; 7002 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops) 7003 7004 #define DO_SVE2_ZZZ_NARROW(NAME, name) \ 7005 static gen_helper_gvec_3 * const name##_fns[4] = { \ 7006 NULL, gen_helper_sve2_##name##_h, \ 7007 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 7008 }; \ 7009 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \ 7010 name##_fns[a->esz], a, 0) 7011 7012 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) 7013 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt) 7014 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb) 7015 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt) 7016 7017 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb) 7018 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt) 7019 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb) 7020 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) 7021 7022 static gen_helper_gvec_flags_4 * const match_fns[4] = { 7023 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL 7024 }; 7025 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) 7026 7027 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = 
{ 7028 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL 7029 }; 7030 TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) 7031 7032 static gen_helper_gvec_4 * const histcnt_fns[4] = { 7033 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d 7034 }; 7035 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, 7036 histcnt_fns[a->esz], a, 0) 7037 7038 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, 7039 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) 7040 7041 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) 7042 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) 7043 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) 7044 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) 7045 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) 7046 7047 /* 7048 * SVE Integer Multiply-Add (unpredicated) 7049 */ 7050 7051 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, 7052 gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, 7053 0, FPST_FPCR) 7054 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, 7055 gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, 7056 0, FPST_FPCR) 7057 7058 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { 7059 NULL, gen_helper_sve2_sqdmlal_zzzw_h, 7060 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, 7061 }; 7062 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7063 sqdmlal_zzzw_fns[a->esz], a, 0) 7064 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7065 sqdmlal_zzzw_fns[a->esz], a, 3) 7066 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7067 sqdmlal_zzzw_fns[a->esz], a, 2) 7068 7069 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = { 7070 NULL, gen_helper_sve2_sqdmlsl_zzzw_h, 7071 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d, 7072 }; 7073 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7074 sqdmlsl_zzzw_fns[a->esz], a, 0) 7075 TRANS_FEAT(SQDMLSLT_zzzw, 
aa64_sve2, gen_gvec_ool_arg_zzzz, 7076 sqdmlsl_zzzw_fns[a->esz], a, 3) 7077 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7078 sqdmlsl_zzzw_fns[a->esz], a, 2) 7079 7080 static gen_helper_gvec_4 * const sqrdmlah_fns[] = { 7081 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h, 7082 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d, 7083 }; 7084 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7085 sqrdmlah_fns[a->esz], a, 0) 7086 7087 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = { 7088 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h, 7089 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d, 7090 }; 7091 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7092 sqrdmlsh_fns[a->esz], a, 0) 7093 7094 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = { 7095 NULL, gen_helper_sve2_smlal_zzzw_h, 7096 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d, 7097 }; 7098 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7099 smlal_zzzw_fns[a->esz], a, 0) 7100 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7101 smlal_zzzw_fns[a->esz], a, 1) 7102 7103 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = { 7104 NULL, gen_helper_sve2_umlal_zzzw_h, 7105 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d, 7106 }; 7107 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7108 umlal_zzzw_fns[a->esz], a, 0) 7109 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7110 umlal_zzzw_fns[a->esz], a, 1) 7111 7112 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = { 7113 NULL, gen_helper_sve2_smlsl_zzzw_h, 7114 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d, 7115 }; 7116 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7117 smlsl_zzzw_fns[a->esz], a, 0) 7118 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7119 smlsl_zzzw_fns[a->esz], a, 1) 7120 7121 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = { 7122 NULL, gen_helper_sve2_umlsl_zzzw_h, 7123 
gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, 7124 }; 7125 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7126 umlsl_zzzw_fns[a->esz], a, 0) 7127 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7128 umlsl_zzzw_fns[a->esz], a, 1) 7129 7130 static gen_helper_gvec_4 * const cmla_fns[] = { 7131 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, 7132 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, 7133 }; 7134 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7135 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7136 7137 static gen_helper_gvec_4 * const cdot_fns[] = { 7138 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d 7139 }; 7140 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7141 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7142 7143 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { 7144 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, 7145 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, 7146 }; 7147 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7148 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7149 7150 TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7151 a->esz == 2 ? 
gen_helper_gvec_usdot_b : NULL, a, 0) 7152 7153 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, 7154 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt) 7155 7156 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7157 gen_helper_crypto_aese, a, false) 7158 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7159 gen_helper_crypto_aese, a, true) 7160 7161 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7162 gen_helper_crypto_sm4e, a, 0) 7163 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7164 gen_helper_crypto_sm4ekey, a, 0) 7165 7166 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, 7167 gen_gvec_rax1, a) 7168 7169 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, 7170 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR) 7171 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, 7172 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR) 7173 7174 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 7175 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR) 7176 7177 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, 7178 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR) 7179 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, 7180 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR) 7181 7182 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, 7183 FPROUNDING_ODD, gen_helper_sve_fcvt_ds) 7184 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a, 7185 FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds) 7186 7187 static gen_helper_gvec_3_ptr * const flogb_fns[] = { 7188 NULL, gen_helper_flogb_h, 7189 gen_helper_flogb_s, gen_helper_flogb_d 7190 }; 7191 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], 7192 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 7193 7194 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) 7195 { 7196 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s, 7197 a->rd, a->rn, a->rm, a->ra, 7198 (sel << 1) | sub, cpu_env); 7199 } 7200 7201 TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false) 7202 TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true) 7203 TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false) 7204 TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true) 7205 7206 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) 7207 { 7208 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s, 7209 a->rd, a->rn, a->rm, a->ra, 7210 (a->index << 2) | (sel << 1) | sub, cpu_env); 7211 } 7212 7213 TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false) 7214 TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true) 7215 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false) 7216 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true) 7217 7218 TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7219 gen_helper_gvec_smmla_b, a, 0) 7220 TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7221 gen_helper_gvec_usmmla_b, a, 0) 7222 TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7223 gen_helper_gvec_ummla_b, a, 0) 7224 7225 TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, 7226 gen_helper_gvec_bfdot, a, 0) 7227 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz, 7228 gen_helper_gvec_bfdot_idx, a) 7229 7230 TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, 7231 gen_helper_gvec_bfmmla, a, 0) 7232 7233 static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) 7234 { 7235 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, 7236 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR); 7237 } 7238 7239 TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, 
do_BFMLAL_zzzw, a, false) 7240 TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true) 7241 7242 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) 7243 { 7244 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, 7245 a->rd, a->rn, a->rm, a->ra, 7246 (a->index << 1) | sel, FPST_FPCR); 7247 } 7248 7249 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) 7250 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) 7251 7252 static bool trans_PSEL(DisasContext *s, arg_psel *a) 7253 { 7254 int vl = vec_full_reg_size(s); 7255 int pl = pred_gvec_reg_size(s); 7256 int elements = vl >> a->esz; 7257 TCGv_i64 tmp, didx, dbit; 7258 TCGv_ptr ptr; 7259 7260 if (!dc_isar_feature(aa64_sme, s)) { 7261 return false; 7262 } 7263 if (!sve_access_check(s)) { 7264 return true; 7265 } 7266 7267 tmp = tcg_temp_new_i64(); 7268 dbit = tcg_temp_new_i64(); 7269 didx = tcg_temp_new_i64(); 7270 ptr = tcg_temp_new_ptr(); 7271 7272 /* Compute the predicate element. */ 7273 tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm); 7274 if (is_power_of_2(elements)) { 7275 tcg_gen_andi_i64(tmp, tmp, elements - 1); 7276 } else { 7277 tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements)); 7278 } 7279 7280 /* Extract the predicate byte and bit indices. */ 7281 tcg_gen_shli_i64(tmp, tmp, a->esz); 7282 tcg_gen_andi_i64(dbit, tmp, 7); 7283 tcg_gen_shri_i64(didx, tmp, 3); 7284 if (HOST_BIG_ENDIAN) { 7285 tcg_gen_xori_i64(didx, didx, 7); 7286 } 7287 7288 /* Load the predicate word. */ 7289 tcg_gen_trunc_i64_ptr(ptr, didx); 7290 tcg_gen_add_ptr(ptr, ptr, cpu_env); 7291 tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm)); 7292 7293 /* Extract the predicate bit and replicate to MO_64. */ 7294 tcg_gen_shr_i64(tmp, tmp, dbit); 7295 tcg_gen_andi_i64(tmp, tmp, 1); 7296 tcg_gen_neg_i64(tmp, tmp); 7297 7298 /* Apply to either copy the source, or write zeros. 
*/ 7299 tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), 7300 pred_full_reg_offset(s, a->pn), tmp, pl, pl); 7301 return true; 7302 } 7303 7304 static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7305 { 7306 tcg_gen_smax_i32(d, a, n); 7307 tcg_gen_smin_i32(d, d, m); 7308 } 7309 7310 static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7311 { 7312 tcg_gen_smax_i64(d, a, n); 7313 tcg_gen_smin_i64(d, d, m); 7314 } 7315 7316 static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7317 TCGv_vec m, TCGv_vec a) 7318 { 7319 tcg_gen_smax_vec(vece, d, a, n); 7320 tcg_gen_smin_vec(vece, d, d, m); 7321 } 7322 7323 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7324 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7325 { 7326 static const TCGOpcode vecop[] = { 7327 INDEX_op_smin_vec, INDEX_op_smax_vec, 0 7328 }; 7329 static const GVecGen4 ops[4] = { 7330 { .fniv = gen_sclamp_vec, 7331 .fno = gen_helper_gvec_sclamp_b, 7332 .opt_opc = vecop, 7333 .vece = MO_8 }, 7334 { .fniv = gen_sclamp_vec, 7335 .fno = gen_helper_gvec_sclamp_h, 7336 .opt_opc = vecop, 7337 .vece = MO_16 }, 7338 { .fni4 = gen_sclamp_i32, 7339 .fniv = gen_sclamp_vec, 7340 .fno = gen_helper_gvec_sclamp_s, 7341 .opt_opc = vecop, 7342 .vece = MO_32 }, 7343 { .fni8 = gen_sclamp_i64, 7344 .fniv = gen_sclamp_vec, 7345 .fno = gen_helper_gvec_sclamp_d, 7346 .opt_opc = vecop, 7347 .vece = MO_64, 7348 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7349 }; 7350 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7351 } 7352 7353 TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) 7354 7355 static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) 7356 { 7357 tcg_gen_umax_i32(d, a, n); 7358 tcg_gen_umin_i32(d, d, m); 7359 } 7360 7361 static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) 7362 { 7363 tcg_gen_umax_i64(d, a, n); 7364 tcg_gen_umin_i64(d, d, m); 7365 } 7366 7367 static void 
gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 7368 TCGv_vec m, TCGv_vec a) 7369 { 7370 tcg_gen_umax_vec(vece, d, a, n); 7371 tcg_gen_umin_vec(vece, d, d, m); 7372 } 7373 7374 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 7375 uint32_t a, uint32_t oprsz, uint32_t maxsz) 7376 { 7377 static const TCGOpcode vecop[] = { 7378 INDEX_op_umin_vec, INDEX_op_umax_vec, 0 7379 }; 7380 static const GVecGen4 ops[4] = { 7381 { .fniv = gen_uclamp_vec, 7382 .fno = gen_helper_gvec_uclamp_b, 7383 .opt_opc = vecop, 7384 .vece = MO_8 }, 7385 { .fniv = gen_uclamp_vec, 7386 .fno = gen_helper_gvec_uclamp_h, 7387 .opt_opc = vecop, 7388 .vece = MO_16 }, 7389 { .fni4 = gen_uclamp_i32, 7390 .fniv = gen_uclamp_vec, 7391 .fno = gen_helper_gvec_uclamp_s, 7392 .opt_opc = vecop, 7393 .vece = MO_32 }, 7394 { .fni8 = gen_uclamp_i64, 7395 .fniv = gen_uclamp_vec, 7396 .fno = gen_helper_gvec_uclamp_d, 7397 .opt_opc = vecop, 7398 .vece = MO_64, 7399 .prefer_i64 = TCG_TARGET_REG_BITS == 64 } 7400 }; 7401 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); 7402 } 7403 7404 TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) 7405