/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */
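
/*
 * Note the convention used below: a trans_* routine returning false
 * requests unallocated_encoding(); returning true means the insn was
 * handled, including the case where sve_access_check() fails, since
 * the access check itself will have raised an exception.
 */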

/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
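
/*
 * For the indexed (zzxz) form above, the immediate index reaches the
 * helper via the simd_data field of the descriptor.
 */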

/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    tcg_temp_free_ptr(status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}
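
/*
 * Note that in the zzp forms above the governing predicate is passed
 * to the helper as if it were one more vector operand.
 */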

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
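
/*
 * The gen_gvec_fn_* routines below expand inline via the generic
 * vector infrastructure, in contrast to the out-of-line (ool) helper
 * invocations above.
 */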

/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
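
/*
 * Architecturally, PredTest sets N = "first active element is set",
 * Z = "no active element is set", C = !"last active element is set",
 * V = 0; the helpers return these packed into one word.
 */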

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};

static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
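
/*
 * In the GVecGen3i table below, each element size provides a .fniv
 * expansion used when the host supports rotli_vec (per .opt_opc),
 * a .fni8/.fni4 integer fallback, and a .fno out-of-line helper as
 * a last resort.
 */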

void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)

static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {             \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,         \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                       \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {          \
        gen_helper_##name##_b, gen_helper_##name##_h,           \
        gen_helper_##name##_s, gen_helper_##name##_d,           \
    };                                                          \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {            \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,         \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,         \
    };                                                                \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ
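
/*
 * The reductions above produce a scalar in the low 64 bits of the
 * destination; write_fp_dreg zeroes the remainder of the vector
 * register, as the architecture requires.
 */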

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}
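
/*
 * E.g. for bytes, ASR by 8 is canonicalized above to ASR by 7, while
 * LSR/LSL by 8 zeroes the active elements via do_movz_zpz.
 */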

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {             \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,            \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {              \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,   \
        gen_helper_sve_##name##_zzw_s, NULL                             \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                    \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */
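
/*
 * Note the helper operand order used below: the accumulator (ra) is
 * passed ahead of the two multiplicands.
 */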

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])

/*
 *** SVE Index Generation Group
 */

static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}

TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))

/*
 *** SVE Stack Allocation Group
 */
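
/*
 * ADDVL/ADDPL scale the immediate by the vector/predicate register
 * size in bytes, while the SME ADDSVL/ADDSPL forms scale by the
 * streaming lengths and use the SME enable check instead.
 */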

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL,                   gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL,                    gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
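
/*
 * AND with identical operands degenerates: Pn & Pn & Pg is just
 * Pn & Pg, and if Pg matches as well it is a simple predicate move.
 */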

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}
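
/*
 * NOR and NAND below compute ~(n op m) & pg via andc with the
 * guarding predicate, since result bits for inactive elements must
 * be zero.
 */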

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
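
/*
 * E.g. a 48-byte vector with esz == MO_16 has 24 elements: POW2 and
 * VL16 both give 16, VL32 gives 0 (the bound exceeds the element
 * count), MUL4 and MUL3 give 24, ALL gives 24.
 */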

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
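
/*
 * For PTRUES the pattern is an immediate, so the NZCV result is
 * known at translate time; the constant flag stores above encode
 * exactly the PredTest result for the generated predicate.
 */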

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
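
/*
 * E.g. for an unsigned increment: reg is zero-extended to 64 bits
 * and val is a small positive element count, so the 64-bit sum
 * cannot wrap, and a single smin against UINT32_MAX bounds the
 * result exactly.
 */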
             */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}

/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
}

TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)

static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.
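         * e.g. an imm8 of 0x70 expands to 1.0 in the selected precision
         * (0x3ff0000000000000 for MO_64).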
         */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)

/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
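             * (For example, with a 256-bit vector a .Q index of 1 gives
             * (1 << 4) < 32 and takes the dup_mem path above, while an
             * index of 2 lands here and splats zero.)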
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}

static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)

static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}

/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)

TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)

/*
 *** SVE Permute - Interleaving Group
 */

static gen_helper_gvec_3 * const zip_fns[4] = {
    gen_helper_sve_zip_b, gen_helper_sve_zip_h,
    gen_helper_sve_zip_s, gen_helper_sve_zip_d,
};
TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, 0)
TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, vec_full_reg_size(s) / 2)

TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a, 0)
TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a,
           QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)

/*
 *** SVE Permute Vector - Predicated Group
 */

static gen_helper_gvec_3 * const compact_fns[4] = {
    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
                        compact_fns[a->esz], a, 0)

/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS. */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}

/* Load an unsigned element of ESZ from RM[LAST].
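 * LAST is a byte offset within the vector, so the access below resolves
 * to cpu_env + last + vec_full_reg_offset(rm).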
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert the offset within the vector into an offset from ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering.  See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}

/* Compute CLAST for a Zreg. */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)

/* Compute CLAST for a scalar. */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}

/* Compute CLAST for a Vreg.
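 * (The low element of Vd supplies the default value, which is retained
 * whenever no predicate element is active.)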
 */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)

/* Compute CLAST for a Xreg. */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)

/* Compute LAST for a scalar. */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)

/* Compute LAST for a Xreg.
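 * (Unlike CLAST there is no fallback value: with no active elements the
 * offset wraps and a well-defined element of Zn is still loaded.)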
 */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)

static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static gen_helper_gvec_3 * const revb_fns[4] = {
    NULL, gen_helper_sve_revb_h,
    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
};
TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
};
TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)

TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)

TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)

TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
           gen_helper_sve_splice, a, a->esz)

TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
           a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)

/*
 *** SVE Integer Compare - Vectors Group
 */

static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZZ(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {       \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzz_fns[a->esz])

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

#define DO_PPZW(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {       \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzw_fns[a->esz])

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW

/*
 *** SVE Integer Compare - Immediate Groups
 */

static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZI(NAME, name) \
    static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {       \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                 \
               name##_ppzi_fns[a->esz])

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI

/*
 *** SVE Partition Break Group
 */

static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
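     * e.g. a 128-bit vector has a 2-byte predicate, below the 8-byte
     * granule that simd_desc can encode, so a raw PREDDESC is built.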
     */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}

static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}

TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpa, gen_helper_sve_brkpas)
TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpb, gen_helper_sve_brkpbs)

TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)

TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)

TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
           gen_helper_sve_brkn, gen_helper_sve_brkns)

/*
 *** SVE Predicate Count Group
 */

static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
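         * e.g. for psz == 2 and esz == MO_16 this masks with 0x5555
         * rather than the full 0x5555555555555555.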
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}

static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}

static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}

static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}

/*
 *** SVE Integer Compare Scalars Group
 */

static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}

static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (a->lt
        ? !dc_isar_feature(aa64_sve, s)
        : !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of the number of iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.
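     * (Each element occupies 1 << esz predicate bits, so the helper takes
     * the count in predicate bits rather than in elements.)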
     */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}

static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
{
    TCGv_i64 op0, op1, diff, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    tmax = tcg_constant_i64(vsz);
    diff = tcg_temp_new_i64();

    if (a->rw) {
        /* WHILERW */
        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
        t1 = tcg_temp_new_i64();
        tcg_gen_sub_i64(diff, op0, op1);
        tcg_gen_sub_i64(t1, op1, op0);
        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
        tcg_temp_free_i64(t1);
        /* Round down to a multiple of ESIZE. */
        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
        /* If op1 == op0, diff == 0, and the condition is always true. */
        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
    } else {
        /* WHILEWR */
        tcg_gen_sub_i64(diff, op1, op0);
        /* Round down to a multiple of ESIZE. */
        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
        /* If op0 >= op1, diff <= 0, the condition is always true. */
        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(diff, diff, tmax);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, diff);
    tcg_temp_free_i64(diff);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}

/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */

static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate.
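         * e.g. an imm8 of 0 expands to 2.0 in the selected precision.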
         */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}

static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}

TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)

static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}

static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}

TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)

static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}

TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)

static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = {            \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,     \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,     \
    };                                                              \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI

static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)

/*
 * SVE Multiply - Indexed
 */

TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)

#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX

#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB

#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR

#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB

#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT

/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };
    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sub,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)

/*
 *** SVE Floating Point Multiply Indexed Group
 */

static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
    NULL, gen_helper_gvec_fmul_idx_h,
    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 *** SVE Floating Point Fast Reduction Group
 */

typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz, p2vsz;
    TCGv_i32 t_desc;
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    p2vsz = pow2ceil(vsz);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_fp_reduce * const name##_fns[4] = {     \
        NULL, gen_helper_sve_##name##_h,                      \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    };                                                        \
    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

#undef DO_VPZ

/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
    NULL, gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)

static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
    NULL, gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
};
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)

/*
 *** SVE Floating Point Compare with Zero Group
 */

static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_PPZ(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[] = {     \
        NULL, gen_helper_sve_##name##_h,                      \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    };                                                        \
    TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ

/*
 *** SVE floating-point trig multiply-add coefficient
 */

static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
    NULL, gen_helper_sve_ftmad_h,
    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
                        ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
                        a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 *** SVE Floating Point Accumulating Reduction Group
 */

static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}

/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

#define DO_FP3(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[4] = {      \
        NULL, gen_helper_gvec_##name##_h,                       \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d  \
    };                                                          \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3

static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
    NULL, gen_helper_gvec_ftsmul_h,
    gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
};
TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
                        ftsmul_fns[a->esz], a, 0)

/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */

#define DO_ZPZZ_FP(NAME, FEAT, name) \
    static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
        NULL, gen_helper_##name##_h,                            \
        gen_helper_##name##_s, gen_helper_##name##_d            \
    };                                                          \
    TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)

DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)

typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ?
FPST_FPCR_F16 : FPST_FPCR); 4021 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 4022 fn(t_zd, t_zn, t_pg, scalar, status, desc); 4023 4024 tcg_temp_free_ptr(status); 4025 tcg_temp_free_ptr(t_pg); 4026 tcg_temp_free_ptr(t_zn); 4027 tcg_temp_free_ptr(t_zd); 4028 } 4029 4030 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 4031 gen_helper_sve_fp2scalar *fn) 4032 { 4033 if (fn == NULL) { 4034 return false; 4035 } 4036 if (sve_access_check(s)) { 4037 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 4038 tcg_constant_i64(imm), fn); 4039 } 4040 return true; 4041 } 4042 4043 #define DO_FP_IMM(NAME, name, const0, const1) \ 4044 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 4045 NULL, gen_helper_sve_##name##_h, \ 4046 gen_helper_sve_##name##_s, \ 4047 gen_helper_sve_##name##_d \ 4048 }; \ 4049 static uint64_t const name##_const[4][2] = { \ 4050 { -1, -1 }, \ 4051 { float16_##const0, float16_##const1 }, \ 4052 { float32_##const0, float32_##const1 }, \ 4053 { float64_##const0, float64_##const1 }, \ 4054 }; \ 4055 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 4056 name##_const[a->esz][a->imm], name##_fns[a->esz]) 4057 4058 DO_FP_IMM(FADD, fadds, half, one) 4059 DO_FP_IMM(FSUB, fsubs, half, one) 4060 DO_FP_IMM(FMUL, fmuls, half, two) 4061 DO_FP_IMM(FSUBR, fsubrs, half, one) 4062 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 4063 DO_FP_IMM(FMINNM, fminnms, zero, one) 4064 DO_FP_IMM(FMAX, fmaxs, zero, one) 4065 DO_FP_IMM(FMIN, fmins, zero, one) 4066 4067 #undef DO_FP_IMM 4068 4069 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 4070 gen_helper_gvec_4_ptr *fn) 4071 { 4072 if (fn == NULL) { 4073 return false; 4074 } 4075 if (sve_access_check(s)) { 4076 unsigned vsz = vec_full_reg_size(s); 4077 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4078 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 4079 vec_full_reg_offset(s, a->rn), 4080 vec_full_reg_offset(s, a->rm), 4081 pred_full_reg_offset(s, a->pg), 4082 status, vsz, vsz, 0, fn); 4083 tcg_temp_free_ptr(status); 4084 } 4085 return true; 4086 } 4087 4088 #define DO_FPCMP(NAME, name) \ 4089 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 4090 NULL, gen_helper_sve_##name##_h, \ 4091 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4092 }; \ 4093 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 4094 4095 DO_FPCMP(FCMGE, fcmge) 4096 DO_FPCMP(FCMGT, fcmgt) 4097 DO_FPCMP(FCMEQ, fcmeq) 4098 DO_FPCMP(FCMNE, fcmne) 4099 DO_FPCMP(FCMUO, fcmuo) 4100 DO_FPCMP(FACGE, facge) 4101 DO_FPCMP(FACGT, facgt) 4102 4103 #undef DO_FPCMP 4104 4105 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 4106 NULL, gen_helper_sve_fcadd_h, 4107 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 4108 }; 4109 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 4110 a->rd, a->rn, a->rm, a->pg, a->rot, 4111 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4112 4113 #define DO_FMLA(NAME, name) \ 4114 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 4115 NULL, gen_helper_sve_##name##_h, \ 4116 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4117 }; \ 4118 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 4119 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 4120 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 4121 4122 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 4123 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 4124 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 4125 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 4126 4127 #undef DO_FMLA 4128 4129 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 4130 NULL, gen_helper_sve_fcmla_zpzzz_h, 4131 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 4132 }; 4133 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 4134 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 4135 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4136 4137 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 4138 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 4139 }; 4140 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 4141 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 4142 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4143 4144 /* 4145 *** SVE Floating Point Unary Operations Predicated Group 4146 */ 4147 4148 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4149 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 4150 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4151 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 4152 4153 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 4154 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 4155 4156 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4157 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 4158 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4159 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 4160 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4161 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 4162 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4163 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 4164 4165 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4166 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 4167 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4168 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 4169 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4170 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 4171 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4172 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 4173 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4174 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 4175 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4176 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 4177 4178 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4179 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 4180 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4181 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 4182 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4183 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 4184 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4185 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 4186 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4187 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 4188 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4189 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 4190 4191 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4192 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 4193 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4194 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 4195 4196 static gen_helper_gvec_3_ptr * const frint_fns[] = { 4197 NULL, 4198 gen_helper_sve_frint_h, 4199 gen_helper_sve_frint_s, 4200 gen_helper_sve_frint_d 4201 }; 4202 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 4203 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR)
4204
4205 static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4206     NULL,
4207     gen_helper_sve_frintx_h,
4208     gen_helper_sve_frintx_s,
4209     gen_helper_sve_frintx_d
4210 };
4211 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4212            a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4213
4214 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4215                           int mode, gen_helper_gvec_3_ptr *fn)
4216 {
4217     unsigned vsz;
4218     TCGv_i32 tmode;
4219     TCGv_ptr status;
4220
4221     if (fn == NULL) {
4222         return false;
4223     }
4224     if (!sve_access_check(s)) {
4225         return true;
4226     }
4227
4228     vsz = vec_full_reg_size(s);
4229     tmode = tcg_const_i32(mode);
4230     status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4231
4232     gen_helper_set_rmode(tmode, tmode, status); /* old mode returned in tmode */
4233
4234     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4235                        vec_full_reg_offset(s, a->rn),
4236                        pred_full_reg_offset(s, a->pg),
4237                        status, vsz, vsz, 0, fn);
4238
4239     gen_helper_set_rmode(tmode, tmode, status); /* restore the old mode */
4240     tcg_temp_free_i32(tmode);
4241     tcg_temp_free_ptr(status);
4242     return true;
4243 }
4244
4245 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
4246            float_round_nearest_even, frint_fns[a->esz])
4247 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
4248            float_round_up, frint_fns[a->esz])
4249 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
4250            float_round_down, frint_fns[a->esz])
4251 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
4252            float_round_to_zero, frint_fns[a->esz])
4253 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
4254            float_round_ties_away, frint_fns[a->esz])
4255
4256 static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
4257     NULL, gen_helper_sve_frecpx_h,
4258     gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
4259 };
4260 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
4261            a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4262
4263 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
4264     NULL, gen_helper_sve_fsqrt_h,
4265     gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
4266 };
4267 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
4268            a, 0, a->esz == MO_16 ?
FPST_FPCR_F16 : FPST_FPCR) 4269 4270 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4271 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 4272 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4273 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 4274 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4275 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 4276 4277 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4278 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 4279 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4280 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 4281 4282 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4283 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 4284 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4285 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4286 4287 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4288 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4289 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4290 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4291 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4292 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4293 4294 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4295 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4296 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4297 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4298 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4299 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4300 4301 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4302 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4303 4304 /* 4305 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4306 */ 4307 4308 /* Subroutine loading a vector register at VOFS of LEN bytes. 4309 * The load should begin at the address Rn + IMM. 4310 */ 4311 4312 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4313 int len, int rn, int imm) 4314 { 4315 int len_align = QEMU_ALIGN_DOWN(len, 8); 4316 int len_remain = len % 8; 4317 int nparts = len / 8 + ctpop8(len_remain); 4318 int midx = get_mem_index(s); 4319 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4320 4321 dirty_addr = tcg_temp_new_i64(); 4322 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4323 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); 4324 tcg_temp_free_i64(dirty_addr); 4325 4326 /* 4327 * Note that unpredicated load/store of vector/predicate registers 4328 * are defined as a stream of bytes, which equates to little-endian 4329 * operations on larger quantities. 4330 * Attempt to keep code expansion to a minimum by limiting the 4331 * amount of unrolling done. 4332 */ 4333 if (nparts <= 4) { 4334 int i; 4335 4336 t0 = tcg_temp_new_i64(); 4337 for (i = 0; i < len_align; i += 8) { 4338 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); 4339 tcg_gen_st_i64(t0, base, vofs + i); 4340 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4341 } 4342 tcg_temp_free_i64(t0); 4343 } else { 4344 TCGLabel *loop = gen_new_label(); 4345 TCGv_ptr tp, i = tcg_const_ptr(0); 4346 4347 gen_set_label(loop); 4348 4349 t0 = tcg_temp_new_i64(); 4350 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); 4351 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4352 4353 tp = tcg_temp_new_ptr(); 4354 tcg_gen_add_ptr(tp, base, i); 4355 tcg_gen_addi_ptr(i, i, 8); 4356 tcg_gen_st_i64(t0, tp, vofs); 4357 tcg_temp_free_ptr(tp); 4358 tcg_temp_free_i64(t0); 4359 4360 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4361 tcg_temp_free_ptr(i); 4362 } 4363 4364 /* 4365 * Predicate register loads can be any multiple of 2. 
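 * (Vector loads are a multiple of 16 bytes, so the tail only arises
 * for predicates.)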
4366 * Note that we still store the entire 64-bit unit into cpu_env. 4367 */ 4368 if (len_remain) { 4369 t0 = tcg_temp_new_i64(); 4370 switch (len_remain) { 4371 case 2: 4372 case 4: 4373 case 8: 4374 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4375 MO_LE | ctz32(len_remain)); 4376 break; 4377 4378 case 6: 4379 t1 = tcg_temp_new_i64(); 4380 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL); 4381 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4382 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW); 4383 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4384 tcg_temp_free_i64(t1); 4385 break; 4386 4387 default: 4388 g_assert_not_reached(); 4389 } 4390 tcg_gen_st_i64(t0, base, vofs + len_align); 4391 tcg_temp_free_i64(t0); 4392 } 4393 } 4394 4395 /* Similarly for stores. */ 4396 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4397 int len, int rn, int imm) 4398 { 4399 int len_align = QEMU_ALIGN_DOWN(len, 8); 4400 int len_remain = len % 8; 4401 int nparts = len / 8 + ctpop8(len_remain); 4402 int midx = get_mem_index(s); 4403 TCGv_i64 dirty_addr, clean_addr, t0; 4404 4405 dirty_addr = tcg_temp_new_i64(); 4406 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4407 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); 4408 tcg_temp_free_i64(dirty_addr); 4409 4410 /* Note that unpredicated load/store of vector/predicate registers 4411 * are defined as a stream of bytes, which equates to little-endian 4412 * operations on larger quantities. There is no nice way to force 4413 * a little-endian store for aarch64_be-linux-user out of line. 4414 * 4415 * Attempt to keep code expansion to a minimum by limiting the 4416 * amount of unrolling done. 4417 */ 4418 if (nparts <= 4) { 4419 int i; 4420 4421 t0 = tcg_temp_new_i64(); 4422 for (i = 0; i < len_align; i += 8) { 4423 tcg_gen_ld_i64(t0, base, vofs + i); 4424 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); 4425 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4426 } 4427 tcg_temp_free_i64(t0); 4428 } else { 4429 TCGLabel *loop = gen_new_label(); 4430 TCGv_ptr tp, i = tcg_const_ptr(0); 4431 4432 gen_set_label(loop); 4433 4434 t0 = tcg_temp_new_i64(); 4435 tp = tcg_temp_new_ptr(); 4436 tcg_gen_add_ptr(tp, base, i); 4437 tcg_gen_ld_i64(t0, tp, vofs); 4438 tcg_gen_addi_ptr(i, i, 8); 4439 tcg_temp_free_ptr(tp); 4440 4441 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); 4442 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4443 tcg_temp_free_i64(t0); 4444 4445 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4446 tcg_temp_free_ptr(i); 4447 } 4448 4449 /* Predicate register stores can be any multiple of 2. 
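 * As with gen_sve_ldr above, only predicate stores produce a tail here.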
*/ 4450 if (len_remain) { 4451 t0 = tcg_temp_new_i64(); 4452 tcg_gen_ld_i64(t0, base, vofs + len_align); 4453 4454 switch (len_remain) { 4455 case 2: 4456 case 4: 4457 case 8: 4458 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4459 MO_LE | ctz32(len_remain)); 4460 break; 4461 4462 case 6: 4463 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL); 4464 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4465 tcg_gen_shri_i64(t0, t0, 32); 4466 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW); 4467 break; 4468 4469 default: 4470 g_assert_not_reached(); 4471 } 4472 tcg_temp_free_i64(t0); 4473 } 4474 } 4475 4476 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4477 { 4478 if (!dc_isar_feature(aa64_sve, s)) { 4479 return false; 4480 } 4481 if (sve_access_check(s)) { 4482 int size = vec_full_reg_size(s); 4483 int off = vec_full_reg_offset(s, a->rd); 4484 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4485 } 4486 return true; 4487 } 4488 4489 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4490 { 4491 if (!dc_isar_feature(aa64_sve, s)) { 4492 return false; 4493 } 4494 if (sve_access_check(s)) { 4495 int size = pred_full_reg_size(s); 4496 int off = pred_full_reg_offset(s, a->rd); 4497 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4498 } 4499 return true; 4500 } 4501 4502 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4503 { 4504 if (!dc_isar_feature(aa64_sve, s)) { 4505 return false; 4506 } 4507 if (sve_access_check(s)) { 4508 int size = vec_full_reg_size(s); 4509 int off = vec_full_reg_offset(s, a->rd); 4510 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4511 } 4512 return true; 4513 } 4514 4515 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4516 { 4517 if (!dc_isar_feature(aa64_sve, s)) { 4518 return false; 4519 } 4520 if (sve_access_check(s)) { 4521 int size = pred_full_reg_size(s); 4522 int off = pred_full_reg_offset(s, a->rd); 4523 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4524 } 4525 return true; 4526 } 4527 4528 /* 4529 *** SVE Memory - Contiguous Load Group 4530 */ 4531 4532 /* The memory mode of the dtype. */ 4533 static const MemOp dtype_mop[16] = { 4534 MO_UB, MO_UB, MO_UB, MO_UB, 4535 MO_SL, MO_UW, MO_UW, MO_UW, 4536 MO_SW, MO_SW, MO_UL, MO_UL, 4537 MO_SB, MO_SB, MO_SB, MO_UQ 4538 }; 4539 4540 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4541 4542 /* The vector element size of dtype. */ 4543 static const uint8_t dtype_esz[16] = { 4544 0, 1, 2, 3, 4545 3, 1, 2, 3, 4546 3, 2, 2, 3, 4547 3, 2, 1, 3 4548 }; 4549 4550 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4551 int dtype, uint32_t mte_n, bool is_write, 4552 gen_helper_gvec_mem *fn) 4553 { 4554 unsigned vsz = vec_full_reg_size(s); 4555 TCGv_ptr t_pg; 4556 int desc = 0; 4557 4558 /* 4559 * For e.g. LD4, there are not enough arguments to pass all 4 4560 * registers as pointers, so encode the regno into the data field. 4561 * For consistency, do this even for LD1. 
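     *
     * A rough sketch of the packing done below (illustrative only; the
     * helpers own the authoritative decode):
     *
     *     desc = mte_fields << SVE_MTEDESC_SHIFT;    (only when MTE is active)
     *     desc = simd_desc(vsz, vsz, zt | desc);
     *
     * so simd_data(desc) yields zt in its low bits, with any MTE fields
     * above SVE_MTEDESC_SHIFT.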
4562 */ 4563 if (s->mte_active[0]) { 4564 int msz = dtype_msz(dtype); 4565 4566 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4567 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4568 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4569 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4570 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); 4571 desc <<= SVE_MTEDESC_SHIFT; 4572 } else { 4573 addr = clean_data_tbi(s, addr); 4574 } 4575 4576 desc = simd_desc(vsz, vsz, zt | desc); 4577 t_pg = tcg_temp_new_ptr(); 4578 4579 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 4580 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc)); 4581 4582 tcg_temp_free_ptr(t_pg); 4583 } 4584 4585 /* Indexed by [mte][be][dtype][nreg] */ 4586 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4587 { /* mte inactive, little-endian */ 4588 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4589 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4590 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4591 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4592 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4593 4594 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4595 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4596 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4597 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4598 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4599 4600 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4601 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4602 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4603 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4604 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4605 4606 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4607 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4608 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4609 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4610 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4611 4612 /* mte inactive, big-endian */ 4613 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4614 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4615 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4616 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4617 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4618 4619 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4620 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4621 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4622 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4623 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4624 4625 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4626 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4627 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4628 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4629 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4630 4631 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4632 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4633 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4634 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4635 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4636 4637 { /* mte active, little-endian */ 4638 { { gen_helper_sve_ld1bb_r_mte, 4639 gen_helper_sve_ld2bb_r_mte, 4640 gen_helper_sve_ld3bb_r_mte, 4641 gen_helper_sve_ld4bb_r_mte }, 4642 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4643 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4644 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4645 4646 { 
gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 4647 { gen_helper_sve_ld1hh_le_r_mte, 4648 gen_helper_sve_ld2hh_le_r_mte, 4649 gen_helper_sve_ld3hh_le_r_mte, 4650 gen_helper_sve_ld4hh_le_r_mte }, 4651 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4652 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4653 4654 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4655 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4656 { gen_helper_sve_ld1ss_le_r_mte, 4657 gen_helper_sve_ld2ss_le_r_mte, 4658 gen_helper_sve_ld3ss_le_r_mte, 4659 gen_helper_sve_ld4ss_le_r_mte }, 4660 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4661 4662 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4663 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4664 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4665 { gen_helper_sve_ld1dd_le_r_mte, 4666 gen_helper_sve_ld2dd_le_r_mte, 4667 gen_helper_sve_ld3dd_le_r_mte, 4668 gen_helper_sve_ld4dd_le_r_mte } }, 4669 4670 /* mte active, big-endian */ 4671 { { gen_helper_sve_ld1bb_r_mte, 4672 gen_helper_sve_ld2bb_r_mte, 4673 gen_helper_sve_ld3bb_r_mte, 4674 gen_helper_sve_ld4bb_r_mte }, 4675 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4676 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4677 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4678 4679 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4680 { gen_helper_sve_ld1hh_be_r_mte, 4681 gen_helper_sve_ld2hh_be_r_mte, 4682 gen_helper_sve_ld3hh_be_r_mte, 4683 gen_helper_sve_ld4hh_be_r_mte }, 4684 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4685 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4686 4687 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4688 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4689 { gen_helper_sve_ld1ss_be_r_mte, 4690 gen_helper_sve_ld2ss_be_r_mte, 4691 gen_helper_sve_ld3ss_be_r_mte, 4692 gen_helper_sve_ld4ss_be_r_mte }, 4693 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4694 4695 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4696 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4697 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4698 { gen_helper_sve_ld1dd_be_r_mte, 4699 gen_helper_sve_ld2dd_be_r_mte, 4700 gen_helper_sve_ld3dd_be_r_mte, 4701 gen_helper_sve_ld4dd_be_r_mte } } }, 4702 }; 4703 4704 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4705 TCGv_i64 addr, int dtype, int nreg) 4706 { 4707 gen_helper_gvec_mem *fn 4708 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4709 4710 /* 4711 * While there are holes in the table, they are not 4712 * accessible via the instruction encoding. 
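 * (The widening dtypes only exist for LD1, hence the NULL entries
 * in the LD2-LD4 columns.)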
4713 */ 4714 assert(fn != NULL); 4715 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); 4716 } 4717 4718 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4719 { 4720 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4721 return false; 4722 } 4723 if (sve_access_check(s)) { 4724 TCGv_i64 addr = new_tmp_a64(s); 4725 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4726 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4727 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4728 } 4729 return true; 4730 } 4731 4732 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4733 { 4734 if (!dc_isar_feature(aa64_sve, s)) { 4735 return false; 4736 } 4737 if (sve_access_check(s)) { 4738 int vsz = vec_full_reg_size(s); 4739 int elements = vsz >> dtype_esz[a->dtype]; 4740 TCGv_i64 addr = new_tmp_a64(s); 4741 4742 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4743 (a->imm * elements * (a->nreg + 1)) 4744 << dtype_msz(a->dtype)); 4745 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4746 } 4747 return true; 4748 } 4749 4750 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4751 { 4752 static gen_helper_gvec_mem * const fns[2][2][16] = { 4753 { /* mte inactive, little-endian */ 4754 { gen_helper_sve_ldff1bb_r, 4755 gen_helper_sve_ldff1bhu_r, 4756 gen_helper_sve_ldff1bsu_r, 4757 gen_helper_sve_ldff1bdu_r, 4758 4759 gen_helper_sve_ldff1sds_le_r, 4760 gen_helper_sve_ldff1hh_le_r, 4761 gen_helper_sve_ldff1hsu_le_r, 4762 gen_helper_sve_ldff1hdu_le_r, 4763 4764 gen_helper_sve_ldff1hds_le_r, 4765 gen_helper_sve_ldff1hss_le_r, 4766 gen_helper_sve_ldff1ss_le_r, 4767 gen_helper_sve_ldff1sdu_le_r, 4768 4769 gen_helper_sve_ldff1bds_r, 4770 gen_helper_sve_ldff1bss_r, 4771 gen_helper_sve_ldff1bhs_r, 4772 gen_helper_sve_ldff1dd_le_r }, 4773 4774 /* mte inactive, big-endian */ 4775 { gen_helper_sve_ldff1bb_r, 4776 gen_helper_sve_ldff1bhu_r, 4777 gen_helper_sve_ldff1bsu_r, 4778 gen_helper_sve_ldff1bdu_r, 4779 4780 gen_helper_sve_ldff1sds_be_r, 4781 gen_helper_sve_ldff1hh_be_r, 4782 gen_helper_sve_ldff1hsu_be_r, 4783 gen_helper_sve_ldff1hdu_be_r, 4784 4785 gen_helper_sve_ldff1hds_be_r, 4786 gen_helper_sve_ldff1hss_be_r, 4787 gen_helper_sve_ldff1ss_be_r, 4788 gen_helper_sve_ldff1sdu_be_r, 4789 4790 gen_helper_sve_ldff1bds_r, 4791 gen_helper_sve_ldff1bss_r, 4792 gen_helper_sve_ldff1bhs_r, 4793 gen_helper_sve_ldff1dd_be_r } }, 4794 4795 { /* mte active, little-endian */ 4796 { gen_helper_sve_ldff1bb_r_mte, 4797 gen_helper_sve_ldff1bhu_r_mte, 4798 gen_helper_sve_ldff1bsu_r_mte, 4799 gen_helper_sve_ldff1bdu_r_mte, 4800 4801 gen_helper_sve_ldff1sds_le_r_mte, 4802 gen_helper_sve_ldff1hh_le_r_mte, 4803 gen_helper_sve_ldff1hsu_le_r_mte, 4804 gen_helper_sve_ldff1hdu_le_r_mte, 4805 4806 gen_helper_sve_ldff1hds_le_r_mte, 4807 gen_helper_sve_ldff1hss_le_r_mte, 4808 gen_helper_sve_ldff1ss_le_r_mte, 4809 gen_helper_sve_ldff1sdu_le_r_mte, 4810 4811 gen_helper_sve_ldff1bds_r_mte, 4812 gen_helper_sve_ldff1bss_r_mte, 4813 gen_helper_sve_ldff1bhs_r_mte, 4814 gen_helper_sve_ldff1dd_le_r_mte }, 4815 4816 /* mte active, big-endian */ 4817 { gen_helper_sve_ldff1bb_r_mte, 4818 gen_helper_sve_ldff1bhu_r_mte, 4819 gen_helper_sve_ldff1bsu_r_mte, 4820 gen_helper_sve_ldff1bdu_r_mte, 4821 4822 gen_helper_sve_ldff1sds_be_r_mte, 4823 gen_helper_sve_ldff1hh_be_r_mte, 4824 gen_helper_sve_ldff1hsu_be_r_mte, 4825 gen_helper_sve_ldff1hdu_be_r_mte, 4826 4827 gen_helper_sve_ldff1hds_be_r_mte, 4828 gen_helper_sve_ldff1hss_be_r_mte, 4829 gen_helper_sve_ldff1ss_be_r_mte, 4830 
gen_helper_sve_ldff1sdu_be_r_mte,
4831
4832           gen_helper_sve_ldff1bds_r_mte,
4833           gen_helper_sve_ldff1bss_r_mte,
4834           gen_helper_sve_ldff1bhs_r_mte,
4835           gen_helper_sve_ldff1dd_be_r_mte } },
4836     };
4837
4838     if (!dc_isar_feature(aa64_sve, s)) {
4839         return false;
4840     }
4841     s->is_nonstreaming = true;
4842     if (sve_access_check(s)) {
4843         TCGv_i64 addr = new_tmp_a64(s);
4844         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4845         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4846         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4847                    fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4848     }
4849     return true;
4850 }
4851
4852 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4853 {
4854     static gen_helper_gvec_mem * const fns[2][2][16] = {
4855         { /* mte inactive, little-endian */
4856           { gen_helper_sve_ldnf1bb_r,
4857             gen_helper_sve_ldnf1bhu_r,
4858             gen_helper_sve_ldnf1bsu_r,
4859             gen_helper_sve_ldnf1bdu_r,
4860
4861             gen_helper_sve_ldnf1sds_le_r,
4862             gen_helper_sve_ldnf1hh_le_r,
4863             gen_helper_sve_ldnf1hsu_le_r,
4864             gen_helper_sve_ldnf1hdu_le_r,
4865
4866             gen_helper_sve_ldnf1hds_le_r,
4867             gen_helper_sve_ldnf1hss_le_r,
4868             gen_helper_sve_ldnf1ss_le_r,
4869             gen_helper_sve_ldnf1sdu_le_r,
4870
4871             gen_helper_sve_ldnf1bds_r,
4872             gen_helper_sve_ldnf1bss_r,
4873             gen_helper_sve_ldnf1bhs_r,
4874             gen_helper_sve_ldnf1dd_le_r },
4875
4876           /* mte inactive, big-endian */
4877           { gen_helper_sve_ldnf1bb_r,
4878             gen_helper_sve_ldnf1bhu_r,
4879             gen_helper_sve_ldnf1bsu_r,
4880             gen_helper_sve_ldnf1bdu_r,
4881
4882             gen_helper_sve_ldnf1sds_be_r,
4883             gen_helper_sve_ldnf1hh_be_r,
4884             gen_helper_sve_ldnf1hsu_be_r,
4885             gen_helper_sve_ldnf1hdu_be_r,
4886
4887             gen_helper_sve_ldnf1hds_be_r,
4888             gen_helper_sve_ldnf1hss_be_r,
4889             gen_helper_sve_ldnf1ss_be_r,
4890             gen_helper_sve_ldnf1sdu_be_r,
4891
4892             gen_helper_sve_ldnf1bds_r,
4893             gen_helper_sve_ldnf1bss_r,
4894             gen_helper_sve_ldnf1bhs_r,
4895             gen_helper_sve_ldnf1dd_be_r } },
4896
4897         { /* mte active, little-endian */
4898           { gen_helper_sve_ldnf1bb_r_mte,
4899             gen_helper_sve_ldnf1bhu_r_mte,
4900             gen_helper_sve_ldnf1bsu_r_mte,
4901             gen_helper_sve_ldnf1bdu_r_mte,
4902
4903             gen_helper_sve_ldnf1sds_le_r_mte,
4904             gen_helper_sve_ldnf1hh_le_r_mte,
4905             gen_helper_sve_ldnf1hsu_le_r_mte,
4906             gen_helper_sve_ldnf1hdu_le_r_mte,
4907
4908             gen_helper_sve_ldnf1hds_le_r_mte,
4909             gen_helper_sve_ldnf1hss_le_r_mte,
4910             gen_helper_sve_ldnf1ss_le_r_mte,
4911             gen_helper_sve_ldnf1sdu_le_r_mte,
4912
4913             gen_helper_sve_ldnf1bds_r_mte,
4914             gen_helper_sve_ldnf1bss_r_mte,
4915             gen_helper_sve_ldnf1bhs_r_mte,
4916             gen_helper_sve_ldnf1dd_le_r_mte },
4917
4918           /* mte active, big-endian */
4919           { gen_helper_sve_ldnf1bb_r_mte,
4920             gen_helper_sve_ldnf1bhu_r_mte,
4921             gen_helper_sve_ldnf1bsu_r_mte,
4922             gen_helper_sve_ldnf1bdu_r_mte,
4923
4924             gen_helper_sve_ldnf1sds_be_r_mte,
4925             gen_helper_sve_ldnf1hh_be_r_mte,
4926             gen_helper_sve_ldnf1hsu_be_r_mte,
4927             gen_helper_sve_ldnf1hdu_be_r_mte,
4928
4929             gen_helper_sve_ldnf1hds_be_r_mte,
4930             gen_helper_sve_ldnf1hss_be_r_mte,
4931             gen_helper_sve_ldnf1ss_be_r_mte,
4932             gen_helper_sve_ldnf1sdu_be_r_mte,
4933
4934             gen_helper_sve_ldnf1bds_r_mte,
4935             gen_helper_sve_ldnf1bss_r_mte,
4936             gen_helper_sve_ldnf1bhs_r_mte,
4937             gen_helper_sve_ldnf1dd_be_r_mte } },
4938     };
4939
4940     if (!dc_isar_feature(aa64_sve, s)) {
4941         return false;
4942     }
4943     s->is_nonstreaming = true;
4944     if (sve_access_check(s)) {
4945         int vsz = vec_full_reg_size(s);
4946         int elements = vsz >> dtype_esz[a->dtype];
4947         int off =
(a->imm * elements) << dtype_msz(a->dtype); 4948 TCGv_i64 addr = new_tmp_a64(s); 4949 4950 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4951 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4952 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4953 } 4954 return true; 4955 } 4956 4957 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4958 { 4959 unsigned vsz = vec_full_reg_size(s); 4960 TCGv_ptr t_pg; 4961 int poff; 4962 4963 /* Load the first quadword using the normal predicated load helpers. */ 4964 poff = pred_full_reg_offset(s, pg); 4965 if (vsz > 16) { 4966 /* 4967 * Zero-extend the first 16 bits of the predicate into a temporary. 4968 * This avoids triggering an assert making sure we don't have bits 4969 * set within a predicate beyond VQ, but we have lowered VQ to 1 4970 * for this load operation. 4971 */ 4972 TCGv_i64 tmp = tcg_temp_new_i64(); 4973 #if HOST_BIG_ENDIAN 4974 poff += 6; 4975 #endif 4976 tcg_gen_ld16u_i64(tmp, cpu_env, poff); 4977 4978 poff = offsetof(CPUARMState, vfp.preg_tmp); 4979 tcg_gen_st_i64(tmp, cpu_env, poff); 4980 tcg_temp_free_i64(tmp); 4981 } 4982 4983 t_pg = tcg_temp_new_ptr(); 4984 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 4985 4986 gen_helper_gvec_mem *fn 4987 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4988 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt))); 4989 4990 tcg_temp_free_ptr(t_pg); 4991 4992 /* Replicate that first quadword. */ 4993 if (vsz > 16) { 4994 int doff = vec_full_reg_offset(s, zt); 4995 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4996 } 4997 } 4998 4999 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 5000 { 5001 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 5002 return false; 5003 } 5004 if (sve_access_check(s)) { 5005 int msz = dtype_msz(a->dtype); 5006 TCGv_i64 addr = new_tmp_a64(s); 5007 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 5008 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5009 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 5010 } 5011 return true; 5012 } 5013 5014 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 5015 { 5016 if (!dc_isar_feature(aa64_sve, s)) { 5017 return false; 5018 } 5019 if (sve_access_check(s)) { 5020 TCGv_i64 addr = new_tmp_a64(s); 5021 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 5022 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 5023 } 5024 return true; 5025 } 5026 5027 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 5028 { 5029 unsigned vsz = vec_full_reg_size(s); 5030 unsigned vsz_r32; 5031 TCGv_ptr t_pg; 5032 int poff, doff; 5033 5034 if (vsz < 32) { 5035 /* 5036 * Note that this UNDEFINED check comes after CheckSVEEnabled() 5037 * in the ARM pseudocode, which is the sve_access_check() done 5038 * in our caller. We should not now return false from the caller. 5039 */ 5040 unallocated_encoding(s); 5041 return; 5042 } 5043 5044 /* Load the first octaword using the normal predicated load helpers. */ 5045 5046 poff = pred_full_reg_offset(s, pg); 5047 if (vsz > 32) { 5048 /* 5049 * Zero-extend the first 32 bits of the predicate into a temporary. 5050 * This avoids triggering an assert making sure we don't have bits 5051 * set within a predicate beyond VQ, but we have lowered VQ to 2 5052 * for this load operation. 
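     * (This mirrors the 16-bit predicate extension in do_ldrq above.)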
5053 */ 5054 TCGv_i64 tmp = tcg_temp_new_i64(); 5055 #if HOST_BIG_ENDIAN 5056 poff += 4; 5057 #endif 5058 tcg_gen_ld32u_i64(tmp, cpu_env, poff); 5059 5060 poff = offsetof(CPUARMState, vfp.preg_tmp); 5061 tcg_gen_st_i64(tmp, cpu_env, poff); 5062 tcg_temp_free_i64(tmp); 5063 } 5064 5065 t_pg = tcg_temp_new_ptr(); 5066 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 5067 5068 gen_helper_gvec_mem *fn 5069 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 5070 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt))); 5071 5072 tcg_temp_free_ptr(t_pg); 5073 5074 /* 5075 * Replicate that first octaword. 5076 * The replication happens in units of 32; if the full vector size 5077 * is not a multiple of 32, the final bits are zeroed. 5078 */ 5079 doff = vec_full_reg_offset(s, zt); 5080 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 5081 if (vsz >= 64) { 5082 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 5083 } 5084 vsz -= vsz_r32; 5085 if (vsz) { 5086 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 5087 } 5088 } 5089 5090 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 5091 { 5092 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5093 return false; 5094 } 5095 if (a->rm == 31) { 5096 return false; 5097 } 5098 s->is_nonstreaming = true; 5099 if (sve_access_check(s)) { 5100 TCGv_i64 addr = new_tmp_a64(s); 5101 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5102 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5103 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5104 } 5105 return true; 5106 } 5107 5108 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 5109 { 5110 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5111 return false; 5112 } 5113 s->is_nonstreaming = true; 5114 if (sve_access_check(s)) { 5115 TCGv_i64 addr = new_tmp_a64(s); 5116 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 5117 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5118 } 5119 return true; 5120 } 5121 5122 /* Load and broadcast element. */ 5123 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 5124 { 5125 unsigned vsz = vec_full_reg_size(s); 5126 unsigned psz = pred_full_reg_size(s); 5127 unsigned esz = dtype_esz[a->dtype]; 5128 unsigned msz = dtype_msz(a->dtype); 5129 TCGLabel *over; 5130 TCGv_i64 temp, clean_addr; 5131 5132 if (!dc_isar_feature(aa64_sve, s)) { 5133 return false; 5134 } 5135 if (!sve_access_check(s)) { 5136 return true; 5137 } 5138 5139 over = gen_new_label(); 5140 5141 /* If the guarding predicate has no bits set, no load occurs. */ 5142 if (psz <= 8) { 5143 /* Reduce the pred_esz_masks value simply to reduce the 5144 * size of the code generated here. 5145 */ 5146 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 5147 temp = tcg_temp_new_i64(); 5148 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg)); 5149 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 5150 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 5151 tcg_temp_free_i64(temp); 5152 } else { 5153 TCGv_i32 t32 = tcg_temp_new_i32(); 5154 find_last_active(s, t32, esz, a->pg); 5155 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5156 tcg_temp_free_i32(t32); 5157 } 5158 5159 /* Load the data. */ 5160 temp = tcg_temp_new_i64(); 5161 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5162 clean_addr = gen_mte_check1(s, temp, false, true, msz); 5163 5164 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), 5165 finalize_memop(s, dtype_mop[a->dtype])); 5166 5167 /* Broadcast to *all* elements. 
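     * If the predicate was all false we branched past this; the zeroing
     * below then clears the entire register.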
*/ 5168 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5169 vsz, vsz, temp); 5170 tcg_temp_free_i64(temp); 5171 5172 /* Zero the inactive elements. */ 5173 gen_set_label(over); 5174 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5175 } 5176 5177 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5178 int msz, int esz, int nreg) 5179 { 5180 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 5181 { { { gen_helper_sve_st1bb_r, 5182 gen_helper_sve_st1bh_r, 5183 gen_helper_sve_st1bs_r, 5184 gen_helper_sve_st1bd_r }, 5185 { NULL, 5186 gen_helper_sve_st1hh_le_r, 5187 gen_helper_sve_st1hs_le_r, 5188 gen_helper_sve_st1hd_le_r }, 5189 { NULL, NULL, 5190 gen_helper_sve_st1ss_le_r, 5191 gen_helper_sve_st1sd_le_r }, 5192 { NULL, NULL, NULL, 5193 gen_helper_sve_st1dd_le_r } }, 5194 { { gen_helper_sve_st1bb_r, 5195 gen_helper_sve_st1bh_r, 5196 gen_helper_sve_st1bs_r, 5197 gen_helper_sve_st1bd_r }, 5198 { NULL, 5199 gen_helper_sve_st1hh_be_r, 5200 gen_helper_sve_st1hs_be_r, 5201 gen_helper_sve_st1hd_be_r }, 5202 { NULL, NULL, 5203 gen_helper_sve_st1ss_be_r, 5204 gen_helper_sve_st1sd_be_r }, 5205 { NULL, NULL, NULL, 5206 gen_helper_sve_st1dd_be_r } } }, 5207 5208 { { { gen_helper_sve_st1bb_r_mte, 5209 gen_helper_sve_st1bh_r_mte, 5210 gen_helper_sve_st1bs_r_mte, 5211 gen_helper_sve_st1bd_r_mte }, 5212 { NULL, 5213 gen_helper_sve_st1hh_le_r_mte, 5214 gen_helper_sve_st1hs_le_r_mte, 5215 gen_helper_sve_st1hd_le_r_mte }, 5216 { NULL, NULL, 5217 gen_helper_sve_st1ss_le_r_mte, 5218 gen_helper_sve_st1sd_le_r_mte }, 5219 { NULL, NULL, NULL, 5220 gen_helper_sve_st1dd_le_r_mte } }, 5221 { { gen_helper_sve_st1bb_r_mte, 5222 gen_helper_sve_st1bh_r_mte, 5223 gen_helper_sve_st1bs_r_mte, 5224 gen_helper_sve_st1bd_r_mte }, 5225 { NULL, 5226 gen_helper_sve_st1hh_be_r_mte, 5227 gen_helper_sve_st1hs_be_r_mte, 5228 gen_helper_sve_st1hd_be_r_mte }, 5229 { NULL, NULL, 5230 gen_helper_sve_st1ss_be_r_mte, 5231 gen_helper_sve_st1sd_be_r_mte }, 5232 { NULL, NULL, NULL, 5233 gen_helper_sve_st1dd_be_r_mte } } }, 5234 }; 5235 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5236 { { { gen_helper_sve_st2bb_r, 5237 gen_helper_sve_st2hh_le_r, 5238 gen_helper_sve_st2ss_le_r, 5239 gen_helper_sve_st2dd_le_r }, 5240 { gen_helper_sve_st3bb_r, 5241 gen_helper_sve_st3hh_le_r, 5242 gen_helper_sve_st3ss_le_r, 5243 gen_helper_sve_st3dd_le_r }, 5244 { gen_helper_sve_st4bb_r, 5245 gen_helper_sve_st4hh_le_r, 5246 gen_helper_sve_st4ss_le_r, 5247 gen_helper_sve_st4dd_le_r } }, 5248 { { gen_helper_sve_st2bb_r, 5249 gen_helper_sve_st2hh_be_r, 5250 gen_helper_sve_st2ss_be_r, 5251 gen_helper_sve_st2dd_be_r }, 5252 { gen_helper_sve_st3bb_r, 5253 gen_helper_sve_st3hh_be_r, 5254 gen_helper_sve_st3ss_be_r, 5255 gen_helper_sve_st3dd_be_r }, 5256 { gen_helper_sve_st4bb_r, 5257 gen_helper_sve_st4hh_be_r, 5258 gen_helper_sve_st4ss_be_r, 5259 gen_helper_sve_st4dd_be_r } } }, 5260 { { { gen_helper_sve_st2bb_r_mte, 5261 gen_helper_sve_st2hh_le_r_mte, 5262 gen_helper_sve_st2ss_le_r_mte, 5263 gen_helper_sve_st2dd_le_r_mte }, 5264 { gen_helper_sve_st3bb_r_mte, 5265 gen_helper_sve_st3hh_le_r_mte, 5266 gen_helper_sve_st3ss_le_r_mte, 5267 gen_helper_sve_st3dd_le_r_mte }, 5268 { gen_helper_sve_st4bb_r_mte, 5269 gen_helper_sve_st4hh_le_r_mte, 5270 gen_helper_sve_st4ss_le_r_mte, 5271 gen_helper_sve_st4dd_le_r_mte } }, 5272 { { gen_helper_sve_st2bb_r_mte, 5273 gen_helper_sve_st2hh_be_r_mte, 5274 gen_helper_sve_st2ss_be_r_mte, 5275 gen_helper_sve_st2dd_be_r_mte }, 5276 { gen_helper_sve_st3bb_r_mte, 5277 
gen_helper_sve_st3hh_be_r_mte, 5278 gen_helper_sve_st3ss_be_r_mte, 5279 gen_helper_sve_st3dd_be_r_mte }, 5280 { gen_helper_sve_st4bb_r_mte, 5281 gen_helper_sve_st4hh_be_r_mte, 5282 gen_helper_sve_st4ss_be_r_mte, 5283 gen_helper_sve_st4dd_be_r_mte } } }, 5284 }; 5285 gen_helper_gvec_mem *fn; 5286 int be = s->be_data == MO_BE; 5287 5288 if (nreg == 0) { 5289 /* ST1 */ 5290 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5291 nreg = 1; 5292 } else { 5293 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5294 assert(msz == esz); 5295 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5296 } 5297 assert(fn != NULL); 5298 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); 5299 } 5300 5301 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5302 { 5303 if (!dc_isar_feature(aa64_sve, s)) { 5304 return false; 5305 } 5306 if (a->rm == 31 || a->msz > a->esz) { 5307 return false; 5308 } 5309 if (sve_access_check(s)) { 5310 TCGv_i64 addr = new_tmp_a64(s); 5311 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5312 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5313 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5314 } 5315 return true; 5316 } 5317 5318 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5319 { 5320 if (!dc_isar_feature(aa64_sve, s)) { 5321 return false; 5322 } 5323 if (a->msz > a->esz) { 5324 return false; 5325 } 5326 if (sve_access_check(s)) { 5327 int vsz = vec_full_reg_size(s); 5328 int elements = vsz >> a->esz; 5329 TCGv_i64 addr = new_tmp_a64(s); 5330 5331 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5332 (a->imm * elements * (a->nreg + 1)) << a->msz); 5333 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5334 } 5335 return true; 5336 } 5337 5338 /* 5339 *** SVE gather loads / scatter stores 5340 */ 5341 5342 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5343 int scale, TCGv_i64 scalar, int msz, bool is_write, 5344 gen_helper_gvec_mem_scatter *fn) 5345 { 5346 unsigned vsz = vec_full_reg_size(s); 5347 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5348 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5349 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5350 int desc = 0; 5351 5352 if (s->mte_active[0]) { 5353 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 5354 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 5355 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 5356 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 5357 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); 5358 desc <<= SVE_MTEDESC_SHIFT; 5359 } 5360 desc = simd_desc(vsz, vsz, desc | scale); 5361 5362 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 5363 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); 5364 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); 5365 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5366 5367 tcg_temp_free_ptr(t_zt); 5368 tcg_temp_free_ptr(t_zm); 5369 tcg_temp_free_ptr(t_pg); 5370 } 5371 5372 /* Indexed by [mte][be][ff][xs][u][msz]. 
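 * ff selects the first-fault form, xs the offset form (zsu vs zss),
 * and u unsigned vs signed extension; cf. the lookup in trans_LD1_zprz:
 *     gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]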
*/ 5373 static gen_helper_gvec_mem_scatter * const 5374 gather_load_fn32[2][2][2][2][2][3] = { 5375 { /* MTE Inactive */ 5376 { /* Little-endian */ 5377 { { { gen_helper_sve_ldbss_zsu, 5378 gen_helper_sve_ldhss_le_zsu, 5379 NULL, }, 5380 { gen_helper_sve_ldbsu_zsu, 5381 gen_helper_sve_ldhsu_le_zsu, 5382 gen_helper_sve_ldss_le_zsu, } }, 5383 { { gen_helper_sve_ldbss_zss, 5384 gen_helper_sve_ldhss_le_zss, 5385 NULL, }, 5386 { gen_helper_sve_ldbsu_zss, 5387 gen_helper_sve_ldhsu_le_zss, 5388 gen_helper_sve_ldss_le_zss, } } }, 5389 5390 /* First-fault */ 5391 { { { gen_helper_sve_ldffbss_zsu, 5392 gen_helper_sve_ldffhss_le_zsu, 5393 NULL, }, 5394 { gen_helper_sve_ldffbsu_zsu, 5395 gen_helper_sve_ldffhsu_le_zsu, 5396 gen_helper_sve_ldffss_le_zsu, } }, 5397 { { gen_helper_sve_ldffbss_zss, 5398 gen_helper_sve_ldffhss_le_zss, 5399 NULL, }, 5400 { gen_helper_sve_ldffbsu_zss, 5401 gen_helper_sve_ldffhsu_le_zss, 5402 gen_helper_sve_ldffss_le_zss, } } } }, 5403 5404 { /* Big-endian */ 5405 { { { gen_helper_sve_ldbss_zsu, 5406 gen_helper_sve_ldhss_be_zsu, 5407 NULL, }, 5408 { gen_helper_sve_ldbsu_zsu, 5409 gen_helper_sve_ldhsu_be_zsu, 5410 gen_helper_sve_ldss_be_zsu, } }, 5411 { { gen_helper_sve_ldbss_zss, 5412 gen_helper_sve_ldhss_be_zss, 5413 NULL, }, 5414 { gen_helper_sve_ldbsu_zss, 5415 gen_helper_sve_ldhsu_be_zss, 5416 gen_helper_sve_ldss_be_zss, } } }, 5417 5418 /* First-fault */ 5419 { { { gen_helper_sve_ldffbss_zsu, 5420 gen_helper_sve_ldffhss_be_zsu, 5421 NULL, }, 5422 { gen_helper_sve_ldffbsu_zsu, 5423 gen_helper_sve_ldffhsu_be_zsu, 5424 gen_helper_sve_ldffss_be_zsu, } }, 5425 { { gen_helper_sve_ldffbss_zss, 5426 gen_helper_sve_ldffhss_be_zss, 5427 NULL, }, 5428 { gen_helper_sve_ldffbsu_zss, 5429 gen_helper_sve_ldffhsu_be_zss, 5430 gen_helper_sve_ldffss_be_zss, } } } } }, 5431 { /* MTE Active */ 5432 { /* Little-endian */ 5433 { { { gen_helper_sve_ldbss_zsu_mte, 5434 gen_helper_sve_ldhss_le_zsu_mte, 5435 NULL, }, 5436 { gen_helper_sve_ldbsu_zsu_mte, 5437 gen_helper_sve_ldhsu_le_zsu_mte, 5438 gen_helper_sve_ldss_le_zsu_mte, } }, 5439 { { gen_helper_sve_ldbss_zss_mte, 5440 gen_helper_sve_ldhss_le_zss_mte, 5441 NULL, }, 5442 { gen_helper_sve_ldbsu_zss_mte, 5443 gen_helper_sve_ldhsu_le_zss_mte, 5444 gen_helper_sve_ldss_le_zss_mte, } } }, 5445 5446 /* First-fault */ 5447 { { { gen_helper_sve_ldffbss_zsu_mte, 5448 gen_helper_sve_ldffhss_le_zsu_mte, 5449 NULL, }, 5450 { gen_helper_sve_ldffbsu_zsu_mte, 5451 gen_helper_sve_ldffhsu_le_zsu_mte, 5452 gen_helper_sve_ldffss_le_zsu_mte, } }, 5453 { { gen_helper_sve_ldffbss_zss_mte, 5454 gen_helper_sve_ldffhss_le_zss_mte, 5455 NULL, }, 5456 { gen_helper_sve_ldffbsu_zss_mte, 5457 gen_helper_sve_ldffhsu_le_zss_mte, 5458 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5459 5460 { /* Big-endian */ 5461 { { { gen_helper_sve_ldbss_zsu_mte, 5462 gen_helper_sve_ldhss_be_zsu_mte, 5463 NULL, }, 5464 { gen_helper_sve_ldbsu_zsu_mte, 5465 gen_helper_sve_ldhsu_be_zsu_mte, 5466 gen_helper_sve_ldss_be_zsu_mte, } }, 5467 { { gen_helper_sve_ldbss_zss_mte, 5468 gen_helper_sve_ldhss_be_zss_mte, 5469 NULL, }, 5470 { gen_helper_sve_ldbsu_zss_mte, 5471 gen_helper_sve_ldhsu_be_zss_mte, 5472 gen_helper_sve_ldss_be_zss_mte, } } }, 5473 5474 /* First-fault */ 5475 { { { gen_helper_sve_ldffbss_zsu_mte, 5476 gen_helper_sve_ldffhss_be_zsu_mte, 5477 NULL, }, 5478 { gen_helper_sve_ldffbsu_zsu_mte, 5479 gen_helper_sve_ldffhsu_be_zsu_mte, 5480 gen_helper_sve_ldffss_be_zsu_mte, } }, 5481 { { gen_helper_sve_ldffbss_zss_mte, 5482 gen_helper_sve_ldffhss_be_zss_mte, 5483 NULL, }, 5484 { 
gen_helper_sve_ldffbsu_zss_mte, 5485 gen_helper_sve_ldffhsu_be_zss_mte, 5486 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 5487 }; 5488 5489 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5490 static gen_helper_gvec_mem_scatter * const 5491 gather_load_fn64[2][2][2][3][2][4] = { 5492 { /* MTE Inactive */ 5493 { /* Little-endian */ 5494 { { { gen_helper_sve_ldbds_zsu, 5495 gen_helper_sve_ldhds_le_zsu, 5496 gen_helper_sve_ldsds_le_zsu, 5497 NULL, }, 5498 { gen_helper_sve_ldbdu_zsu, 5499 gen_helper_sve_ldhdu_le_zsu, 5500 gen_helper_sve_ldsdu_le_zsu, 5501 gen_helper_sve_lddd_le_zsu, } }, 5502 { { gen_helper_sve_ldbds_zss, 5503 gen_helper_sve_ldhds_le_zss, 5504 gen_helper_sve_ldsds_le_zss, 5505 NULL, }, 5506 { gen_helper_sve_ldbdu_zss, 5507 gen_helper_sve_ldhdu_le_zss, 5508 gen_helper_sve_ldsdu_le_zss, 5509 gen_helper_sve_lddd_le_zss, } }, 5510 { { gen_helper_sve_ldbds_zd, 5511 gen_helper_sve_ldhds_le_zd, 5512 gen_helper_sve_ldsds_le_zd, 5513 NULL, }, 5514 { gen_helper_sve_ldbdu_zd, 5515 gen_helper_sve_ldhdu_le_zd, 5516 gen_helper_sve_ldsdu_le_zd, 5517 gen_helper_sve_lddd_le_zd, } } }, 5518 5519 /* First-fault */ 5520 { { { gen_helper_sve_ldffbds_zsu, 5521 gen_helper_sve_ldffhds_le_zsu, 5522 gen_helper_sve_ldffsds_le_zsu, 5523 NULL, }, 5524 { gen_helper_sve_ldffbdu_zsu, 5525 gen_helper_sve_ldffhdu_le_zsu, 5526 gen_helper_sve_ldffsdu_le_zsu, 5527 gen_helper_sve_ldffdd_le_zsu, } }, 5528 { { gen_helper_sve_ldffbds_zss, 5529 gen_helper_sve_ldffhds_le_zss, 5530 gen_helper_sve_ldffsds_le_zss, 5531 NULL, }, 5532 { gen_helper_sve_ldffbdu_zss, 5533 gen_helper_sve_ldffhdu_le_zss, 5534 gen_helper_sve_ldffsdu_le_zss, 5535 gen_helper_sve_ldffdd_le_zss, } }, 5536 { { gen_helper_sve_ldffbds_zd, 5537 gen_helper_sve_ldffhds_le_zd, 5538 gen_helper_sve_ldffsds_le_zd, 5539 NULL, }, 5540 { gen_helper_sve_ldffbdu_zd, 5541 gen_helper_sve_ldffhdu_le_zd, 5542 gen_helper_sve_ldffsdu_le_zd, 5543 gen_helper_sve_ldffdd_le_zd, } } } }, 5544 { /* Big-endian */ 5545 { { { gen_helper_sve_ldbds_zsu, 5546 gen_helper_sve_ldhds_be_zsu, 5547 gen_helper_sve_ldsds_be_zsu, 5548 NULL, }, 5549 { gen_helper_sve_ldbdu_zsu, 5550 gen_helper_sve_ldhdu_be_zsu, 5551 gen_helper_sve_ldsdu_be_zsu, 5552 gen_helper_sve_lddd_be_zsu, } }, 5553 { { gen_helper_sve_ldbds_zss, 5554 gen_helper_sve_ldhds_be_zss, 5555 gen_helper_sve_ldsds_be_zss, 5556 NULL, }, 5557 { gen_helper_sve_ldbdu_zss, 5558 gen_helper_sve_ldhdu_be_zss, 5559 gen_helper_sve_ldsdu_be_zss, 5560 gen_helper_sve_lddd_be_zss, } }, 5561 { { gen_helper_sve_ldbds_zd, 5562 gen_helper_sve_ldhds_be_zd, 5563 gen_helper_sve_ldsds_be_zd, 5564 NULL, }, 5565 { gen_helper_sve_ldbdu_zd, 5566 gen_helper_sve_ldhdu_be_zd, 5567 gen_helper_sve_ldsdu_be_zd, 5568 gen_helper_sve_lddd_be_zd, } } }, 5569 5570 /* First-fault */ 5571 { { { gen_helper_sve_ldffbds_zsu, 5572 gen_helper_sve_ldffhds_be_zsu, 5573 gen_helper_sve_ldffsds_be_zsu, 5574 NULL, }, 5575 { gen_helper_sve_ldffbdu_zsu, 5576 gen_helper_sve_ldffhdu_be_zsu, 5577 gen_helper_sve_ldffsdu_be_zsu, 5578 gen_helper_sve_ldffdd_be_zsu, } }, 5579 { { gen_helper_sve_ldffbds_zss, 5580 gen_helper_sve_ldffhds_be_zss, 5581 gen_helper_sve_ldffsds_be_zss, 5582 NULL, }, 5583 { gen_helper_sve_ldffbdu_zss, 5584 gen_helper_sve_ldffhdu_be_zss, 5585 gen_helper_sve_ldffsdu_be_zss, 5586 gen_helper_sve_ldffdd_be_zss, } }, 5587 { { gen_helper_sve_ldffbds_zd, 5588 gen_helper_sve_ldffhds_be_zd, 5589 gen_helper_sve_ldffsds_be_zd, 5590 NULL, }, 5591 { gen_helper_sve_ldffbdu_zd, 5592 gen_helper_sve_ldffhdu_be_zd, 5593 gen_helper_sve_ldffsdu_be_zd, 5594 
gen_helper_sve_ldffdd_be_zd, } } } } }, 5595 { /* MTE Active */ 5596 { /* Little-endian */ 5597 { { { gen_helper_sve_ldbds_zsu_mte, 5598 gen_helper_sve_ldhds_le_zsu_mte, 5599 gen_helper_sve_ldsds_le_zsu_mte, 5600 NULL, }, 5601 { gen_helper_sve_ldbdu_zsu_mte, 5602 gen_helper_sve_ldhdu_le_zsu_mte, 5603 gen_helper_sve_ldsdu_le_zsu_mte, 5604 gen_helper_sve_lddd_le_zsu_mte, } }, 5605 { { gen_helper_sve_ldbds_zss_mte, 5606 gen_helper_sve_ldhds_le_zss_mte, 5607 gen_helper_sve_ldsds_le_zss_mte, 5608 NULL, }, 5609 { gen_helper_sve_ldbdu_zss_mte, 5610 gen_helper_sve_ldhdu_le_zss_mte, 5611 gen_helper_sve_ldsdu_le_zss_mte, 5612 gen_helper_sve_lddd_le_zss_mte, } }, 5613 { { gen_helper_sve_ldbds_zd_mte, 5614 gen_helper_sve_ldhds_le_zd_mte, 5615 gen_helper_sve_ldsds_le_zd_mte, 5616 NULL, }, 5617 { gen_helper_sve_ldbdu_zd_mte, 5618 gen_helper_sve_ldhdu_le_zd_mte, 5619 gen_helper_sve_ldsdu_le_zd_mte, 5620 gen_helper_sve_lddd_le_zd_mte, } } }, 5621 5622 /* First-fault */ 5623 { { { gen_helper_sve_ldffbds_zsu_mte, 5624 gen_helper_sve_ldffhds_le_zsu_mte, 5625 gen_helper_sve_ldffsds_le_zsu_mte, 5626 NULL, }, 5627 { gen_helper_sve_ldffbdu_zsu_mte, 5628 gen_helper_sve_ldffhdu_le_zsu_mte, 5629 gen_helper_sve_ldffsdu_le_zsu_mte, 5630 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5631 { { gen_helper_sve_ldffbds_zss_mte, 5632 gen_helper_sve_ldffhds_le_zss_mte, 5633 gen_helper_sve_ldffsds_le_zss_mte, 5634 NULL, }, 5635 { gen_helper_sve_ldffbdu_zss_mte, 5636 gen_helper_sve_ldffhdu_le_zss_mte, 5637 gen_helper_sve_ldffsdu_le_zss_mte, 5638 gen_helper_sve_ldffdd_le_zss_mte, } }, 5639 { { gen_helper_sve_ldffbds_zd_mte, 5640 gen_helper_sve_ldffhds_le_zd_mte, 5641 gen_helper_sve_ldffsds_le_zd_mte, 5642 NULL, }, 5643 { gen_helper_sve_ldffbdu_zd_mte, 5644 gen_helper_sve_ldffhdu_le_zd_mte, 5645 gen_helper_sve_ldffsdu_le_zd_mte, 5646 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5647 { /* Big-endian */ 5648 { { { gen_helper_sve_ldbds_zsu_mte, 5649 gen_helper_sve_ldhds_be_zsu_mte, 5650 gen_helper_sve_ldsds_be_zsu_mte, 5651 NULL, }, 5652 { gen_helper_sve_ldbdu_zsu_mte, 5653 gen_helper_sve_ldhdu_be_zsu_mte, 5654 gen_helper_sve_ldsdu_be_zsu_mte, 5655 gen_helper_sve_lddd_be_zsu_mte, } }, 5656 { { gen_helper_sve_ldbds_zss_mte, 5657 gen_helper_sve_ldhds_be_zss_mte, 5658 gen_helper_sve_ldsds_be_zss_mte, 5659 NULL, }, 5660 { gen_helper_sve_ldbdu_zss_mte, 5661 gen_helper_sve_ldhdu_be_zss_mte, 5662 gen_helper_sve_ldsdu_be_zss_mte, 5663 gen_helper_sve_lddd_be_zss_mte, } }, 5664 { { gen_helper_sve_ldbds_zd_mte, 5665 gen_helper_sve_ldhds_be_zd_mte, 5666 gen_helper_sve_ldsds_be_zd_mte, 5667 NULL, }, 5668 { gen_helper_sve_ldbdu_zd_mte, 5669 gen_helper_sve_ldhdu_be_zd_mte, 5670 gen_helper_sve_ldsdu_be_zd_mte, 5671 gen_helper_sve_lddd_be_zd_mte, } } }, 5672 5673 /* First-fault */ 5674 { { { gen_helper_sve_ldffbds_zsu_mte, 5675 gen_helper_sve_ldffhds_be_zsu_mte, 5676 gen_helper_sve_ldffsds_be_zsu_mte, 5677 NULL, }, 5678 { gen_helper_sve_ldffbdu_zsu_mte, 5679 gen_helper_sve_ldffhdu_be_zsu_mte, 5680 gen_helper_sve_ldffsdu_be_zsu_mte, 5681 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5682 { { gen_helper_sve_ldffbds_zss_mte, 5683 gen_helper_sve_ldffhds_be_zss_mte, 5684 gen_helper_sve_ldffsds_be_zss_mte, 5685 NULL, }, 5686 { gen_helper_sve_ldffbdu_zss_mte, 5687 gen_helper_sve_ldffhdu_be_zss_mte, 5688 gen_helper_sve_ldffsdu_be_zss_mte, 5689 gen_helper_sve_ldffdd_be_zss_mte, } }, 5690 { { gen_helper_sve_ldffbds_zd_mte, 5691 gen_helper_sve_ldffhds_be_zd_mte, 5692 gen_helper_sve_ldffsds_be_zd_mte, 5693 NULL, }, 5694 { gen_helper_sve_ldffbdu_zd_mte, 5695 

static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, fn);
    return true;
}

static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
    return true;
}

static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz + !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, fn);
    return true;
}
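
/*
 * Note that the non-temporal gather above reuses the same tables with
 * ff=0 and, for the 64-bit case, xs=2: SVE2 LDNT1 (vector plus scalar)
 * has no first-fault form and no offset-extension choice, so the
 * existing "64-bit offset" helpers apply directly.
 */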

/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
          { gen_helper_sve_stbs_zsu,
            gen_helper_sve_sths_le_zsu,
            gen_helper_sve_stss_le_zsu, },
          { gen_helper_sve_stbs_zss,
            gen_helper_sve_sths_le_zss,
            gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
          { gen_helper_sve_stbs_zsu,
            gen_helper_sve_sths_be_zsu,
            gen_helper_sve_stss_be_zsu, },
          { gen_helper_sve_stbs_zss,
            gen_helper_sve_sths_be_zss,
            gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
          { gen_helper_sve_stbs_zsu_mte,
            gen_helper_sve_sths_le_zsu_mte,
            gen_helper_sve_stss_le_zsu_mte, },
          { gen_helper_sve_stbs_zss_mte,
            gen_helper_sve_sths_le_zss_mte,
            gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
          { gen_helper_sve_stbs_zsu_mte,
            gen_helper_sve_sths_be_zsu_mte,
            gen_helper_sve_stss_be_zsu_mte, },
          { gen_helper_sve_stbs_zss_mte,
            gen_helper_sve_sths_be_zss_mte,
            gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
          { gen_helper_sve_stbd_zsu,
            gen_helper_sve_sthd_le_zsu,
            gen_helper_sve_stsd_le_zsu,
            gen_helper_sve_stdd_le_zsu, },
          { gen_helper_sve_stbd_zss,
            gen_helper_sve_sthd_le_zss,
            gen_helper_sve_stsd_le_zss,
            gen_helper_sve_stdd_le_zss, },
          { gen_helper_sve_stbd_zd,
            gen_helper_sve_sthd_le_zd,
            gen_helper_sve_stsd_le_zd,
            gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
          { gen_helper_sve_stbd_zsu,
            gen_helper_sve_sthd_be_zsu,
            gen_helper_sve_stsd_be_zsu,
            gen_helper_sve_stdd_be_zsu, },
          { gen_helper_sve_stbd_zss,
            gen_helper_sve_sthd_be_zss,
            gen_helper_sve_stsd_be_zss,
            gen_helper_sve_stdd_be_zss, },
          { gen_helper_sve_stbd_zd,
            gen_helper_sve_sthd_be_zd,
            gen_helper_sve_stsd_be_zd,
            gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
          { gen_helper_sve_stbd_zsu_mte,
            gen_helper_sve_sthd_le_zsu_mte,
            gen_helper_sve_stsd_le_zsu_mte,
            gen_helper_sve_stdd_le_zsu_mte, },
          { gen_helper_sve_stbd_zss_mte,
            gen_helper_sve_sthd_le_zss_mte,
            gen_helper_sve_stsd_le_zss_mte,
            gen_helper_sve_stdd_le_zss_mte, },
          { gen_helper_sve_stbd_zd_mte,
            gen_helper_sve_sthd_le_zd_mte,
            gen_helper_sve_stsd_le_zd_mte,
            gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
          { gen_helper_sve_stbd_zsu_mte,
            gen_helper_sve_sthd_be_zsu_mte,
            gen_helper_sve_stsd_be_zsu_mte,
            gen_helper_sve_stdd_be_zsu_mte, },
          { gen_helper_sve_stbd_zss_mte,
            gen_helper_sve_sthd_be_zss_mte,
            gen_helper_sve_stsd_be_zss_mte,
            gen_helper_sve_stdd_be_zss_mte, },
          { gen_helper_sve_stbd_zd_mte,
            gen_helper_sve_sthd_be_zd_mte,
            gen_helper_sve_stsd_be_zd_mte,
            gen_helper_sve_stdd_be_zd_mte, } } },
};
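
/*
 * As with the gathers, the final index is msz: scatter_store_fn32 has
 * only three entries per row (B, H, S) because a 32-bit element cannot
 * supply a doubleword store, while scatter_store_fn64 carries all four.
 */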

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}

static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    s->is_nonstreaming = true;
    (void)sve_access_check(s);
    return true;
}

/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */
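
/*
 * Illustrative example (not taken from this file): a prefix pair such as
 *
 *     movprfx z0.s, p0/m, z1.s
 *     add     z0.s, p0/m, z0.s, z2.s
 *
 * is architecturally a single constructive operation; decomposing it
 * into a real move followed by the destructive add, as below, is a
 * correct if slower implementation.
 */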

TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)

/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)

/*
 * SVE2 Integer - Predicated
 */

static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL, gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL, gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)

DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)

/*
 * SVE2 Widening Integer Arithmetic
 */
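
/*
 * Throughout this group the 'data' argument appears to pack the
 * top/bottom selection for the two inputs: bit 0 selects the odd (top)
 * elements of Zn and bit 1 those of Zm, so 0 is bottom/bottom, 3 is
 * top/top, 2 is bottom/top (cf. SADDLBT) and 1 is top/bottom
 * (cf. SSUBLTB).  The widening multiply-accumulates further below
 * appear to pass just one such bit, both inputs taking the same half.
 */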

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)

static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };

    if (a->esz == 0) {
        if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
            return false;
        }
        s->is_nonstreaming = true;
    } else if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
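
/*
 * For the widening shifts that follow, do_shll_tb packs the decode
 * into the gvec immediate as (shift << 1) | top: bit 0 of 'imm'
 * selects the top half of each source element and the remaining bits
 * give the left shift.  halfbits (4 << vece) is the width of the
 * narrow input elements.
 */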

static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}

static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}

static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sshll_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
};
static const GVecGen2i sshll_ops[3] = {
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_h,
      .vece = MO_16 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_s,
      .vece = MO_32 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_d,
      .vece = MO_64 }
};
TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)

static const TCGOpcode ushll_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
};
static const GVecGen2i ushll_ops[3] = {
    { .fni8 = gen_ushll16_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_h,
      .vece = MO_16 },
    { .fni8 = gen_ushll32_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_s,
      .vece = MO_32 },
    { .fni8 = gen_ushll64_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_d,
      .vece = MO_64 },
};
TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)

static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bgrp_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
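
/*
 * The decode appears to fold ADCLB/ADCLT and SBCLB/SBCLT together
 * here: bit 0 of a->esz selects 32-bit vs 64-bit lanes, while bit 1
 * selects subtraction and is forwarded to the helper in bit 1 of the
 * data field, alongside the top/bottom bit.
 */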

static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)

TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)

static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
                              const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtnb_ops[3] = {
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)

static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtnt_ops[3] = {
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
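
/*
 * The unsigned narrows below are simpler than the signed ones above:
 * saturating an unsigned value into half the width needs only an
 * unsigned min against the half-width maximum, with no lower clamp.
 */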

static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 uqxtnb_ops[3] = {
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)

static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 uqxtnt_ops[3] = {
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)

static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtunb_ops[3] = {
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)

static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)

static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}

static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
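
/*
 * The T (top) forms below shift the narrowed result left into the odd
 * half-elements and merge it with the even elements already in the
 * destination, via bitsel against a half-width mask or, in the 64-bit
 * scalar case, a plain deposit.
 */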

static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
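
/*
 * The rounding narrows from here on supply only .fno, so
 * tcg_gen_gvec_2i always expands them out of line; presumably the
 * extra rounding addend is not worth open-coding as vector ops.
 */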

static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)

static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrunb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunb_ops[3] = {
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)

static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrunt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunt_ops[3] = {
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)

static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)

static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrnb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnb_ops[3] = {
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)

static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnt_ops[3] = {
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)

static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)

static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode uqshrnb_vec_list[] = {
    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnb_ops[3] = {
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)

static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)

static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)

#define DO_SVE2_ZZZ_NARROW(NAME, name)                                  \
    static gen_helper_gvec_3 * const name##_fns[4] = {                  \
        NULL,                       gen_helper_sve2_##name##_h,         \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                   \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)

static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */

TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)

static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)

TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, false)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, true)

TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)

TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve2_fcvtnt_ds)

static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL, gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)
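
/*
 * For the BFloat16 widening multiply-adds below, 'sel' chooses the even
 * or odd 16-bit source elements, and the indexed forms pack the index
 * above it as (index << 1) | sel.
 */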

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)

static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);
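
    /*
     * Worked example (illustrative): with vl = 32 bytes and esz = MO_16
     * there are 16 elements; element 5 sits at bit offset 5 << 1 = 10,
     * so dbit = 2 and didx = 1, i.e. bit 2 of byte 1 of Pm.
     */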
    /* Apply to either copy the source, or write zeros.  */
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);

    tcg_temp_free_i64(tmp);
    tcg_temp_free_i64(dbit);
    tcg_temp_free_i64(didx);
    tcg_temp_free_ptr(ptr);
    return true;
}

static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}

static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_sclamp_i32,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_sclamp_i64,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)

static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}

static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_uclamp_i32,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_uclamp_i64,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
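
/*
 * Both clamps compute the same scalar identity, for the signed case
 * d[i] = smin(smax(a[i], n[i]), m[i]); a minimal sketch of one MO_32
 * lane in plain C (hypothetical helper, not part of this file):
 *
 *     static inline int32_t sclamp32(int32_t a, int32_t n, int32_t m)
 *     {
 *         return n > a ? (m < n ? m : n) : (m < a ? m : a);
 *     }
 */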