/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
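/*
 * Worked examples (illustrative only; values follow the extractors above):
 *
 * For the shift-immediate fields, x is the concatenated tsz:imm3 value,
 * and the position of the most significant set bit of tsz selects the
 * element size.  E.g. x = 0b0001101 (tsz = 0b0001, imm3 = 0b101):
 * tszimm_esz() = 31 - clz32(1) = 0 (MO_8); a right shift decodes as
 * (16 << 0) - 13 = 3 and a left shift as 13 - (8 << 0) = 5.
 * A tsz of 0 yields esz = -1, flagging an unallocated encoding.
 *
 * For msz_dtype, the table is dtype = msz * 5, i.e. the diagonal of the
 * 4-bit dtype encoding where memory and element sizes agree: the
 * unsigned LD1B, LD1H, LD1W, LD1D forms.
 */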
/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
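/*
 * The gen_gvec_*_arg_* wrappers exist so that the TRANS_FEAT
 * declarations below can pass the decodetree argument struct straight
 * through.  As a rough sketch (see TRANS_FEAT in translate.h for the
 * authoritative definition), a declaration such as
 *
 *     TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, fns[a->esz], a, 0)
 *
 * expands to approximately
 *
 *     static bool trans_NAME(DisasContext *s, arg_NAME *a)
 *     {
 *         return dc_isar_feature(aa64_sve, s)
 *                && gen_gvec_ool_arg_zzz(s, fns[a->esz], a, 0);
 *     }
 *
 * so returning false from a helper reports an unallocated encoding.
 */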
/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    tcg_temp_free_ptr(status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}
/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}
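/*
 * Naming key for the wrappers above (editorial summary): each 'z' is
 * an SVE vector (Z) register operand, 'p' a predicate (P) register,
 * 'i' an immediate and 'x' an indexed-element operand; "ool" means the
 * operation runs out of line in a C helper, "fn" that it expands
 * inline via a gvec expander, and "fpst" that a float_status pointer
 * is passed.  So e.g. gen_gvec_ool_arg_zpzz is an out-of-line
 * operation on Zd, Pg, Zn, Zm taken from a decode struct.
 */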
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    /* The helper result packs N in bit 31, !Z in bit 1, and C in bit 0. */
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};
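/*
 * Illustrative reading of pred_esz_masks: a predicate has one bit per
 * byte of vector, so an element of (1 << esz) bytes owns (1 << esz)
 * predicate bits, of which only the lowest is significant.  E.g. for
 * esz = MO_32 the mask 0x1111...1111 keeps every fourth bit.
 */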
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
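/*
 * gen_xar8_i64/gen_xar16_i64 implement a per-lane rotate right on a
 * 64-bit value holding eight .B (or four .H) lanes using only full
 * 64-bit shifts: the lane mask dup_const(MO_8, 0xff >> sh) keeps the
 * bits that genuinely shifted within a lane, and ~mask keeps the bits
 * that wrapped around via the left shift.  Worked example (sketch),
 * sh = 3 on one byte lane: 0xb5 -> (0xb5 >> 3) | ((0xb5 << 5) & 0xff)
 * = 0x16 | 0xa0 = 0xb6.
 */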
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
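/*
 * For reference, the ternary bitwise operations in this group (EOR3
 * and BCAX above, the BSL variants below), with n, m, k as passed to
 * the GVecGen4 expanders:
 *     EOR3:  d = n ^ m ^ k
 *     BCAX:  d = n ^ (m & ~k)
 *     BSL:   d = (n & k) | (m & ~k)
 *     BSL1N: d = (~n & k) | (m & ~k)
 *     BSL2N: d = (n & k) | (~m & ~k)
 *     NBSL:  d = ~((n & k) | (m & ~k))
 */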
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)

static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */
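/*
 * Conceptual per-element model of the _zpzz helpers invoked below
 * (a sketch, not the actual helper code; see sve_helper.c).  These
 * are merging operations: inactive elements keep their old value.
 *
 *     for (i = 0; i < elements; i++) {
 *         if (predicate_bit(pg, i, esz)) {
 *             zd[i] = op(zn[i], zm[i]);
 *         }
 *     }
 */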
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {             \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,         \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                       \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {  \
        gen_helper_##name##_b, gen_helper_##name##_h,   \
        gen_helper_##name##_s, gen_helper_##name##_d,   \
    };                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL, gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL, gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL, gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
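/*
 * Note on the NULL entries in the per-esz tables above and below:
 * gen_gvec_ool_arg_zpz and friends return false for a NULL function
 * pointer, so an element size that has no helper (e.g. SXTB on .B)
 * falls out as an unallocated encoding.
 */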
static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL, gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {              \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,           \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,           \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}
static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
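/*
 * The "a->esz < 0 ? NULL : ..." guards above matter: tszimm_esz
 * returns -1 for an unallocated tsz encoding, and indexing the table
 * with -1 would be undefined behaviour, so the check must happen
 * before the array access.  The NULL then propagates through
 * gen_gvec_ool_arg_zpzi to report the encoding as unallocated.
 */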
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {             \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,            \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {              \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,   \
        gen_helper_sve_##name##_zzw_s, NULL                             \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                    \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])

/*
 *** SVE Index Generation Group
 */

static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}
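/*
 * Illustrative semantics (per the architecture, not the code above):
 * the INDEX forms below write zd[i] = start + i * incr for each
 * element, so e.g. "index z0.s, #0, #4" yields 0, 4, 8, ... across
 * the vector.
 */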
TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}
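/*
 * Worked example (illustrative): with a 256-bit vector length,
 * vec_full_reg_size() is 32 and pred_full_reg_size() is 4, so
 * "addvl sp, sp, #-2" subtracts 64 bytes and "addpl x0, sp, #3"
 * adds 12 -- the multipliers scale with VL rather than being fixed.
 */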
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}
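/*
 * Summary of the predicate logical group above, as computed by the
 * expanders (pg is the governing predicate):
 *     AND:  pd = pn & pm & pg        BIC:  pd = pn & ~pm & pg
 *     EOR:  pd = (pn ^ pm) & pg      ORR:  pd = (pn | pm) & pg
 *     ORN:  pd = (pn | ~pm) & pg     NOR:  pd = ~(pn | pm) & pg
 *     NAND: pd = ~(pn & pm) & pg     SEL:  pd = (pn & pg) | (pm & ~pg)
 */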
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
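/*
 * Worked example (illustrative): with fullsz = 32 bytes (a 256-bit
 * vector) and esz = MO_16 there are 16 elements.  POW2 gives 16,
 * VL7 gives 7, VL32 gives 0 (the bound exceeds 16), MUL3 gives 15,
 * and ALL gives 16.
 */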
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
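/*
 * Worked example (illustrative): for "ptrue p0.h, vl7" with a 256-bit
 * vector, fullsz = 32, numelem = 7, so setsz = 14 predicate bits and
 * word = 0x5555555555555555; lastword masks down to 0x1555, i.e. the
 * low seven .H element bits set.  Since fullsz <= 64, a single 64-bit
 * store covers the whole predicate register.
 */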
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
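/*
 * A note on the signed-overflow detection used below (standard
 * bit-twiddling, restated here for clarity): for r = x - y, overflow
 * occurred iff the operands' signs differ and r's sign differs from
 * x's, i.e. ((x ^ y) & (x ^ r)) < 0.  For r = x + y it is
 * ((r ^ y) & ~(x ^ y)) < 0.  Because, as above, the second operand is
 * known positive, a subtraction can only saturate to INT64_MIN and an
 * addition to INT64_MAX, so one movcond per direction suffices.
 */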
*/ 1949 tcg_gen_movi_i64(t1, INT64_MAX); 1950 t2 = tcg_constant_i64(0); 1951 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); 1952 } 1953 tcg_temp_free_i64(t1); 1954 } 1955 tcg_temp_free_i64(t0); 1956 } 1957 1958 /* Similarly with a vector and a scalar operand. */ 1959 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1960 TCGv_i64 val, bool u, bool d) 1961 { 1962 unsigned vsz = vec_full_reg_size(s); 1963 TCGv_ptr dptr, nptr; 1964 TCGv_i32 t32, desc; 1965 TCGv_i64 t64; 1966 1967 dptr = tcg_temp_new_ptr(); 1968 nptr = tcg_temp_new_ptr(); 1969 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd)); 1970 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn)); 1971 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1972 1973 switch (esz) { 1974 case MO_8: 1975 t32 = tcg_temp_new_i32(); 1976 tcg_gen_extrl_i64_i32(t32, val); 1977 if (d) { 1978 tcg_gen_neg_i32(t32, t32); 1979 } 1980 if (u) { 1981 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1982 } else { 1983 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1984 } 1985 tcg_temp_free_i32(t32); 1986 break; 1987 1988 case MO_16: 1989 t32 = tcg_temp_new_i32(); 1990 tcg_gen_extrl_i64_i32(t32, val); 1991 if (d) { 1992 tcg_gen_neg_i32(t32, t32); 1993 } 1994 if (u) { 1995 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1996 } else { 1997 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1998 } 1999 tcg_temp_free_i32(t32); 2000 break; 2001 2002 case MO_32: 2003 t64 = tcg_temp_new_i64(); 2004 if (d) { 2005 tcg_gen_neg_i64(t64, val); 2006 } else { 2007 tcg_gen_mov_i64(t64, val); 2008 } 2009 if (u) { 2010 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 2011 } else { 2012 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 2013 } 2014 tcg_temp_free_i64(t64); 2015 break; 2016 2017 case MO_64: 2018 if (u) { 2019 if (d) { 2020 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 2021 } else { 2022 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 2023 } 2024 } else if (d) { 2025 t64 = tcg_temp_new_i64(); 2026 tcg_gen_neg_i64(t64, val); 2027 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 2028 tcg_temp_free_i64(t64); 2029 } else { 2030 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 2031 } 2032 break; 2033 2034 default: 2035 g_assert_not_reached(); 2036 } 2037 2038 tcg_temp_free_ptr(dptr); 2039 tcg_temp_free_ptr(nptr); 2040 } 2041 2042 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 2043 { 2044 if (!dc_isar_feature(aa64_sve, s)) { 2045 return false; 2046 } 2047 if (sve_access_check(s)) { 2048 unsigned fullsz = vec_full_reg_size(s); 2049 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2050 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 2051 } 2052 return true; 2053 } 2054 2055 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 2056 { 2057 if (!dc_isar_feature(aa64_sve, s)) { 2058 return false; 2059 } 2060 if (sve_access_check(s)) { 2061 unsigned fullsz = vec_full_reg_size(s); 2062 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2063 int inc = numelem * a->imm * (a->d ? 
-1 : 1); 2064 TCGv_i64 reg = cpu_reg(s, a->rd); 2065 2066 tcg_gen_addi_i64(reg, reg, inc); 2067 } 2068 return true; 2069 } 2070 2071 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 2072 { 2073 if (!dc_isar_feature(aa64_sve, s)) { 2074 return false; 2075 } 2076 if (!sve_access_check(s)) { 2077 return true; 2078 } 2079 2080 unsigned fullsz = vec_full_reg_size(s); 2081 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2082 int inc = numelem * a->imm; 2083 TCGv_i64 reg = cpu_reg(s, a->rd); 2084 2085 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 2086 if (inc == 0) { 2087 if (a->u) { 2088 tcg_gen_ext32u_i64(reg, reg); 2089 } else { 2090 tcg_gen_ext32s_i64(reg, reg); 2091 } 2092 } else { 2093 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 2094 } 2095 return true; 2096 } 2097 2098 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2099 { 2100 if (!dc_isar_feature(aa64_sve, s)) { 2101 return false; 2102 } 2103 if (!sve_access_check(s)) { 2104 return true; 2105 } 2106 2107 unsigned fullsz = vec_full_reg_size(s); 2108 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2109 int inc = numelem * a->imm; 2110 TCGv_i64 reg = cpu_reg(s, a->rd); 2111 2112 if (inc != 0) { 2113 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2114 } 2115 return true; 2116 } 2117 2118 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2119 { 2120 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2121 return false; 2122 } 2123 2124 unsigned fullsz = vec_full_reg_size(s); 2125 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2126 int inc = numelem * a->imm; 2127 2128 if (inc != 0) { 2129 if (sve_access_check(s)) { 2130 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2131 vec_full_reg_offset(s, a->rn), 2132 tcg_constant_i64(a->d ? 
-inc : inc), 2133 fullsz, fullsz); 2134 } 2135 } else { 2136 do_mov_z(s, a->rd, a->rn); 2137 } 2138 return true; 2139 } 2140 2141 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2142 { 2143 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2144 return false; 2145 } 2146 2147 unsigned fullsz = vec_full_reg_size(s); 2148 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2149 int inc = numelem * a->imm; 2150 2151 if (inc != 0) { 2152 if (sve_access_check(s)) { 2153 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2154 tcg_constant_i64(inc), a->u, a->d); 2155 } 2156 } else { 2157 do_mov_z(s, a->rd, a->rn); 2158 } 2159 return true; 2160 } 2161 2162 /* 2163 *** SVE Bitwise Immediate Group 2164 */ 2165 2166 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2167 { 2168 uint64_t imm; 2169 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2170 extract32(a->dbm, 0, 6), 2171 extract32(a->dbm, 6, 6))) { 2172 return false; 2173 } 2174 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2175 } 2176 2177 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2178 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2179 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2180 2181 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2182 { 2183 uint64_t imm; 2184 2185 if (!dc_isar_feature(aa64_sve, s)) { 2186 return false; 2187 } 2188 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2189 extract32(a->dbm, 0, 6), 2190 extract32(a->dbm, 6, 6))) { 2191 return false; 2192 } 2193 if (sve_access_check(s)) { 2194 do_dupi_z(s, a->rd, imm); 2195 } 2196 return true; 2197 } 2198 2199 /* 2200 *** SVE Integer Wide Immediate - Predicated Group 2201 */ 2202 2203 /* Implement all merging copies. This is used for CPY (immediate), 2204 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 2205 */ 2206 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2207 TCGv_i64 val) 2208 { 2209 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2210 static gen_cpy * const fns[4] = { 2211 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2212 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2213 }; 2214 unsigned vsz = vec_full_reg_size(s); 2215 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2216 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2217 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2218 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2219 2220 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); 2221 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn)); 2222 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 2223 2224 fns[esz](t_zd, t_zn, t_pg, val, desc); 2225 2226 tcg_temp_free_ptr(t_zd); 2227 tcg_temp_free_ptr(t_zn); 2228 tcg_temp_free_ptr(t_pg); 2229 } 2230 2231 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2232 { 2233 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2234 return false; 2235 } 2236 if (sve_access_check(s)) { 2237 /* Decode the VFP immediate. 
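         * vfp_expand_imm applies the standard 8-bit FP modified-immediate
         * expansion for the element size given by a->esz.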
*/ 2238 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2239 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2240 } 2241 return true; 2242 } 2243 2244 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2245 { 2246 if (!dc_isar_feature(aa64_sve, s)) { 2247 return false; 2248 } 2249 if (sve_access_check(s)) { 2250 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2251 } 2252 return true; 2253 } 2254 2255 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2256 { 2257 static gen_helper_gvec_2i * const fns[4] = { 2258 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2259 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2260 }; 2261 2262 if (!dc_isar_feature(aa64_sve, s)) { 2263 return false; 2264 } 2265 if (sve_access_check(s)) { 2266 unsigned vsz = vec_full_reg_size(s); 2267 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2268 pred_full_reg_offset(s, a->pg), 2269 tcg_constant_i64(a->imm), 2270 vsz, vsz, 0, fns[a->esz]); 2271 } 2272 return true; 2273 } 2274 2275 /* 2276 *** SVE Permute Extract Group 2277 */ 2278 2279 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2280 { 2281 if (!sve_access_check(s)) { 2282 return true; 2283 } 2284 2285 unsigned vsz = vec_full_reg_size(s); 2286 unsigned n_ofs = imm >= vsz ? 0 : imm; 2287 unsigned n_siz = vsz - n_ofs; 2288 unsigned d = vec_full_reg_offset(s, rd); 2289 unsigned n = vec_full_reg_offset(s, rn); 2290 unsigned m = vec_full_reg_offset(s, rm); 2291 2292 /* Use host vector move insns if we have appropriate sizes 2293 * and no unfortunate overlap. 2294 */ 2295 if (m != d 2296 && n_ofs == size_for_gvec(n_ofs) 2297 && n_siz == size_for_gvec(n_siz) 2298 && (d != n || n_siz <= n_ofs)) { 2299 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2300 if (n_ofs != 0) { 2301 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2302 } 2303 } else { 2304 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2305 } 2306 return true; 2307 } 2308 2309 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2310 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2311 2312 /* 2313 *** SVE Permute - Unpredicated Group 2314 */ 2315 2316 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2317 { 2318 if (!dc_isar_feature(aa64_sve, s)) { 2319 return false; 2320 } 2321 if (sve_access_check(s)) { 2322 unsigned vsz = vec_full_reg_size(s); 2323 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2324 vsz, vsz, cpu_reg_sp(s, a->rn)); 2325 } 2326 return true; 2327 } 2328 2329 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2330 { 2331 if (!dc_isar_feature(aa64_sve, s)) { 2332 return false; 2333 } 2334 if ((a->imm & 0x1f) == 0) { 2335 return false; 2336 } 2337 if (sve_access_check(s)) { 2338 unsigned vsz = vec_full_reg_size(s); 2339 unsigned dofs = vec_full_reg_offset(s, a->rd); 2340 unsigned esz, index; 2341 2342 esz = ctz32(a->imm); 2343 index = a->imm >> (esz + 1); 2344 2345 if ((index << esz) < vsz) { 2346 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2347 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2348 } else { 2349 /* 2350 * While dup_mem handles 128-bit elements, dup_imm does not. 2351 * Thankfully element size doesn't matter for splatting zero. 
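             * An index beyond the current vector length takes this path
             * and zeroes the whole destination, which is the architected
             * result for an out-of-range index.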
2352 */ 2353 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2354 } 2355 } 2356 return true; 2357 } 2358 2359 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2360 { 2361 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2362 static gen_insr * const fns[4] = { 2363 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2364 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2365 }; 2366 unsigned vsz = vec_full_reg_size(s); 2367 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2368 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2369 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2370 2371 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd)); 2372 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2373 2374 fns[a->esz](t_zd, t_zn, val, desc); 2375 2376 tcg_temp_free_ptr(t_zd); 2377 tcg_temp_free_ptr(t_zn); 2378 } 2379 2380 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2381 { 2382 if (!dc_isar_feature(aa64_sve, s)) { 2383 return false; 2384 } 2385 if (sve_access_check(s)) { 2386 TCGv_i64 t = tcg_temp_new_i64(); 2387 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2388 do_insr_i64(s, a, t); 2389 tcg_temp_free_i64(t); 2390 } 2391 return true; 2392 } 2393 2394 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2395 { 2396 if (!dc_isar_feature(aa64_sve, s)) { 2397 return false; 2398 } 2399 if (sve_access_check(s)) { 2400 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2401 } 2402 return true; 2403 } 2404 2405 static gen_helper_gvec_2 * const rev_fns[4] = { 2406 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2407 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2408 }; 2409 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2410 2411 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2412 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2413 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2414 }; 2415 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0) 2416 2417 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2418 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2419 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2420 }; 2421 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2422 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2423 2424 static gen_helper_gvec_3 * const tbx_fns[4] = { 2425 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2426 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2427 }; 2428 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2429 2430 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2431 { 2432 static gen_helper_gvec_2 * const fns[4][2] = { 2433 { NULL, NULL }, 2434 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2435 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2436 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2437 }; 2438 2439 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2440 return false; 2441 } 2442 if (sve_access_check(s)) { 2443 unsigned vsz = vec_full_reg_size(s); 2444 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2445 vec_full_reg_offset(s, a->rn) 2446 + (a->h ? 
vsz / 2 : 0), 2447 vsz, vsz, 0, fns[a->esz][a->u]); 2448 } 2449 return true; 2450 } 2451 2452 /* 2453 *** SVE Permute - Predicates Group 2454 */ 2455 2456 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2457 gen_helper_gvec_3 *fn) 2458 { 2459 if (!sve_access_check(s)) { 2460 return true; 2461 } 2462 2463 unsigned vsz = pred_full_reg_size(s); 2464 2465 TCGv_ptr t_d = tcg_temp_new_ptr(); 2466 TCGv_ptr t_n = tcg_temp_new_ptr(); 2467 TCGv_ptr t_m = tcg_temp_new_ptr(); 2468 uint32_t desc = 0; 2469 2470 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2471 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2472 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2473 2474 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2475 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2476 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm)); 2477 2478 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2479 2480 tcg_temp_free_ptr(t_d); 2481 tcg_temp_free_ptr(t_n); 2482 tcg_temp_free_ptr(t_m); 2483 return true; 2484 } 2485 2486 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2487 gen_helper_gvec_2 *fn) 2488 { 2489 if (!sve_access_check(s)) { 2490 return true; 2491 } 2492 2493 unsigned vsz = pred_full_reg_size(s); 2494 TCGv_ptr t_d = tcg_temp_new_ptr(); 2495 TCGv_ptr t_n = tcg_temp_new_ptr(); 2496 uint32_t desc = 0; 2497 2498 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2499 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2500 2501 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2502 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2503 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2504 2505 fn(t_d, t_n, tcg_constant_i32(desc)); 2506 2507 tcg_temp_free_ptr(t_d); 2508 tcg_temp_free_ptr(t_n); 2509 return true; 2510 } 2511 2512 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2513 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2514 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2515 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2516 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2517 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2518 2519 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2520 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2521 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2522 2523 /* 2524 *** SVE Permute - Interleaving Group 2525 */ 2526 2527 static gen_helper_gvec_3 * const zip_fns[4] = { 2528 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2529 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2530 }; 2531 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2532 zip_fns[a->esz], a, 0) 2533 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2534 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2535 2536 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2537 gen_helper_sve2_zip_q, a, 0) 2538 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2539 gen_helper_sve2_zip_q, a, 2540 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2541 2542 static gen_helper_gvec_3 * const uzp_fns[4] = { 2543 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2544 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2545 }; 2546 2547 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2548 uzp_fns[a->esz], a, 0) 2549 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2550 uzp_fns[a->esz], a, 1 << 
a->esz) 2551 2552 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2553 gen_helper_sve2_uzp_q, a, 0) 2554 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2555 gen_helper_sve2_uzp_q, a, 16) 2556 2557 static gen_helper_gvec_3 * const trn_fns[4] = { 2558 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2559 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2560 }; 2561 2562 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2563 trn_fns[a->esz], a, 0) 2564 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2565 trn_fns[a->esz], a, 1 << a->esz) 2566 2567 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2568 gen_helper_sve2_trn_q, a, 0) 2569 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2570 gen_helper_sve2_trn_q, a, 16) 2571 2572 /* 2573 *** SVE Permute Vector - Predicated Group 2574 */ 2575 2576 static gen_helper_gvec_3 * const compact_fns[4] = { 2577 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2578 }; 2579 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2580 compact_fns[a->esz], a, 0) 2581 2582 /* Call the helper that computes the ARM LastActiveElement pseudocode 2583 * function, scaled by the element size. This includes the not found 2584 * indication; e.g. not found for esz=3 is -8. 2585 */ 2586 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2587 { 2588 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2589 * round up, as we do elsewhere, because we need the exact size. 2590 */ 2591 TCGv_ptr t_p = tcg_temp_new_ptr(); 2592 unsigned desc = 0; 2593 2594 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2595 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2596 2597 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg)); 2598 2599 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2600 2601 tcg_temp_free_ptr(t_p); 2602 } 2603 2604 /* Increment LAST to the offset of the next element in the vector, 2605 * wrapping around to 0. 2606 */ 2607 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2608 { 2609 unsigned vsz = vec_full_reg_size(s); 2610 2611 tcg_gen_addi_i32(last, last, 1 << esz); 2612 if (is_power_of_2(vsz)) { 2613 tcg_gen_andi_i32(last, last, vsz - 1); 2614 } else { 2615 TCGv_i32 max = tcg_constant_i32(vsz); 2616 TCGv_i32 zero = tcg_constant_i32(0); 2617 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2618 } 2619 } 2620 2621 /* If LAST < 0, set LAST to the offset of the last element in the vector. */ 2622 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2623 { 2624 unsigned vsz = vec_full_reg_size(s); 2625 2626 if (is_power_of_2(vsz)) { 2627 tcg_gen_andi_i32(last, last, vsz - 1); 2628 } else { 2629 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2630 TCGv_i32 zero = tcg_constant_i32(0); 2631 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2632 } 2633 } 2634 2635 /* Load an unsigned element of ESZ from BASE+OFS. */ 2636 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2637 { 2638 TCGv_i64 r = tcg_temp_new_i64(); 2639 2640 switch (esz) { 2641 case 0: 2642 tcg_gen_ld8u_i64(r, base, ofs); 2643 break; 2644 case 1: 2645 tcg_gen_ld16u_i64(r, base, ofs); 2646 break; 2647 case 2: 2648 tcg_gen_ld32u_i64(r, base, ofs); 2649 break; 2650 case 3: 2651 tcg_gen_ld_i64(r, base, ofs); 2652 break; 2653 default: 2654 g_assert_not_reached(); 2655 } 2656 return r; 2657 } 2658 2659 /* Load an unsigned element of ESZ from RM[LAST]. 
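 * LAST is a byte offset into the Zreg, already scaled by the element size.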
*/ 2660 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2661 int rm, int esz) 2662 { 2663 TCGv_ptr p = tcg_temp_new_ptr(); 2664 TCGv_i64 r; 2665 2666 /* Convert offset into vector into offset into ENV. 2667 * The final adjustment for the vector register base 2668 * is added via constant offset to the load. 2669 */ 2670 #if HOST_BIG_ENDIAN 2671 /* Adjust for element ordering. See vec_reg_offset. */ 2672 if (esz < 3) { 2673 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2674 } 2675 #endif 2676 tcg_gen_ext_i32_ptr(p, last); 2677 tcg_gen_add_ptr(p, p, cpu_env); 2678 2679 r = load_esz(p, vec_full_reg_offset(s, rm), esz); 2680 tcg_temp_free_ptr(p); 2681 2682 return r; 2683 } 2684 2685 /* Compute CLAST for a Zreg. */ 2686 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2687 { 2688 TCGv_i32 last; 2689 TCGLabel *over; 2690 TCGv_i64 ele; 2691 unsigned vsz, esz = a->esz; 2692 2693 if (!sve_access_check(s)) { 2694 return true; 2695 } 2696 2697 last = tcg_temp_local_new_i32(); 2698 over = gen_new_label(); 2699 2700 find_last_active(s, last, esz, a->pg); 2701 2702 /* There is of course no movcond for a 2048-bit vector, 2703 * so we must branch over the actual store. 2704 */ 2705 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2706 2707 if (!before) { 2708 incr_last_active(s, last, esz); 2709 } 2710 2711 ele = load_last_active(s, last, a->rm, esz); 2712 tcg_temp_free_i32(last); 2713 2714 vsz = vec_full_reg_size(s); 2715 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2716 tcg_temp_free_i64(ele); 2717 2718 /* If this insn used MOVPRFX, we may need a second move. */ 2719 if (a->rd != a->rn) { 2720 TCGLabel *done = gen_new_label(); 2721 tcg_gen_br(done); 2722 2723 gen_set_label(over); 2724 do_mov_z(s, a->rd, a->rn); 2725 2726 gen_set_label(done); 2727 } else { 2728 gen_set_label(over); 2729 } 2730 return true; 2731 } 2732 2733 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2734 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2735 2736 /* Compute CLAST for a scalar. */ 2737 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2738 bool before, TCGv_i64 reg_val) 2739 { 2740 TCGv_i32 last = tcg_temp_new_i32(); 2741 TCGv_i64 ele, cmp; 2742 2743 find_last_active(s, last, esz, pg); 2744 2745 /* Extend the original value of last prior to incrementing. */ 2746 cmp = tcg_temp_new_i64(); 2747 tcg_gen_ext_i32_i64(cmp, last); 2748 2749 if (!before) { 2750 incr_last_active(s, last, esz); 2751 } 2752 2753 /* The conceit here is that while last < 0 indicates not found, after 2754 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address 2755 * from which we can load garbage. We then discard the garbage with 2756 * a conditional move. 2757 */ 2758 ele = load_last_active(s, last, rm, esz); 2759 tcg_temp_free_i32(last); 2760 2761 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2762 ele, reg_val); 2763 2764 tcg_temp_free_i64(cmp); 2765 tcg_temp_free_i64(ele); 2766 } 2767 2768 /* Compute CLAST for a Vreg. 
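 * The selected scalar is written to the low bits of Vd, and
 * write_fp_dreg zero-extends it into the rest of the register.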
*/ 2769 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2770 { 2771 if (sve_access_check(s)) { 2772 int esz = a->esz; 2773 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2774 TCGv_i64 reg = load_esz(cpu_env, ofs, esz); 2775 2776 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2777 write_fp_dreg(s, a->rd, reg); 2778 tcg_temp_free_i64(reg); 2779 } 2780 return true; 2781 } 2782 2783 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2784 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2785 2786 /* Compute CLAST for a Xreg. */ 2787 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2788 { 2789 TCGv_i64 reg; 2790 2791 if (!sve_access_check(s)) { 2792 return true; 2793 } 2794 2795 reg = cpu_reg(s, a->rd); 2796 switch (a->esz) { 2797 case 0: 2798 tcg_gen_ext8u_i64(reg, reg); 2799 break; 2800 case 1: 2801 tcg_gen_ext16u_i64(reg, reg); 2802 break; 2803 case 2: 2804 tcg_gen_ext32u_i64(reg, reg); 2805 break; 2806 case 3: 2807 break; 2808 default: 2809 g_assert_not_reached(); 2810 } 2811 2812 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2813 return true; 2814 } 2815 2816 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2817 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2818 2819 /* Compute LAST for a scalar. */ 2820 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2821 int pg, int rm, bool before) 2822 { 2823 TCGv_i32 last = tcg_temp_new_i32(); 2824 TCGv_i64 ret; 2825 2826 find_last_active(s, last, esz, pg); 2827 if (before) { 2828 wrap_last_active(s, last, esz); 2829 } else { 2830 incr_last_active(s, last, esz); 2831 } 2832 2833 ret = load_last_active(s, last, rm, esz); 2834 tcg_temp_free_i32(last); 2835 return ret; 2836 } 2837 2838 /* Compute LAST for a Vreg. */ 2839 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2840 { 2841 if (sve_access_check(s)) { 2842 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2843 write_fp_dreg(s, a->rd, val); 2844 tcg_temp_free_i64(val); 2845 } 2846 return true; 2847 } 2848 2849 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2850 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2851 2852 /* Compute LAST for a Xreg. 
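 * As for the Vreg form, but the (zero-extended) element is copied to Xd.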
*/ 2853 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2854 { 2855 if (sve_access_check(s)) { 2856 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2857 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2858 tcg_temp_free_i64(val); 2859 } 2860 return true; 2861 } 2862 2863 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2864 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2865 2866 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2867 { 2868 if (!dc_isar_feature(aa64_sve, s)) { 2869 return false; 2870 } 2871 if (sve_access_check(s)) { 2872 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2873 } 2874 return true; 2875 } 2876 2877 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2878 { 2879 if (!dc_isar_feature(aa64_sve, s)) { 2880 return false; 2881 } 2882 if (sve_access_check(s)) { 2883 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2884 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz); 2885 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2886 tcg_temp_free_i64(t); 2887 } 2888 return true; 2889 } 2890 2891 static gen_helper_gvec_3 * const revb_fns[4] = { 2892 NULL, gen_helper_sve_revb_h, 2893 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2894 }; 2895 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2896 2897 static gen_helper_gvec_3 * const revh_fns[4] = { 2898 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2899 }; 2900 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2901 2902 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2903 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2904 2905 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2906 2907 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2908 gen_helper_sve_splice, a, a->esz) 2909 2910 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2911 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2912 2913 /* 2914 *** SVE Integer Compare - Vectors Group 2915 */ 2916 2917 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2918 gen_helper_gvec_flags_4 *gen_fn) 2919 { 2920 TCGv_ptr pd, zn, zm, pg; 2921 unsigned vsz; 2922 TCGv_i32 t; 2923 2924 if (gen_fn == NULL) { 2925 return false; 2926 } 2927 if (!sve_access_check(s)) { 2928 return true; 2929 } 2930 2931 vsz = vec_full_reg_size(s); 2932 t = tcg_temp_new_i32(); 2933 pd = tcg_temp_new_ptr(); 2934 zn = tcg_temp_new_ptr(); 2935 zm = tcg_temp_new_ptr(); 2936 pg = tcg_temp_new_ptr(); 2937 2938 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 2939 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2940 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm)); 2941 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 2942 2943 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2944 2945 tcg_temp_free_ptr(pd); 2946 tcg_temp_free_ptr(zn); 2947 tcg_temp_free_ptr(zm); 2948 tcg_temp_free_ptr(pg); 2949 2950 do_pred_flags(t); 2951 2952 tcg_temp_free_i32(t); 2953 return true; 2954 } 2955 2956 #define DO_PPZZ(NAME, name) \ 2957 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2958 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2959 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2960 }; \ 2961 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2962 a, name##_ppzz_fns[a->esz]) 2963 2964 DO_PPZZ(CMPEQ, cmpeq) 2965 DO_PPZZ(CMPNE, cmpne) 2966 DO_PPZZ(CMPGT, cmpgt) 2967 DO_PPZZ(CMPGE, cmpge) 2968 
DO_PPZZ(CMPHI, cmphi) 2969 DO_PPZZ(CMPHS, cmphs) 2970 2971 #undef DO_PPZZ 2972 2973 #define DO_PPZW(NAME, name) \ 2974 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2975 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2976 gen_helper_sve_##name##_ppzw_s, NULL \ 2977 }; \ 2978 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2979 a, name##_ppzw_fns[a->esz]) 2980 2981 DO_PPZW(CMPEQ, cmpeq) 2982 DO_PPZW(CMPNE, cmpne) 2983 DO_PPZW(CMPGT, cmpgt) 2984 DO_PPZW(CMPGE, cmpge) 2985 DO_PPZW(CMPHI, cmphi) 2986 DO_PPZW(CMPHS, cmphs) 2987 DO_PPZW(CMPLT, cmplt) 2988 DO_PPZW(CMPLE, cmple) 2989 DO_PPZW(CMPLO, cmplo) 2990 DO_PPZW(CMPLS, cmpls) 2991 2992 #undef DO_PPZW 2993 2994 /* 2995 *** SVE Integer Compare - Immediate Groups 2996 */ 2997 2998 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2999 gen_helper_gvec_flags_3 *gen_fn) 3000 { 3001 TCGv_ptr pd, zn, pg; 3002 unsigned vsz; 3003 TCGv_i32 t; 3004 3005 if (gen_fn == NULL) { 3006 return false; 3007 } 3008 if (!sve_access_check(s)) { 3009 return true; 3010 } 3011 3012 vsz = vec_full_reg_size(s); 3013 t = tcg_temp_new_i32(); 3014 pd = tcg_temp_new_ptr(); 3015 zn = tcg_temp_new_ptr(); 3016 pg = tcg_temp_new_ptr(); 3017 3018 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 3019 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 3020 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3021 3022 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 3023 3024 tcg_temp_free_ptr(pd); 3025 tcg_temp_free_ptr(zn); 3026 tcg_temp_free_ptr(pg); 3027 3028 do_pred_flags(t); 3029 3030 tcg_temp_free_i32(t); 3031 return true; 3032 } 3033 3034 #define DO_PPZI(NAME, name) \ 3035 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 3036 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 3037 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 3038 }; \ 3039 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 3040 name##_ppzi_fns[a->esz]) 3041 3042 DO_PPZI(CMPEQ, cmpeq) 3043 DO_PPZI(CMPNE, cmpne) 3044 DO_PPZI(CMPGT, cmpgt) 3045 DO_PPZI(CMPGE, cmpge) 3046 DO_PPZI(CMPHI, cmphi) 3047 DO_PPZI(CMPHS, cmphs) 3048 DO_PPZI(CMPLT, cmplt) 3049 DO_PPZI(CMPLE, cmple) 3050 DO_PPZI(CMPLO, cmplo) 3051 DO_PPZI(CMPLS, cmpls) 3052 3053 #undef DO_PPZI 3054 3055 /* 3056 *** SVE Partition Break Group 3057 */ 3058 3059 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 3060 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 3061 { 3062 if (!sve_access_check(s)) { 3063 return true; 3064 } 3065 3066 unsigned vsz = pred_full_reg_size(s); 3067 3068 /* Predicate sizes may be smaller and cannot use simd_desc. 
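     * Only OPRSZ is encoded below; the BRK helpers operate on the
     * predicate bits directly and do not need an element size.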
*/ 3069 TCGv_ptr d = tcg_temp_new_ptr(); 3070 TCGv_ptr n = tcg_temp_new_ptr(); 3071 TCGv_ptr m = tcg_temp_new_ptr(); 3072 TCGv_ptr g = tcg_temp_new_ptr(); 3073 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3074 3075 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 3076 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 3077 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm)); 3078 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 3079 3080 if (a->s) { 3081 TCGv_i32 t = tcg_temp_new_i32(); 3082 fn_s(t, d, n, m, g, desc); 3083 do_pred_flags(t); 3084 tcg_temp_free_i32(t); 3085 } else { 3086 fn(d, n, m, g, desc); 3087 } 3088 tcg_temp_free_ptr(d); 3089 tcg_temp_free_ptr(n); 3090 tcg_temp_free_ptr(m); 3091 tcg_temp_free_ptr(g); 3092 return true; 3093 } 3094 3095 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 3096 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 3097 { 3098 if (!sve_access_check(s)) { 3099 return true; 3100 } 3101 3102 unsigned vsz = pred_full_reg_size(s); 3103 3104 /* Predicate sizes may be smaller and cannot use simd_desc. */ 3105 TCGv_ptr d = tcg_temp_new_ptr(); 3106 TCGv_ptr n = tcg_temp_new_ptr(); 3107 TCGv_ptr g = tcg_temp_new_ptr(); 3108 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3109 3110 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 3111 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 3112 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 3113 3114 if (a->s) { 3115 TCGv_i32 t = tcg_temp_new_i32(); 3116 fn_s(t, d, n, g, desc); 3117 do_pred_flags(t); 3118 tcg_temp_free_i32(t); 3119 } else { 3120 fn(d, n, g, desc); 3121 } 3122 tcg_temp_free_ptr(d); 3123 tcg_temp_free_ptr(n); 3124 tcg_temp_free_ptr(g); 3125 return true; 3126 } 3127 3128 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3129 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3130 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3131 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3132 3133 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3134 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3135 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3136 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3137 3138 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3139 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3140 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3141 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3142 3143 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3144 gen_helper_sve_brkn, gen_helper_sve_brkns) 3145 3146 /* 3147 *** SVE Predicate Count Group 3148 */ 3149 3150 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3151 { 3152 unsigned psz = pred_full_reg_size(s); 3153 3154 if (psz <= 8) { 3155 uint64_t psz_mask; 3156 3157 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn)); 3158 if (pn != pg) { 3159 TCGv_i64 g = tcg_temp_new_i64(); 3160 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg)); 3161 tcg_gen_and_i64(val, val, g); 3162 tcg_temp_free_i64(g); 3163 } 3164 3165 /* Reduce the pred_esz_masks value simply to reduce the 3166 * size of the code generated here. 
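         * E.g. for psz == 2 and esz == MO_16 the constant shrinks from
         * 0x5555555555555555 to 0x5555.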
3167 */ 3168 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3169 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3170 3171 tcg_gen_ctpop_i64(val, val); 3172 } else { 3173 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3174 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3175 unsigned desc = 0; 3176 3177 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3178 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3179 3180 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn)); 3181 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3182 3183 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3184 tcg_temp_free_ptr(t_pn); 3185 tcg_temp_free_ptr(t_pg); 3186 } 3187 } 3188 3189 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3190 { 3191 if (!dc_isar_feature(aa64_sve, s)) { 3192 return false; 3193 } 3194 if (sve_access_check(s)) { 3195 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3196 } 3197 return true; 3198 } 3199 3200 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3201 { 3202 if (!dc_isar_feature(aa64_sve, s)) { 3203 return false; 3204 } 3205 if (sve_access_check(s)) { 3206 TCGv_i64 reg = cpu_reg(s, a->rd); 3207 TCGv_i64 val = tcg_temp_new_i64(); 3208 3209 do_cntp(s, val, a->esz, a->pg, a->pg); 3210 if (a->d) { 3211 tcg_gen_sub_i64(reg, reg, val); 3212 } else { 3213 tcg_gen_add_i64(reg, reg, val); 3214 } 3215 tcg_temp_free_i64(val); 3216 } 3217 return true; 3218 } 3219 3220 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3221 { 3222 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3223 return false; 3224 } 3225 if (sve_access_check(s)) { 3226 unsigned vsz = vec_full_reg_size(s); 3227 TCGv_i64 val = tcg_temp_new_i64(); 3228 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3229 3230 do_cntp(s, val, a->esz, a->pg, a->pg); 3231 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3232 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3233 } 3234 return true; 3235 } 3236 3237 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3238 { 3239 if (!dc_isar_feature(aa64_sve, s)) { 3240 return false; 3241 } 3242 if (sve_access_check(s)) { 3243 TCGv_i64 reg = cpu_reg(s, a->rd); 3244 TCGv_i64 val = tcg_temp_new_i64(); 3245 3246 do_cntp(s, val, a->esz, a->pg, a->pg); 3247 do_sat_addsub_32(reg, val, a->u, a->d); 3248 } 3249 return true; 3250 } 3251 3252 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3253 { 3254 if (!dc_isar_feature(aa64_sve, s)) { 3255 return false; 3256 } 3257 if (sve_access_check(s)) { 3258 TCGv_i64 reg = cpu_reg(s, a->rd); 3259 TCGv_i64 val = tcg_temp_new_i64(); 3260 3261 do_cntp(s, val, a->esz, a->pg, a->pg); 3262 do_sat_addsub_64(reg, val, a->u, a->d); 3263 } 3264 return true; 3265 } 3266 3267 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3268 { 3269 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3270 return false; 3271 } 3272 if (sve_access_check(s)) { 3273 TCGv_i64 val = tcg_temp_new_i64(); 3274 do_cntp(s, val, a->esz, a->pg, a->pg); 3275 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3276 } 3277 return true; 3278 } 3279 3280 /* 3281 *** SVE Integer Compare Scalars Group 3282 */ 3283 3284 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3285 { 3286 if (!dc_isar_feature(aa64_sve, s)) { 3287 return false; 3288 } 3289 if (!sve_access_check(s)) { 3290 return true; 3291 } 3292 3293 TCGCond cond = (a->ne ? 
TCG_COND_NE : TCG_COND_EQ); 3294 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3295 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3296 TCGv_i64 cmp = tcg_temp_new_i64(); 3297 3298 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3299 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3300 tcg_temp_free_i64(cmp); 3301 3302 /* VF = !NF & !CF. */ 3303 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3304 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3305 3306 /* Both NF and VF actually look at bit 31. */ 3307 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3308 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3309 return true; 3310 } 3311 3312 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3313 { 3314 TCGv_i64 op0, op1, t0, t1, tmax; 3315 TCGv_i32 t2; 3316 TCGv_ptr ptr; 3317 unsigned vsz = vec_full_reg_size(s); 3318 unsigned desc = 0; 3319 TCGCond cond; 3320 uint64_t maxval; 3321 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3322 bool eq = a->eq == a->lt; 3323 3324 /* The greater-than conditions are all SVE2. */ 3325 if (a->lt 3326 ? !dc_isar_feature(aa64_sve, s) 3327 : !dc_isar_feature(aa64_sve2, s)) { 3328 return false; 3329 } 3330 if (!sve_access_check(s)) { 3331 return true; 3332 } 3333 3334 op0 = read_cpu_reg(s, a->rn, 1); 3335 op1 = read_cpu_reg(s, a->rm, 1); 3336 3337 if (!a->sf) { 3338 if (a->u) { 3339 tcg_gen_ext32u_i64(op0, op0); 3340 tcg_gen_ext32u_i64(op1, op1); 3341 } else { 3342 tcg_gen_ext32s_i64(op0, op0); 3343 tcg_gen_ext32s_i64(op1, op1); 3344 } 3345 } 3346 3347 /* For the helper, compress the different conditions into a computation 3348 * of how many iterations for which the condition is true. 3349 */ 3350 t0 = tcg_temp_new_i64(); 3351 t1 = tcg_temp_new_i64(); 3352 3353 if (a->lt) { 3354 tcg_gen_sub_i64(t0, op1, op0); 3355 if (a->u) { 3356 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3357 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3358 } else { 3359 maxval = a->sf ? INT64_MAX : INT32_MAX; 3360 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3361 } 3362 } else { 3363 tcg_gen_sub_i64(t0, op0, op1); 3364 if (a->u) { 3365 maxval = 0; 3366 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3367 } else { 3368 maxval = a->sf ? INT64_MIN : INT32_MIN; 3369 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3370 } 3371 } 3372 3373 tmax = tcg_constant_i64(vsz >> a->esz); 3374 if (eq) { 3375 /* Equality means one more iteration. */ 3376 tcg_gen_addi_i64(t0, t0, 1); 3377 3378 /* 3379 * For the less-than while, if op1 is maxval (and the only time 3380 * the addition above could overflow), then we produce an all-true 3381 * predicate by setting the count to the vector length. This is 3382 * because the pseudocode is described as an increment + compare 3383 * loop, and the maximum integer would always compare true. 3384 * Similarly, the greater-than while has the same issue with the 3385 * minimum integer due to the decrement + compare loop. 3386 */ 3387 tcg_gen_movi_i64(t1, maxval); 3388 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3389 } 3390 3391 /* Bound to the maximum. */ 3392 tcg_gen_umin_i64(t0, t0, tmax); 3393 3394 /* Set the count to zero if the condition is false. */ 3395 tcg_gen_movi_i64(t1, 0); 3396 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3397 tcg_temp_free_i64(t1); 3398 3399 /* Since we're bounded, pass as a 32-bit type. */ 3400 t2 = tcg_temp_new_i32(); 3401 tcg_gen_extrl_i64_i32(t2, t0); 3402 tcg_temp_free_i64(t0); 3403 3404 /* Scale elements to bits. 
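     * The count computed above is in elements; each element occupies
     * 1 << esz predicate bits.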
*/ 3405 tcg_gen_shli_i32(t2, t2, a->esz); 3406 3407 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3408 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3409 3410 ptr = tcg_temp_new_ptr(); 3411 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3412 3413 if (a->lt) { 3414 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3415 } else { 3416 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3417 } 3418 do_pred_flags(t2); 3419 3420 tcg_temp_free_ptr(ptr); 3421 tcg_temp_free_i32(t2); 3422 return true; 3423 } 3424 3425 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3426 { 3427 TCGv_i64 op0, op1, diff, t1, tmax; 3428 TCGv_i32 t2; 3429 TCGv_ptr ptr; 3430 unsigned vsz = vec_full_reg_size(s); 3431 unsigned desc = 0; 3432 3433 if (!dc_isar_feature(aa64_sve2, s)) { 3434 return false; 3435 } 3436 if (!sve_access_check(s)) { 3437 return true; 3438 } 3439 3440 op0 = read_cpu_reg(s, a->rn, 1); 3441 op1 = read_cpu_reg(s, a->rm, 1); 3442 3443 tmax = tcg_constant_i64(vsz); 3444 diff = tcg_temp_new_i64(); 3445 3446 if (a->rw) { 3447 /* WHILERW */ 3448 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3449 t1 = tcg_temp_new_i64(); 3450 tcg_gen_sub_i64(diff, op0, op1); 3451 tcg_gen_sub_i64(t1, op1, op0); 3452 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3453 tcg_temp_free_i64(t1); 3454 /* Round down to a multiple of ESIZE. */ 3455 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3456 /* If op1 == op0, diff == 0, and the condition is always true. */ 3457 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3458 } else { 3459 /* WHILEWR */ 3460 tcg_gen_sub_i64(diff, op1, op0); 3461 /* Round down to a multiple of ESIZE. */ 3462 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3463 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3464 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3465 } 3466 3467 /* Bound to the maximum. */ 3468 tcg_gen_umin_i64(diff, diff, tmax); 3469 3470 /* Since we're bounded, pass as a 32-bit type. */ 3471 t2 = tcg_temp_new_i32(); 3472 tcg_gen_extrl_i64_i32(t2, diff); 3473 tcg_temp_free_i64(diff); 3474 3475 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3476 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3477 3478 ptr = tcg_temp_new_ptr(); 3479 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3480 3481 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3482 do_pred_flags(t2); 3483 3484 tcg_temp_free_ptr(ptr); 3485 tcg_temp_free_i32(t2); 3486 return true; 3487 } 3488 3489 /* 3490 *** SVE Integer Wide Immediate - Unpredicated Group 3491 */ 3492 3493 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3494 { 3495 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3496 return false; 3497 } 3498 if (sve_access_check(s)) { 3499 unsigned vsz = vec_full_reg_size(s); 3500 int dofs = vec_full_reg_offset(s, a->rd); 3501 uint64_t imm; 3502 3503 /* Decode the VFP immediate. 
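         * E.g. imm8 == 0x70 expands to 1.0 at every element size:
         * 0x3c00, 0x3f800000, or 0x3ff0000000000000.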
*/ 3504 imm = vfp_expand_imm(a->esz, a->imm); 3505 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3506 } 3507 return true; 3508 } 3509 3510 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3511 { 3512 if (!dc_isar_feature(aa64_sve, s)) { 3513 return false; 3514 } 3515 if (sve_access_check(s)) { 3516 unsigned vsz = vec_full_reg_size(s); 3517 int dofs = vec_full_reg_offset(s, a->rd); 3518 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3519 } 3520 return true; 3521 } 3522 3523 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3524 3525 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3526 { 3527 a->imm = -a->imm; 3528 return trans_ADD_zzi(s, a); 3529 } 3530 3531 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3532 { 3533 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3534 static const GVecGen2s op[4] = { 3535 { .fni8 = tcg_gen_vec_sub8_i64, 3536 .fniv = tcg_gen_sub_vec, 3537 .fno = gen_helper_sve_subri_b, 3538 .opt_opc = vecop_list, 3539 .vece = MO_8, 3540 .scalar_first = true }, 3541 { .fni8 = tcg_gen_vec_sub16_i64, 3542 .fniv = tcg_gen_sub_vec, 3543 .fno = gen_helper_sve_subri_h, 3544 .opt_opc = vecop_list, 3545 .vece = MO_16, 3546 .scalar_first = true }, 3547 { .fni4 = tcg_gen_sub_i32, 3548 .fniv = tcg_gen_sub_vec, 3549 .fno = gen_helper_sve_subri_s, 3550 .opt_opc = vecop_list, 3551 .vece = MO_32, 3552 .scalar_first = true }, 3553 { .fni8 = tcg_gen_sub_i64, 3554 .fniv = tcg_gen_sub_vec, 3555 .fno = gen_helper_sve_subri_d, 3556 .opt_opc = vecop_list, 3557 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3558 .vece = MO_64, 3559 .scalar_first = true } 3560 }; 3561 3562 if (!dc_isar_feature(aa64_sve, s)) { 3563 return false; 3564 } 3565 if (sve_access_check(s)) { 3566 unsigned vsz = vec_full_reg_size(s); 3567 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3568 vec_full_reg_offset(s, a->rn), 3569 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3570 } 3571 return true; 3572 } 3573 3574 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3575 3576 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3577 { 3578 if (sve_access_check(s)) { 3579 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3580 tcg_constant_i64(a->imm), u, d); 3581 } 3582 return true; 3583 } 3584 3585 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3586 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3587 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3588 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3589 3590 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3591 { 3592 if (sve_access_check(s)) { 3593 unsigned vsz = vec_full_reg_size(s); 3594 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3595 vec_full_reg_offset(s, a->rn), 3596 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3597 } 3598 return true; 3599 } 3600 3601 #define DO_ZZI(NAME, name) \ 3602 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3603 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3604 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3605 }; \ 3606 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3607 3608 DO_ZZI(SMAX, smax) 3609 DO_ZZI(UMAX, umax) 3610 DO_ZZI(SMIN, smin) 3611 DO_ZZI(UMIN, umin) 3612 3613 #undef DO_ZZI 3614 3615 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3616 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3617 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3618 }; 3619 TRANS_FEAT(DOT_zzzz, 
aa64_sve, gen_gvec_ool_zzzz, 3620 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3621 3622 /* 3623 * SVE Multiply - Indexed 3624 */ 3625 3626 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3627 gen_helper_gvec_sdot_idx_b, a) 3628 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3629 gen_helper_gvec_sdot_idx_h, a) 3630 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3631 gen_helper_gvec_udot_idx_b, a) 3632 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3633 gen_helper_gvec_udot_idx_h, a) 3634 3635 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3636 gen_helper_gvec_sudot_idx_b, a) 3637 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3638 gen_helper_gvec_usdot_idx_b, a) 3639 3640 #define DO_SVE2_RRX(NAME, FUNC) \ 3641 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3642 a->rd, a->rn, a->rm, a->index) 3643 3644 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3645 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3646 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3647 3648 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3649 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3650 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3651 3652 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3653 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3654 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3655 3656 #undef DO_SVE2_RRX 3657 3658 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3659 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3660 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3661 3662 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3663 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3664 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3665 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3666 3667 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3668 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3669 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3670 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3671 3672 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3673 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3674 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3675 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3676 3677 #undef DO_SVE2_RRX_TB 3678 3679 #define DO_SVE2_RRXR(NAME, FUNC) \ 3680 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3681 3682 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3683 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s) 3684 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3685 3686 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3687 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3688 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3689 3690 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3691 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3692 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3693 3694 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3695 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3696 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3697 3698 #undef DO_SVE2_RRXR 3699 3700 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3701 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 
3702 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3703 3704 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3705 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3706 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3707 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3708 3709 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3710 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3711 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3712 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3713 3714 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3715 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3716 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3717 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3718 3719 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3720 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3721 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3722 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3723 3724 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3725 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3726 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3727 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3728 3729 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3730 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3731 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3732 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3733 3734 #undef DO_SVE2_RRXR_TB 3735 3736 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3737 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3738 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3739 3740 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3741 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3742 3743 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3744 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3745 3746 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3747 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3748 3749 #undef DO_SVE2_RRXR_ROT 3750 3751 /* 3752 *** SVE Floating Point Multiply-Add Indexed Group 3753 */ 3754 3755 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3756 { 3757 static gen_helper_gvec_4_ptr * const fns[4] = { 3758 NULL, 3759 gen_helper_gvec_fmla_idx_h, 3760 gen_helper_gvec_fmla_idx_s, 3761 gen_helper_gvec_fmla_idx_d, 3762 }; 3763 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3764 (a->index << 1) | sub, 3765 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3766 } 3767 3768 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3769 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3770 3771 /* 3772 *** SVE Floating Point Multiply Indexed Group 3773 */ 3774 3775 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3776 NULL, gen_helper_gvec_fmul_idx_h, 3777 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3778 }; 3779 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3780 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3781 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3782 3783 /* 3784 *** SVE Floating Point Fast Reduction Group 3785 */ 3786 3787 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3788 TCGv_ptr, TCGv_i32); 3789 3790 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3791 gen_helper_fp_reduce *fn) 3792 { 3793 unsigned vsz, p2vsz; 3794 TCGv_i32 t_desc; 3795 TCGv_ptr t_zn, t_pg, status; 3796 TCGv_i64 temp; 3797 3798 if (fn == NULL) { 3799 return false; 3800 } 3801 if (!sve_access_check(s)) { 3802 return true; 3803 } 3804 3805 vsz = vec_full_reg_size(s); 3806 p2vsz = pow2ceil(vsz); 3807 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3808 temp = tcg_temp_new_i64(); 3809 t_zn = tcg_temp_new_ptr(); 3810 t_pg = tcg_temp_new_ptr(); 3811 3812 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 3813 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3814 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3815 3816 fn(temp, t_zn, t_pg, status, t_desc); 3817 tcg_temp_free_ptr(t_zn); 3818 tcg_temp_free_ptr(t_pg); 3819 tcg_temp_free_ptr(status); 3820 3821 write_fp_dreg(s, a->rd, temp); 3822 tcg_temp_free_i64(temp); 3823 return true; 3824 } 3825 3826 #define DO_VPZ(NAME, name) \ 3827 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3828 NULL, gen_helper_sve_##name##_h, \ 3829 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3830 }; \ 3831 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3832 3833 DO_VPZ(FADDV, faddv) 3834 DO_VPZ(FMINNMV, fminnmv) 3835 DO_VPZ(FMAXNMV, fmaxnmv) 3836 DO_VPZ(FMINV, fminv) 3837 DO_VPZ(FMAXV, fmaxv) 3838 3839 #undef DO_VPZ 3840 3841 /* 3842 *** SVE Floating Point Unary Operations - Unpredicated Group 3843 */ 3844 3845 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3846 NULL, gen_helper_gvec_frecpe_h, 3847 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3848 }; 3849 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3850 3851 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3852 NULL, gen_helper_gvec_frsqrte_h, 3853 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 3854 }; 3855 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3856 3857 /* 3858 *** SVE Floating Point Compare with Zero Group 3859 */ 3860 3861 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3862 gen_helper_gvec_3_ptr *fn) 3863 { 3864 if (fn == NULL) { 3865 return false; 3866 } 3867 if (sve_access_check(s)) { 3868 unsigned vsz = vec_full_reg_size(s); 3869 TCGv_ptr status = 3870 fpstatus_ptr(a->esz == MO_16 ? 

#define DO_VPZ(NAME, name) \
    static gen_helper_fp_reduce * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    }; \
    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

#undef DO_VPZ

/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
    NULL, gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)

static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
    NULL, gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
};
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)

/*
 *** SVE Floating Point Compare with Zero Group
 */

static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_PPZ(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    }; \
    TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ

/*
 *** SVE floating-point trig multiply-add coefficient
 */

static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
    NULL, gen_helper_sve_ftmad_h,
    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
                        ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
                        a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 *** SVE Floating Point Accumulating Reduction Group
 */

static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
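
/*
 * FADDA is a strictly ordered (left-to-right) accumulation, so it
 * cannot use the usual out-of-line gvec expansion; the scalar start
 * value is threaded through the helper in t_val.  fns[] has no entry
 * for MO_8 since there is no byte-sized floating-point type, hence
 * the a->esz - 1 indexing.
 */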

/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

#define DO_FP3(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
        NULL, gen_helper_gvec_##name##_h, \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
    }; \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3

static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
    NULL, gen_helper_gvec_ftsmul_h,
    gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
};
TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
                        ftsmul_fns[a->esz], a, 0)

/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */

#define DO_ZPZZ_FP(NAME, FEAT, name) \
    static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
        NULL, gen_helper_##name##_h, \
        gen_helper_##name##_s, gen_helper_##name##_d \
    }; \
    TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)

DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)

typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
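
/*
 * do_fp_scalar feeds a single 64-bit value to a predicated vector
 * helper, which re-interprets the low bits of SCALAR at the element
 * size.  It is used by the arithmetic-with-immediate forms below.
 */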

static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                     tcg_constant_i64(imm), fn);
    }
    return true;
}

#define DO_FP_IMM(NAME, name, const0, const1) \
    static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d \
    }; \
    static uint64_t const name##_const[4][2] = { \
        { -1, -1 }, \
        { float16_##const0, float16_##const1 }, \
        { float32_##const0, float32_##const1 }, \
        { float64_##const0, float64_##const1 }, \
    }; \
    TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
               name##_const[a->esz][a->imm], name##_fns[a->esz])

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
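
/*
 * For these insns the single immediate bit selects between two
 * architecturally fixed constants, e.g. FMUL (immediate) chooses
 * between 0.5 and 2.0: "fmul z0.h, p0/m, z0.h, #2.0" has imm == 1
 * and so selects float16_two.
 */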

static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
                      gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_FPCMP(NAME, name) \
    static gen_helper_gvec_4_ptr * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP

static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
    NULL, gen_helper_sve_fcadd_h,
    gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
           a->rd, a->rn, a->rm, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

#define DO_FMLA(NAME, name) \
    static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
               a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
               a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA

static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
    NULL, gen_helper_sve_fcmla_zpzzz_h,
    gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
};
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
    NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
};
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
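
/*
 * In both FCMLA expansions above the rotation is encoded in the data
 * field in units of 90 degrees; the indexed form packs the element
 * index above it, so e.g. index 1 with rot 3 (270 degrees) passes
 * 1 * 4 + 3 == 7 to the helper.
 */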

/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)

TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)

static gen_helper_gvec_3_ptr * const frint_fns[] = {
    NULL,
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const frintx_fns[] = {
    NULL,
    gen_helper_sve_frintx_h,
    gen_helper_sve_frintx_s,
    gen_helper_sve_frintx_d
};
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tmode = tcg_const_i32(mode);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    gen_helper_set_rmode(tmode, tmode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    gen_helper_set_rmode(tmode, tmode, status);
    tcg_temp_free_i32(tmode);
    tcg_temp_free_ptr(status);
    return true;
}
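
/*
 * gen_helper_set_rmode exchanges the requested rounding mode with the
 * current one in the fp status, returning the old mode in TMODE; the
 * second call after the operation restores the original rounding mode.
 */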

TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           float_round_nearest_even, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           float_round_up, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           float_round_down, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           float_round_to_zero, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           float_round_ties_away, frint_fns[a->esz])

static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL, gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL, gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;
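
    /*
     * Illustrative: for a 32-byte vector register, len_align == 32,
     * len_remain == 0 and nparts == 4; for a 6-byte predicate register,
     * len_align == 0, len_remain == 6 and nparts == ctpop8(6) == 2,
     * i.e. one 4-byte plus one 2-byte access.
     */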

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, base, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        if (base != cpu_env) {
            TCGv_ptr b = tcg_temp_local_new_ptr();
            tcg_gen_mov_ptr(b, base);
            base = b;
        }

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);

        if (base != cpu_env) {
            tcg_temp_free_ptr(base);
            assert(len_remain == 0);
        }
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, base, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
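
/*
 * The len_remain cases above only arise for predicate registers, whose
 * size is VL / 8 and thus any multiple of 2 bytes; 6 is the one value
 * that cannot be a single power-of-2 access and is split into 4 + 2.
 */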

/* Similarly for stores. */
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, base, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        if (base != cpu_env) {
            TCGv_ptr b = tcg_temp_local_new_ptr();
            tcg_gen_mov_ptr(b, base);
            base = b;
        }

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);

        if (base != cpu_env) {
            tcg_temp_free_ptr(base);
            assert(len_remain == 0);
        }
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, base, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}

static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype.  */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype.  */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
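
/*
 * Worked example: dtype 9 is LD1SH to 32-bit elements, i.e. a
 * sign-extending halfword load (dtype_mop[9] == MO_SW, dtype_esz[9]
 * == 2), while dtype 5 is the plain LD1H (MO_UW, esz 1).
 */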

static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}
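
/*
 * Resulting descriptor layout: the gvec simd_desc occupies the low
 * bits, with the zt register number in its data field; when MTE is
 * active the MTEDESC fields sit above SVE_MTEDESC_SHIFT.  The helper
 * recovers each field from the single 32-bit value.
 */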

/* Indexed by [mte][be][dtype][nreg] */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
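
/*
 * Example lookup: a little-endian LD2H without MTE uses
 * ldr_fns[0][0][5][1] == gen_helper_sve_ld2hh_le_r, since dtype 5 is
 * the halfword-to-halfword load and nreg 1 selects the 2-register form.
 */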

static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];

    /*
     * While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}

static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}

static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
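
/*
 * The immediate is scaled by the full (multi-)register transfer size:
 * e.g. for LD1D with a 32-byte vector, elements == 4, so #imm 2 gives
 * a byte offset of (2 * 4 * 1) << 3 == 64, i.e. 2 * VL as required.
 */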

static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
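
/*
 * Both LDFF1 and LDNF1 are marked nonstreaming above: the first-fault
 * and non-fault forms update the FFR, which is not available in
 * streaming SVE mode.
 */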

static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
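
/*
 * Note that simd_desc(16, 16, zt) presents the helper with a fixed
 * 16-byte vector length, so only the first quadword is loaded; the
 * dup_mem expansion then copies it across the rest of the register.
 */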

static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers.  */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}

static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (a->rm == 31) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldro(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
        do_ldro(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

/* Load and broadcast element. */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }
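
    /*
     * find_last_active returns a negative value when no element is
     * active, so either path branches to OVER without loading when
     * the predicate is all-false.
     */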

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}

static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
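
/*
 * Note that after the table lookup NREG is reused as the MTE transfer
 * count passed to do_mem_zpa, which is why it is reset to 1 for ST1.
 */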

static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (a->rm == 31 || a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}

static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> a->esz;
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1)) << a->msz);
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}

/*
 *** SVE gather loads / scatter stores
 */

static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    int desc = 0;

    if (s->mte_active[0]) {
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
}
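
/*
 * As with do_mem_zpa, MTE fields live above SVE_MTEDESC_SHIFT; the
 * data field proper carries only SCALE, the offset shift (log2 of the
 * access size when scaled, 0 when unscaled).  SCALAR is the base
 * address, or the immediate for the vector-plus-immediate forms.
 */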

/* Indexed by [mte][be][ff][xs][u][msz].  */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
          { { { gen_helper_sve_ldbss_zsu,
                gen_helper_sve_ldhss_le_zsu,
                NULL, },
              { gen_helper_sve_ldbsu_zsu,
                gen_helper_sve_ldhsu_le_zsu,
                gen_helper_sve_ldss_le_zsu, } },
            { { gen_helper_sve_ldbss_zss,
                gen_helper_sve_ldhss_le_zss,
                NULL, },
              { gen_helper_sve_ldbsu_zss,
                gen_helper_sve_ldhsu_le_zss,
                gen_helper_sve_ldss_le_zss, } } },

          /* First-fault */
          { { { gen_helper_sve_ldffbss_zsu,
                gen_helper_sve_ldffhss_le_zsu,
                NULL, },
              { gen_helper_sve_ldffbsu_zsu,
                gen_helper_sve_ldffhsu_le_zsu,
                gen_helper_sve_ldffss_le_zsu, } },
            { { gen_helper_sve_ldffbss_zss,
                gen_helper_sve_ldffhss_le_zss,
                NULL, },
              { gen_helper_sve_ldffbsu_zss,
                gen_helper_sve_ldffhsu_le_zss,
                gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
          { { { gen_helper_sve_ldbss_zsu,
                gen_helper_sve_ldhss_be_zsu,
                NULL, },
              { gen_helper_sve_ldbsu_zsu,
                gen_helper_sve_ldhsu_be_zsu,
                gen_helper_sve_ldss_be_zsu, } },
            { { gen_helper_sve_ldbss_zss,
                gen_helper_sve_ldhss_be_zss,
                NULL, },
              { gen_helper_sve_ldbsu_zss,
                gen_helper_sve_ldhsu_be_zss,
                gen_helper_sve_ldss_be_zss, } } },

          /* First-fault */
          { { { gen_helper_sve_ldffbss_zsu,
                gen_helper_sve_ldffhss_be_zsu,
                NULL, },
              { gen_helper_sve_ldffbsu_zsu,
                gen_helper_sve_ldffhsu_be_zsu,
                gen_helper_sve_ldffss_be_zsu, } },
            { { gen_helper_sve_ldffbss_zss,
                gen_helper_sve_ldffhss_be_zss,
                NULL, },
              { gen_helper_sve_ldffbsu_zss,
                gen_helper_sve_ldffhsu_be_zss,
                gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
          { { { gen_helper_sve_ldbss_zsu_mte,
                gen_helper_sve_ldhss_le_zsu_mte,
                NULL, },
              { gen_helper_sve_ldbsu_zsu_mte,
                gen_helper_sve_ldhsu_le_zsu_mte,
                gen_helper_sve_ldss_le_zsu_mte, } },
            { { gen_helper_sve_ldbss_zss_mte,
                gen_helper_sve_ldhss_le_zss_mte,
                NULL, },
              { gen_helper_sve_ldbsu_zss_mte,
                gen_helper_sve_ldhsu_le_zss_mte,
                gen_helper_sve_ldss_le_zss_mte, } } },

          /* First-fault */
          { { { gen_helper_sve_ldffbss_zsu_mte,
                gen_helper_sve_ldffhss_le_zsu_mte,
                NULL, },
              { gen_helper_sve_ldffbsu_zsu_mte,
                gen_helper_sve_ldffhsu_le_zsu_mte,
                gen_helper_sve_ldffss_le_zsu_mte, } },
            { { gen_helper_sve_ldffbss_zss_mte,
                gen_helper_sve_ldffhss_le_zss_mte,
                NULL, },
              { gen_helper_sve_ldffbsu_zss_mte,
                gen_helper_sve_ldffhsu_le_zss_mte,
                gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
          { { { gen_helper_sve_ldbss_zsu_mte,
                gen_helper_sve_ldhss_be_zsu_mte,
                NULL, },
              { gen_helper_sve_ldbsu_zsu_mte,
                gen_helper_sve_ldhsu_be_zsu_mte,
                gen_helper_sve_ldss_be_zsu_mte, } },
            { { gen_helper_sve_ldbss_zss_mte,
                gen_helper_sve_ldhss_be_zss_mte,
                NULL, },
              { gen_helper_sve_ldbsu_zss_mte,
                gen_helper_sve_ldhsu_be_zss_mte,
                gen_helper_sve_ldss_be_zss_mte, } } },

          /* First-fault */
          { { { gen_helper_sve_ldffbss_zsu_mte,
                gen_helper_sve_ldffhss_be_zsu_mte,
                NULL, },
              { gen_helper_sve_ldffbsu_zsu_mte,
                gen_helper_sve_ldffhsu_be_zsu_mte,
                gen_helper_sve_ldffss_be_zsu_mte, } },
            { { gen_helper_sve_ldffbss_zss_mte,
                gen_helper_sve_ldffhss_be_zss_mte,
                NULL, },
              { gen_helper_sve_ldffbsu_zss_mte,
                gen_helper_sve_ldffhsu_be_zss_mte,
                gen_helper_sve_ldffss_be_zss_mte, } } } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
          { { { gen_helper_sve_ldbds_zsu,
                gen_helper_sve_ldhds_le_zsu,
                gen_helper_sve_ldsds_le_zsu,
                NULL, },
              { gen_helper_sve_ldbdu_zsu,
                gen_helper_sve_ldhdu_le_zsu,
                gen_helper_sve_ldsdu_le_zsu,
                gen_helper_sve_lddd_le_zsu, } },
            { { gen_helper_sve_ldbds_zss,
                gen_helper_sve_ldhds_le_zss,
                gen_helper_sve_ldsds_le_zss,
                NULL, },
              { gen_helper_sve_ldbdu_zss,
                gen_helper_sve_ldhdu_le_zss,
                gen_helper_sve_ldsdu_le_zss,
                gen_helper_sve_lddd_le_zss, } },
            { { gen_helper_sve_ldbds_zd,
                gen_helper_sve_ldhds_le_zd,
                gen_helper_sve_ldsds_le_zd,
                NULL, },
              { gen_helper_sve_ldbdu_zd,
                gen_helper_sve_ldhdu_le_zd,
                gen_helper_sve_ldsdu_le_zd,
                gen_helper_sve_lddd_le_zd, } } },

          /* First-fault */
          { { { gen_helper_sve_ldffbds_zsu,
                gen_helper_sve_ldffhds_le_zsu,
                gen_helper_sve_ldffsds_le_zsu,
                NULL, },
              { gen_helper_sve_ldffbdu_zsu,
                gen_helper_sve_ldffhdu_le_zsu,
                gen_helper_sve_ldffsdu_le_zsu,
                gen_helper_sve_ldffdd_le_zsu, } },
            { { gen_helper_sve_ldffbds_zss,
                gen_helper_sve_ldffhds_le_zss,
                gen_helper_sve_ldffsds_le_zss,
                NULL, },
              { gen_helper_sve_ldffbdu_zss,
                gen_helper_sve_ldffhdu_le_zss,
                gen_helper_sve_ldffsdu_le_zss,
                gen_helper_sve_ldffdd_le_zss, } },
            { { gen_helper_sve_ldffbds_zd,
                gen_helper_sve_ldffhds_le_zd,
                gen_helper_sve_ldffsds_le_zd,
                NULL, },
              { gen_helper_sve_ldffbdu_zd,
                gen_helper_sve_ldffhdu_le_zd,
                gen_helper_sve_ldffsdu_le_zd,
                gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
          { { { gen_helper_sve_ldbds_zsu,
                gen_helper_sve_ldhds_be_zsu,
                gen_helper_sve_ldsds_be_zsu,
                NULL, },
              { gen_helper_sve_ldbdu_zsu,
                gen_helper_sve_ldhdu_be_zsu,
                gen_helper_sve_ldsdu_be_zsu,
                gen_helper_sve_lddd_be_zsu, } },
            { { gen_helper_sve_ldbds_zss,
                gen_helper_sve_ldhds_be_zss,
                gen_helper_sve_ldsds_be_zss,
                NULL, },
              { gen_helper_sve_ldbdu_zss,
                gen_helper_sve_ldhdu_be_zss,
                gen_helper_sve_ldsdu_be_zss,
                gen_helper_sve_lddd_be_zss, } },
            { { gen_helper_sve_ldbds_zd,
                gen_helper_sve_ldhds_be_zd,
                gen_helper_sve_ldsds_be_zd,
                NULL, },
              { gen_helper_sve_ldbdu_zd,
                gen_helper_sve_ldhdu_be_zd,
                gen_helper_sve_ldsdu_be_zd,
                gen_helper_sve_lddd_be_zd, } } },

          /* First-fault */
          { { { gen_helper_sve_ldffbds_zsu,
                gen_helper_sve_ldffhds_be_zsu,
                gen_helper_sve_ldffsds_be_zsu,
                NULL, },
              { gen_helper_sve_ldffbdu_zsu,
                gen_helper_sve_ldffhdu_be_zsu,
                gen_helper_sve_ldffsdu_be_zsu,
                gen_helper_sve_ldffdd_be_zsu, } },
            { { gen_helper_sve_ldffbds_zss,
                gen_helper_sve_ldffhds_be_zss,
                gen_helper_sve_ldffsds_be_zss,
                NULL, },
              { gen_helper_sve_ldffbdu_zss,
                gen_helper_sve_ldffhdu_be_zss,
                gen_helper_sve_ldffsdu_be_zss,
                gen_helper_sve_ldffdd_be_zss, } },
            { { gen_helper_sve_ldffbds_zd,
                gen_helper_sve_ldffhds_be_zd,
                gen_helper_sve_ldffsds_be_zd,
                NULL, },
              { gen_helper_sve_ldffbdu_zd,
                gen_helper_sve_ldffhdu_be_zd,
                gen_helper_sve_ldffsdu_be_zd,
                gen_helper_sve_ldffdd_be_zd, } } } } },
gen_helper_sve_ldffdd_be_zd, } } } } }, 5627 { /* MTE Active */ 5628 { /* Little-endian */ 5629 { { { gen_helper_sve_ldbds_zsu_mte, 5630 gen_helper_sve_ldhds_le_zsu_mte, 5631 gen_helper_sve_ldsds_le_zsu_mte, 5632 NULL, }, 5633 { gen_helper_sve_ldbdu_zsu_mte, 5634 gen_helper_sve_ldhdu_le_zsu_mte, 5635 gen_helper_sve_ldsdu_le_zsu_mte, 5636 gen_helper_sve_lddd_le_zsu_mte, } }, 5637 { { gen_helper_sve_ldbds_zss_mte, 5638 gen_helper_sve_ldhds_le_zss_mte, 5639 gen_helper_sve_ldsds_le_zss_mte, 5640 NULL, }, 5641 { gen_helper_sve_ldbdu_zss_mte, 5642 gen_helper_sve_ldhdu_le_zss_mte, 5643 gen_helper_sve_ldsdu_le_zss_mte, 5644 gen_helper_sve_lddd_le_zss_mte, } }, 5645 { { gen_helper_sve_ldbds_zd_mte, 5646 gen_helper_sve_ldhds_le_zd_mte, 5647 gen_helper_sve_ldsds_le_zd_mte, 5648 NULL, }, 5649 { gen_helper_sve_ldbdu_zd_mte, 5650 gen_helper_sve_ldhdu_le_zd_mte, 5651 gen_helper_sve_ldsdu_le_zd_mte, 5652 gen_helper_sve_lddd_le_zd_mte, } } }, 5653 5654 /* First-fault */ 5655 { { { gen_helper_sve_ldffbds_zsu_mte, 5656 gen_helper_sve_ldffhds_le_zsu_mte, 5657 gen_helper_sve_ldffsds_le_zsu_mte, 5658 NULL, }, 5659 { gen_helper_sve_ldffbdu_zsu_mte, 5660 gen_helper_sve_ldffhdu_le_zsu_mte, 5661 gen_helper_sve_ldffsdu_le_zsu_mte, 5662 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5663 { { gen_helper_sve_ldffbds_zss_mte, 5664 gen_helper_sve_ldffhds_le_zss_mte, 5665 gen_helper_sve_ldffsds_le_zss_mte, 5666 NULL, }, 5667 { gen_helper_sve_ldffbdu_zss_mte, 5668 gen_helper_sve_ldffhdu_le_zss_mte, 5669 gen_helper_sve_ldffsdu_le_zss_mte, 5670 gen_helper_sve_ldffdd_le_zss_mte, } }, 5671 { { gen_helper_sve_ldffbds_zd_mte, 5672 gen_helper_sve_ldffhds_le_zd_mte, 5673 gen_helper_sve_ldffsds_le_zd_mte, 5674 NULL, }, 5675 { gen_helper_sve_ldffbdu_zd_mte, 5676 gen_helper_sve_ldffhdu_le_zd_mte, 5677 gen_helper_sve_ldffsdu_le_zd_mte, 5678 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5679 { /* Big-endian */ 5680 { { { gen_helper_sve_ldbds_zsu_mte, 5681 gen_helper_sve_ldhds_be_zsu_mte, 5682 gen_helper_sve_ldsds_be_zsu_mte, 5683 NULL, }, 5684 { gen_helper_sve_ldbdu_zsu_mte, 5685 gen_helper_sve_ldhdu_be_zsu_mte, 5686 gen_helper_sve_ldsdu_be_zsu_mte, 5687 gen_helper_sve_lddd_be_zsu_mte, } }, 5688 { { gen_helper_sve_ldbds_zss_mte, 5689 gen_helper_sve_ldhds_be_zss_mte, 5690 gen_helper_sve_ldsds_be_zss_mte, 5691 NULL, }, 5692 { gen_helper_sve_ldbdu_zss_mte, 5693 gen_helper_sve_ldhdu_be_zss_mte, 5694 gen_helper_sve_ldsdu_be_zss_mte, 5695 gen_helper_sve_lddd_be_zss_mte, } }, 5696 { { gen_helper_sve_ldbds_zd_mte, 5697 gen_helper_sve_ldhds_be_zd_mte, 5698 gen_helper_sve_ldsds_be_zd_mte, 5699 NULL, }, 5700 { gen_helper_sve_ldbdu_zd_mte, 5701 gen_helper_sve_ldhdu_be_zd_mte, 5702 gen_helper_sve_ldsdu_be_zd_mte, 5703 gen_helper_sve_lddd_be_zd_mte, } } }, 5704 5705 /* First-fault */ 5706 { { { gen_helper_sve_ldffbds_zsu_mte, 5707 gen_helper_sve_ldffhds_be_zsu_mte, 5708 gen_helper_sve_ldffsds_be_zsu_mte, 5709 NULL, }, 5710 { gen_helper_sve_ldffbdu_zsu_mte, 5711 gen_helper_sve_ldffhdu_be_zsu_mte, 5712 gen_helper_sve_ldffsdu_be_zsu_mte, 5713 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5714 { { gen_helper_sve_ldffbds_zss_mte, 5715 gen_helper_sve_ldffhds_be_zss_mte, 5716 gen_helper_sve_ldffsds_be_zss_mte, 5717 NULL, }, 5718 { gen_helper_sve_ldffbdu_zss_mte, 5719 gen_helper_sve_ldffhdu_be_zss_mte, 5720 gen_helper_sve_ldffsdu_be_zss_mte, 5721 gen_helper_sve_ldffdd_be_zss_mte, } }, 5722 { { gen_helper_sve_ldffbds_zd_mte, 5723 gen_helper_sve_ldffhds_be_zd_mte, 5724 gen_helper_sve_ldffsds_be_zd_mte, 5725 NULL, }, 5726 { gen_helper_sve_ldffbdu_zd_mte, 5727 
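/*
 * For example, a first-fault LD1W (scattered) with 32-bit unscaled
 * offsets selects gather_load_fn32[mte][be][1][xs][1][MO_32], where
 * xs picks the _zsu (index 0) or _zss (index 1) offset-extension
 * helpers.  The overloaded xs=2 column of gather_load_fn64 supplies
 * the _zd helpers used for 64-bit vector offsets and for the
 * vector-plus-immediate form below.
 */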
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, fn);
    return true;
}

static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /*
     * Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
    return true;
}

static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz + !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, fn);
    return true;
}
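/*
 * Note that LDNT1 (vector plus scalar) above reuses the same tables as
 * the immediate form: the vector register supplies the base (xs index 0
 * for fn32, the overloaded 2 for fn64), the scalar register is passed
 * in the offset slot, and first-fault is forced to zero.
 */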
/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
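/*
 * As with the gathers, xs=2 in scatter_store_fn64 selects the _zd
 * helpers; e.g. ST1D (scattered) with 64-bit unscaled offsets uses
 * scatter_store_fn64[mte][be][2][MO_64].
 */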
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /*
     * Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}

static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    s->is_nonstreaming = true;
    (void)sve_access_check(s);
    return true;
}
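/*
 * Note that the prefetch expansions above still call sve_access_check()
 * for its side effect of raising any pending SVE access exception;
 * the result is intentionally discarded.
 */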
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)

/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)

/*
 * SVE2 Integer - Predicated
 */

static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL, gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL, gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
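/*
 * NULL entries in the helper tables mark element sizes for which no
 * helper exists; such encodings are rejected as unallocated.
 */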
static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)

DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)

/*
 * SVE2 Widening Integer Arithmetic
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)
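/*
 * For these widening forms, the data argument selects which half of
 * each input feeds the operation: bit 0 for the odd (top) elements
 * of rn, bit 1 for rm.  Hence 0 = BB, 3 = TT, 2 = BT and 1 = TB,
 * matching the B/T suffix pairs above and below.
 */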
static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)

static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };

    if (a->esz == 0) {
        if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
            return false;
        }
        s->is_nonstreaming = true;
    } else if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
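/*
 * For the shift-long expansions below, the gvec immediate packs the
 * decoded shift together with the top/bottom flag as (shift << 1) | sel;
 * gen_{s,u}shll_* unpack it again as top = imm & 1, shl = imm >> 1.
 */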
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}

static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}

static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sshll_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
};
static const GVecGen2i sshll_ops[3] = {
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_h,
      .vece = MO_16 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_s,
      .vece = MO_32 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_d,
      .vece = MO_64 }
};
TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
static const TCGOpcode ushll_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
};
static const GVecGen2i ushll_ops[3] = {
    { .fni8 = gen_ushll16_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_h,
      .vece = MO_16 },
    { .fni8 = gen_ushll32_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_s,
      .vece = MO_32 },
    { .fni8 = gen_ushll64_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_d,
      .vece = MO_64 },
};
TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)

static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bgrp_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)

static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}
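/*
 * That is, esz bit 0 selects 32- vs 64-bit elements, while esz bit 1
 * turns the add-with-carry into subtract-with-carry (presumably the
 * architectural SBCLB/SBCLT encodings).
 */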
6534 */ 6535 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 6536 } 6537 6538 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 6539 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 6540 6541 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 6542 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 6543 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 6544 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 6545 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 6546 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 6547 6548 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 6549 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 6550 6551 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 6552 const GVecGen2 ops[3]) 6553 { 6554 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 6555 return false; 6556 } 6557 if (sve_access_check(s)) { 6558 unsigned vsz = vec_full_reg_size(s); 6559 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 6560 vec_full_reg_offset(s, a->rn), 6561 vsz, vsz, &ops[a->esz]); 6562 } 6563 return true; 6564 } 6565 6566 static const TCGOpcode sqxtn_list[] = { 6567 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 6568 }; 6569 6570 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6571 { 6572 TCGv_vec t = tcg_temp_new_vec_matching(d); 6573 int halfbits = 4 << vece; 6574 int64_t mask = (1ull << halfbits) - 1; 6575 int64_t min = -1ull << (halfbits - 1); 6576 int64_t max = -min - 1; 6577 6578 tcg_gen_dupi_vec(vece, t, min); 6579 tcg_gen_smax_vec(vece, d, n, t); 6580 tcg_gen_dupi_vec(vece, t, max); 6581 tcg_gen_smin_vec(vece, d, d, t); 6582 tcg_gen_dupi_vec(vece, t, mask); 6583 tcg_gen_and_vec(vece, d, d, t); 6584 tcg_temp_free_vec(t); 6585 } 6586 6587 static const GVecGen2 sqxtnb_ops[3] = { 6588 { .fniv = gen_sqxtnb_vec, 6589 .opt_opc = sqxtn_list, 6590 .fno = gen_helper_sve2_sqxtnb_h, 6591 .vece = MO_16 }, 6592 { .fniv = gen_sqxtnb_vec, 6593 .opt_opc = sqxtn_list, 6594 .fno = gen_helper_sve2_sqxtnb_s, 6595 .vece = MO_32 }, 6596 { .fniv = gen_sqxtnb_vec, 6597 .opt_opc = sqxtn_list, 6598 .fno = gen_helper_sve2_sqxtnb_d, 6599 .vece = MO_64 }, 6600 }; 6601 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 6602 6603 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6604 { 6605 TCGv_vec t = tcg_temp_new_vec_matching(d); 6606 int halfbits = 4 << vece; 6607 int64_t mask = (1ull << halfbits) - 1; 6608 int64_t min = -1ull << (halfbits - 1); 6609 int64_t max = -min - 1; 6610 6611 tcg_gen_dupi_vec(vece, t, min); 6612 tcg_gen_smax_vec(vece, n, n, t); 6613 tcg_gen_dupi_vec(vece, t, max); 6614 tcg_gen_smin_vec(vece, n, n, t); 6615 tcg_gen_shli_vec(vece, n, n, halfbits); 6616 tcg_gen_dupi_vec(vece, t, mask); 6617 tcg_gen_bitsel_vec(vece, d, t, d, n); 6618 tcg_temp_free_vec(t); 6619 } 6620 6621 static const GVecGen2 sqxtnt_ops[3] = { 6622 { .fniv = gen_sqxtnt_vec, 6623 .opt_opc = sqxtn_list, 6624 .load_dest = true, 6625 .fno = gen_helper_sve2_sqxtnt_h, 6626 .vece = MO_16 }, 6627 { .fniv = gen_sqxtnt_vec, 6628 .opt_opc = sqxtn_list, 6629 .load_dest = true, 6630 .fno = gen_helper_sve2_sqxtnt_s, 6631 .vece = MO_32 }, 6632 { .fniv = gen_sqxtnt_vec, 6633 .opt_opc = sqxtn_list, 6634 .load_dest = true, 6635 .fno = gen_helper_sve2_sqxtnt_d, 6636 .vece = MO_64 }, 6637 }; 6638 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 6639 6640 static const TCGOpcode uqxtn_list[] = { 
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 uqxtnb_ops[3] = {
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)

static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 uqxtnt_ops[3] = {
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)

static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtunb_ops[3] = {
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)

static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}

static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)

static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
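/*
 * Note that gen_shrnt64_i64 above can use a single deposit into
 * bits [63:32], since halfbits == 32 for MO_64.
 */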
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)

static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)

static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrunb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunb_ops[3] = {
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)

static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrunt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunt_ops[3] = {
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
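/*
 * The rounding narrowing shifts (RSHRNB/RSHRNT above and the saturating
 * rounding forms below) provide no inline vector expansion; only .fno
 * is set, so they always go through the out-of-line helpers.
 */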
static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)

static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrnb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnb_ops[3] = {
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)

static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnt_ops[3] = {
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)

static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode uqshrnb_vec_list[] = {
    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnb_ops[3] = {
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)

static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)

static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)

#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
    static gen_helper_gvec_3 * const name##_fns[4] = {                    \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
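/*
 * For example, DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) above expands to an
 * addhnb_fns[] table (with no byte-element helper) plus the matching
 * trans_ADDHNB via TRANS_FEAT.
 */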
static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */

TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)

static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
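/*
 * The crypto expansions below share helpers with the AdvSIMD
 * implementation: a->decrypt distinguishes AESMC from AESIMC, and
 * AESE/AESD differ only in the boolean passed to gen_helper_crypto_aese.
 */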
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, false)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, true)

TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)

TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve2_fcvtnt_ds)

static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL, gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}
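/* As with do_FMLAL_* above, sel picks the even (B) or odd (T) elements. */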

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)

static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);

    /* Apply to either copy the source, or write zeros. */
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);

    tcg_temp_free_i64(tmp);
    tcg_temp_free_i64(dbit);
    tcg_temp_free_i64(didx);
    tcg_temp_free_ptr(ptr);
    return true;
}
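
/*
 * SCLAMP and UCLAMP expand as max-then-min: d = MIN(MAX(a, n), m),
 * clamping each element of Za into the range [Zn, Zm].  For example,
 * with n = 10 and m = 20: a = 5 clamps up to 10, a = 15 is unchanged,
 * and a = 25 clamps down to 20.
 */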

static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}

static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_sclamp_i32,
          .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_sclamp_i64,
          .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)

static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}

static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_uclamp_i32,
          .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_uclamp_i64,
          .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)