/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.
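           For example, with both pointers counted in 4-byte tcg_insn_unit
           steps, a target two instructions ahead gives offset == 2, which
           deposit32() places into bits [25:0], the imm26 field shared by
           B and BL; the sextract64() test above limits the reach to the
           architectural +/-128MiB.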
*/ 90 *src_rw = deposit32(*src_rw, 0, 26, offset); 91 return true; 92 } 93 return false; 94} 95 96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 97{ 98 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 99 ptrdiff_t offset = target - src_rx; 100 101 if (offset == sextract64(offset, 0, 19)) { 102 *src_rw = deposit32(*src_rw, 5, 19, offset); 103 return true; 104 } 105 return false; 106} 107 108static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 109{ 110 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 111 ptrdiff_t offset = target - src_rx; 112 113 if (offset == sextract64(offset, 0, 14)) { 114 *src_rw = deposit32(*src_rw, 5, 14, offset); 115 return true; 116 } 117 return false; 118} 119 120static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 121 intptr_t value, intptr_t addend) 122{ 123 tcg_debug_assert(addend == 0); 124 switch (type) { 125 case R_AARCH64_JUMP26: 126 case R_AARCH64_CALL26: 127 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 128 case R_AARCH64_CONDBR19: 129 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 130 case R_AARCH64_TSTBR14: 131 return reloc_pc14(code_ptr, (const tcg_insn_unit *)value); 132 default: 133 g_assert_not_reached(); 134 } 135} 136 137#define TCG_CT_CONST_AIMM 0x100 138#define TCG_CT_CONST_LIMM 0x200 139#define TCG_CT_CONST_ZERO 0x400 140#define TCG_CT_CONST_MONE 0x800 141#define TCG_CT_CONST_ORRI 0x1000 142#define TCG_CT_CONST_ANDI 0x2000 143#define TCG_CT_CONST_CMP 0x4000 144 145#define ALL_GENERAL_REGS 0xffffffffu 146#define ALL_VECTOR_REGS 0xffffffff00000000ull 147 148/* Match a constant valid for addition (12-bit, optionally shifted). */ 149static inline bool is_aimm(uint64_t val) 150{ 151 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 152} 153 154/* Match a constant valid for logical operations. */ 155static inline bool is_limm(uint64_t val) 156{ 157 /* Taking a simplified view of the logical immediates for now, ignoring 158 the replication that can happen across the field. Match bit patterns 159 of the forms 160 0....01....1 161 0..01..10..0 162 and their inverses. */ 163 164 /* Make things easier below, by testing the form with msb clear. */ 165 if ((int64_t)val < 0) { 166 val = ~val; 167 } 168 if (val == 0) { 169 return false; 170 } 171 val += val & -val; 172 return (val & (val - 1)) == 0; 173} 174 175/* Return true if v16 is a valid 16-bit shifted immediate. */ 176static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 177{ 178 if (v16 == (v16 & 0xff)) { 179 *cmode = 0x8; 180 *imm8 = v16 & 0xff; 181 return true; 182 } else if (v16 == (v16 & 0xff00)) { 183 *cmode = 0xa; 184 *imm8 = v16 >> 8; 185 return true; 186 } 187 return false; 188} 189 190/* Return true if v32 is a valid 32-bit shifted immediate. */ 191static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 192{ 193 if (v32 == (v32 & 0xff)) { 194 *cmode = 0x0; 195 *imm8 = v32 & 0xff; 196 return true; 197 } else if (v32 == (v32 & 0xff00)) { 198 *cmode = 0x2; 199 *imm8 = (v32 >> 8) & 0xff; 200 return true; 201 } else if (v32 == (v32 & 0xff0000)) { 202 *cmode = 0x4; 203 *imm8 = (v32 >> 16) & 0xff; 204 return true; 205 } else if (v32 == (v32 & 0xff000000)) { 206 *cmode = 0x6; 207 *imm8 = v32 >> 24; 208 return true; 209 } 210 return false; 211} 212 213/* Return true if v32 is a valid 32-bit shifting ones immediate. 
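   These are the MOVI/MVNI "MSL" forms, where ones rather than zeros are
   shifted in from the right: 0x0000XXff (cmode 0xc) and 0x00XXffff
   (cmode 0xd).  For example, v32 == 0x000012ff matches cmode 0xc with
   imm8 == 0x12.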
*/ 214static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 215{ 216 if ((v32 & 0xffff00ff) == 0xff) { 217 *cmode = 0xc; 218 *imm8 = (v32 >> 8) & 0xff; 219 return true; 220 } else if ((v32 & 0xff00ffff) == 0xffff) { 221 *cmode = 0xd; 222 *imm8 = (v32 >> 16) & 0xff; 223 return true; 224 } 225 return false; 226} 227 228/* Return true if v32 is a valid float32 immediate. */ 229static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 230{ 231 if (extract32(v32, 0, 19) == 0 232 && (extract32(v32, 25, 6) == 0x20 233 || extract32(v32, 25, 6) == 0x1f)) { 234 *cmode = 0xf; 235 *imm8 = (extract32(v32, 31, 1) << 7) 236 | (extract32(v32, 25, 1) << 6) 237 | extract32(v32, 19, 6); 238 return true; 239 } 240 return false; 241} 242 243/* Return true if v64 is a valid float64 immediate. */ 244static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 245{ 246 if (extract64(v64, 0, 48) == 0 247 && (extract64(v64, 54, 9) == 0x100 248 || extract64(v64, 54, 9) == 0x0ff)) { 249 *cmode = 0xf; 250 *imm8 = (extract64(v64, 63, 1) << 7) 251 | (extract64(v64, 54, 1) << 6) 252 | extract64(v64, 48, 6); 253 return true; 254 } 255 return false; 256} 257 258/* 259 * Return non-zero if v32 can be formed by MOVI+ORR. 260 * Place the parameters for MOVI in (cmode, imm8). 261 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 262 */ 263static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 264{ 265 int i; 266 267 for (i = 6; i > 0; i -= 2) { 268 /* Mask out one byte we can add with ORR. */ 269 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 270 if (is_shimm32(tmp, cmode, imm8) || 271 is_soimm32(tmp, cmode, imm8)) { 272 break; 273 } 274 } 275 return i; 276} 277 278/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 279static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 280{ 281 if (v32 == deposit32(v32, 16, 16, v32)) { 282 return is_shimm16(v32, cmode, imm8); 283 } else { 284 return is_shimm32(v32, cmode, imm8); 285 } 286} 287 288static bool tcg_target_const_match(int64_t val, int ct, 289 TCGType type, TCGCond cond, int vece) 290{ 291 if (ct & TCG_CT_CONST) { 292 return 1; 293 } 294 if (type == TCG_TYPE_I32) { 295 val = (int32_t)val; 296 } 297 298 if (ct & TCG_CT_CONST_CMP) { 299 if (is_tst_cond(cond)) { 300 ct |= TCG_CT_CONST_LIMM; 301 } else { 302 ct |= TCG_CT_CONST_AIMM; 303 } 304 } 305 306 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 307 return 1; 308 } 309 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 310 return 1; 311 } 312 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 313 return 1; 314 } 315 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 316 return 1; 317 } 318 319 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 320 case 0: 321 break; 322 case TCG_CT_CONST_ANDI: 323 val = ~val; 324 /* fallthru */ 325 case TCG_CT_CONST_ORRI: 326 if (val == deposit64(val, 32, 32, val)) { 327 int cmode, imm8; 328 return is_shimm1632(val, &cmode, &imm8); 329 } 330 break; 331 default: 332 /* Both bits should not be set for the same insn. 
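           As an illustration, the ORRI constraint accepts
           0x00ff00ff00ff00ff (the same 32-bit pattern in both halves,
           a shifted 8-bit immediate per 16-bit lane), while the ANDI
           constraint accepts its complement 0xff00ff00ff00ff00, which
           BIC handles by clearing those lanes.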
*/ 333 g_assert_not_reached(); 334 } 335 336 return 0; 337} 338 339enum aarch64_cond_code { 340 COND_EQ = 0x0, 341 COND_NE = 0x1, 342 COND_CS = 0x2, /* Unsigned greater or equal */ 343 COND_HS = COND_CS, /* ALIAS greater or equal */ 344 COND_CC = 0x3, /* Unsigned less than */ 345 COND_LO = COND_CC, /* ALIAS Lower */ 346 COND_MI = 0x4, /* Negative */ 347 COND_PL = 0x5, /* Zero or greater */ 348 COND_VS = 0x6, /* Overflow */ 349 COND_VC = 0x7, /* No overflow */ 350 COND_HI = 0x8, /* Unsigned greater than */ 351 COND_LS = 0x9, /* Unsigned less or equal */ 352 COND_GE = 0xa, 353 COND_LT = 0xb, 354 COND_GT = 0xc, 355 COND_LE = 0xd, 356 COND_AL = 0xe, 357 COND_NV = 0xf, /* behaves like COND_AL here */ 358}; 359 360static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 361 [TCG_COND_EQ] = COND_EQ, 362 [TCG_COND_NE] = COND_NE, 363 [TCG_COND_LT] = COND_LT, 364 [TCG_COND_GE] = COND_GE, 365 [TCG_COND_LE] = COND_LE, 366 [TCG_COND_GT] = COND_GT, 367 /* unsigned */ 368 [TCG_COND_LTU] = COND_LO, 369 [TCG_COND_GTU] = COND_HI, 370 [TCG_COND_GEU] = COND_HS, 371 [TCG_COND_LEU] = COND_LS, 372 /* bit test */ 373 [TCG_COND_TSTEQ] = COND_EQ, 374 [TCG_COND_TSTNE] = COND_NE, 375}; 376 377typedef enum { 378 LDST_ST = 0, /* store */ 379 LDST_LD = 1, /* load */ 380 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 381 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 382} AArch64LdstType; 383 384/* We encode the format of the insn into the beginning of the name, so that 385 we can have the preprocessor help "typecheck" the insn vs the output 386 function. Arm didn't provide us with nice names for the formats, so we 387 use the section number of the architecture reference manual in which the 388 instruction group is described. */ 389typedef enum { 390 /* Compare and branch (immediate). */ 391 I3201_CBZ = 0x34000000, 392 I3201_CBNZ = 0x35000000, 393 394 /* Conditional branch (immediate). */ 395 I3202_B_C = 0x54000000, 396 397 /* Test and branch (immediate). */ 398 I3205_TBZ = 0x36000000, 399 I3205_TBNZ = 0x37000000, 400 401 /* Unconditional branch (immediate). */ 402 I3206_B = 0x14000000, 403 I3206_BL = 0x94000000, 404 405 /* Unconditional branch (register). */ 406 I3207_BR = 0xd61f0000, 407 I3207_BLR = 0xd63f0000, 408 I3207_RET = 0xd65f0000, 409 410 /* AdvSIMD load/store single structure. */ 411 I3303_LD1R = 0x0d40c000, 412 413 /* Load literal for loading the address at pc-relative offset */ 414 I3305_LDR = 0x58000000, 415 I3305_LDR_v64 = 0x5c000000, 416 I3305_LDR_v128 = 0x9c000000, 417 418 /* Load/store exclusive. */ 419 I3306_LDXP = 0xc8600000, 420 I3306_STXP = 0xc8200000, 421 422 /* Load/store register. Described here as 3.3.12, but the helper 423 that emits them can transform to 3.3.10 or 3.3.13. 
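       3.3.12 is the unscaled 9-bit signed-immediate form; OR-ing in
       I3312_TO_I3310 converts an insn to the register-offset form
       (3.3.10), and I3312_TO_I3313 to the scaled unsigned 12-bit form
       (3.3.13), as tcg_out_insn_3310() and tcg_out_insn_3313() do below.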
*/ 424 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 425 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 426 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 427 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 428 429 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 430 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 431 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 432 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 433 434 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 435 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 436 437 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 438 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 439 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 440 441 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 442 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 443 444 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 445 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 446 447 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 448 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 449 450 I3312_TO_I3310 = 0x00200800, 451 I3312_TO_I3313 = 0x01000000, 452 453 /* Load/store register pair instructions. */ 454 I3314_LDP = 0x28400000, 455 I3314_STP = 0x28000000, 456 457 /* Add/subtract immediate instructions. */ 458 I3401_ADDI = 0x11000000, 459 I3401_ADDSI = 0x31000000, 460 I3401_SUBI = 0x51000000, 461 I3401_SUBSI = 0x71000000, 462 463 /* Bitfield instructions. */ 464 I3402_BFM = 0x33000000, 465 I3402_SBFM = 0x13000000, 466 I3402_UBFM = 0x53000000, 467 468 /* Extract instruction. */ 469 I3403_EXTR = 0x13800000, 470 471 /* Logical immediate instructions. */ 472 I3404_ANDI = 0x12000000, 473 I3404_ORRI = 0x32000000, 474 I3404_EORI = 0x52000000, 475 I3404_ANDSI = 0x72000000, 476 477 /* Move wide immediate instructions. */ 478 I3405_MOVN = 0x12800000, 479 I3405_MOVZ = 0x52800000, 480 I3405_MOVK = 0x72800000, 481 482 /* PC relative addressing instructions. */ 483 I3406_ADR = 0x10000000, 484 I3406_ADRP = 0x90000000, 485 486 /* Add/subtract extended register instructions. */ 487 I3501_ADD = 0x0b200000, 488 489 /* Add/subtract shifted register instructions (without a shift). */ 490 I3502_ADD = 0x0b000000, 491 I3502_ADDS = 0x2b000000, 492 I3502_SUB = 0x4b000000, 493 I3502_SUBS = 0x6b000000, 494 495 /* Add/subtract shifted register instructions (with a shift). */ 496 I3502S_ADD_LSL = I3502_ADD, 497 498 /* Add/subtract with carry instructions. */ 499 I3503_ADC = 0x1a000000, 500 I3503_SBC = 0x5a000000, 501 502 /* Conditional select instructions. */ 503 I3506_CSEL = 0x1a800000, 504 I3506_CSINC = 0x1a800400, 505 I3506_CSINV = 0x5a800000, 506 I3506_CSNEG = 0x5a800400, 507 508 /* Data-processing (1 source) instructions. */ 509 I3507_CLZ = 0x5ac01000, 510 I3507_RBIT = 0x5ac00000, 511 I3507_REV = 0x5ac00000, /* + size << 10 */ 512 513 /* Data-processing (2 source) instructions. */ 514 I3508_LSLV = 0x1ac02000, 515 I3508_LSRV = 0x1ac02400, 516 I3508_ASRV = 0x1ac02800, 517 I3508_RORV = 0x1ac02c00, 518 I3508_SMULH = 0x9b407c00, 519 I3508_UMULH = 0x9bc07c00, 520 I3508_UDIV = 0x1ac00800, 521 I3508_SDIV = 0x1ac00c00, 522 523 /* Data-processing (3 source) instructions. */ 524 I3509_MADD = 0x1b000000, 525 I3509_MSUB = 0x1b008000, 526 527 /* Logical shifted register instructions (without a shift). 
*/ 528 I3510_AND = 0x0a000000, 529 I3510_BIC = 0x0a200000, 530 I3510_ORR = 0x2a000000, 531 I3510_ORN = 0x2a200000, 532 I3510_EOR = 0x4a000000, 533 I3510_EON = 0x4a200000, 534 I3510_ANDS = 0x6a000000, 535 536 /* Logical shifted register instructions (with a shift). */ 537 I3502S_AND_LSR = I3510_AND | (1 << 22), 538 539 /* AdvSIMD copy */ 540 I3605_DUP = 0x0e000400, 541 I3605_INS = 0x4e001c00, 542 I3605_UMOV = 0x0e003c00, 543 544 /* AdvSIMD modified immediate */ 545 I3606_MOVI = 0x0f000400, 546 I3606_MVNI = 0x2f000400, 547 I3606_BIC = 0x2f001400, 548 I3606_ORR = 0x0f001400, 549 550 /* AdvSIMD scalar shift by immediate */ 551 I3609_SSHR = 0x5f000400, 552 I3609_SSRA = 0x5f001400, 553 I3609_SHL = 0x5f005400, 554 I3609_USHR = 0x7f000400, 555 I3609_USRA = 0x7f001400, 556 I3609_SLI = 0x7f005400, 557 558 /* AdvSIMD scalar three same */ 559 I3611_SQADD = 0x5e200c00, 560 I3611_SQSUB = 0x5e202c00, 561 I3611_CMGT = 0x5e203400, 562 I3611_CMGE = 0x5e203c00, 563 I3611_SSHL = 0x5e204400, 564 I3611_ADD = 0x5e208400, 565 I3611_CMTST = 0x5e208c00, 566 I3611_UQADD = 0x7e200c00, 567 I3611_UQSUB = 0x7e202c00, 568 I3611_CMHI = 0x7e203400, 569 I3611_CMHS = 0x7e203c00, 570 I3611_USHL = 0x7e204400, 571 I3611_SUB = 0x7e208400, 572 I3611_CMEQ = 0x7e208c00, 573 574 /* AdvSIMD scalar two-reg misc */ 575 I3612_CMGT0 = 0x5e208800, 576 I3612_CMEQ0 = 0x5e209800, 577 I3612_CMLT0 = 0x5e20a800, 578 I3612_ABS = 0x5e20b800, 579 I3612_CMGE0 = 0x7e208800, 580 I3612_CMLE0 = 0x7e209800, 581 I3612_NEG = 0x7e20b800, 582 583 /* AdvSIMD shift by immediate */ 584 I3614_SSHR = 0x0f000400, 585 I3614_SSRA = 0x0f001400, 586 I3614_SHL = 0x0f005400, 587 I3614_SLI = 0x2f005400, 588 I3614_USHR = 0x2f000400, 589 I3614_USRA = 0x2f001400, 590 591 /* AdvSIMD three same. */ 592 I3616_ADD = 0x0e208400, 593 I3616_AND = 0x0e201c00, 594 I3616_BIC = 0x0e601c00, 595 I3616_BIF = 0x2ee01c00, 596 I3616_BIT = 0x2ea01c00, 597 I3616_BSL = 0x2e601c00, 598 I3616_EOR = 0x2e201c00, 599 I3616_MUL = 0x0e209c00, 600 I3616_ORR = 0x0ea01c00, 601 I3616_ORN = 0x0ee01c00, 602 I3616_SUB = 0x2e208400, 603 I3616_CMGT = 0x0e203400, 604 I3616_CMGE = 0x0e203c00, 605 I3616_CMTST = 0x0e208c00, 606 I3616_CMHI = 0x2e203400, 607 I3616_CMHS = 0x2e203c00, 608 I3616_CMEQ = 0x2e208c00, 609 I3616_SMAX = 0x0e206400, 610 I3616_SMIN = 0x0e206c00, 611 I3616_SSHL = 0x0e204400, 612 I3616_SQADD = 0x0e200c00, 613 I3616_SQSUB = 0x0e202c00, 614 I3616_UMAX = 0x2e206400, 615 I3616_UMIN = 0x2e206c00, 616 I3616_UQADD = 0x2e200c00, 617 I3616_UQSUB = 0x2e202c00, 618 I3616_USHL = 0x2e204400, 619 620 /* AdvSIMD two-reg misc. */ 621 I3617_CMGT0 = 0x0e208800, 622 I3617_CMEQ0 = 0x0e209800, 623 I3617_CMLT0 = 0x0e20a800, 624 I3617_CMGE0 = 0x2e208800, 625 I3617_CMLE0 = 0x2e209800, 626 I3617_NOT = 0x2e205800, 627 I3617_ABS = 0x0e20b800, 628 I3617_NEG = 0x2e20b800, 629 630 /* System instructions. */ 631 NOP = 0xd503201f, 632 DMB_ISH = 0xd50338bf, 633 DMB_LD = 0x00000100, 634 DMB_ST = 0x00000200, 635 636 BTI_C = 0xd503245f, 637 BTI_J = 0xd503249f, 638 BTI_JC = 0xd50324df, 639} AArch64Insn; 640 641static inline uint32_t tcg_in32(TCGContext *s) 642{ 643 uint32_t v = *(uint32_t *)s->code_ptr; 644 return v; 645} 646 647/* Emit an opcode with "type-checking" of the format. */ 648#define tcg_out_insn(S, FMT, OP, ...) 
\ 649 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 650 651static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 652 TCGReg rt, TCGReg rn, unsigned size) 653{ 654 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 655} 656 657static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 658 int imm19, TCGReg rt) 659{ 660 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 661} 662 663static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs, 664 TCGReg rt, TCGReg rt2, TCGReg rn) 665{ 666 tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt); 667} 668 669static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 670 TCGReg rt, int imm19) 671{ 672 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 673} 674 675static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 676 TCGCond c, int imm19) 677{ 678 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 679} 680 681static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn, 682 TCGReg rt, int imm6, int imm14) 683{ 684 insn |= (imm6 & 0x20) << (31 - 5); 685 insn |= (imm6 & 0x1f) << 19; 686 tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt); 687} 688 689static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 690{ 691 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 692} 693 694static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 695{ 696 tcg_out32(s, insn | rn << 5); 697} 698 699static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 700 TCGReg r1, TCGReg r2, TCGReg rn, 701 tcg_target_long ofs, bool pre, bool w) 702{ 703 insn |= 1u << 31; /* ext */ 704 insn |= pre << 24; 705 insn |= w << 23; 706 707 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 708 insn |= (ofs & (0x7f << 3)) << (15 - 3); 709 710 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 711} 712 713static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 714 TCGReg rd, TCGReg rn, uint64_t aimm) 715{ 716 if (aimm > 0xfff) { 717 tcg_debug_assert((aimm & 0xfff) == 0); 718 aimm >>= 12; 719 tcg_debug_assert(aimm <= 0xfff); 720 aimm |= 1 << 12; /* apply LSL 12 */ 721 } 722 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 723} 724 725/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 726 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 727 that feed the DecodeBitMasks pseudo function. */ 728static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 729 TCGReg rd, TCGReg rn, int n, int immr, int imms) 730{ 731 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 732 | rn << 5 | rd); 733} 734 735#define tcg_out_insn_3404 tcg_out_insn_3402 736 737static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 738 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 739{ 740 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 741 | rn << 5 | rd); 742} 743 744/* This function is used for the Move (wide immediate) instruction group. 745 Note that SHIFT is a full shift count, not the 2 bit HW field. 
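   For example, a MOVZ that sets bits [47:32] is requested with
   shift == 32; the "shift << (21 - 4)" below encodes that as hw == 2
   in bits [22:21] of the instruction.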
 */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.
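       The "(~rn & 0x20) << 6" term below sets bit 11 exactly when rn is
       one of the general registers (TCG register numbers 0-31), so the
       DUP, INS and UMOV emissions elsewhere in this file can share this
       single helper regardless of which register file the source is in.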
*/ 814 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11) 815 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5); 816} 817 818static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, 819 TCGReg rd, bool op, int cmode, uint8_t imm8) 820{ 821 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f) 822 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); 823} 824 825static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn, 826 TCGReg rd, TCGReg rn, unsigned immhb) 827{ 828 tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f)); 829} 830 831static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn, 832 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 833{ 834 tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16 835 | (rn & 0x1f) << 5 | (rd & 0x1f)); 836} 837 838static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn, 839 unsigned size, TCGReg rd, TCGReg rn) 840{ 841 tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f)); 842} 843 844static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, 845 TCGReg rd, TCGReg rn, unsigned immhb) 846{ 847 tcg_out32(s, insn | q << 30 | immhb << 16 848 | (rn & 0x1f) << 5 | (rd & 0x1f)); 849} 850 851static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q, 852 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 853{ 854 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16 855 | (rn & 0x1f) << 5 | (rd & 0x1f)); 856} 857 858static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q, 859 unsigned size, TCGReg rd, TCGReg rn) 860{ 861 tcg_out32(s, insn | q << 30 | (size << 22) 862 | (rn & 0x1f) << 5 | (rd & 0x1f)); 863} 864 865static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, 866 TCGReg rd, TCGReg base, TCGType ext, 867 TCGReg regoff) 868{ 869 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 870 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 871 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 872} 873 874static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 875 TCGReg rd, TCGReg rn, intptr_t offset) 876{ 877 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 878} 879 880static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 881 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 882{ 883 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 884 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 885 | rn << 5 | (rd & 0x1f)); 886} 887 888static void tcg_out_bti(TCGContext *s, AArch64Insn insn) 889{ 890 /* 891 * While BTI insns are nops on hosts without FEAT_BTI, 892 * there is no point in emitting them in that case either. 893 */ 894 if (cpuinfo & CPUINFO_BTI) { 895 tcg_out32(s, insn); 896 } 897} 898 899/* Register to register move using ORR (shifted register with no shift). */ 900static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 901{ 902 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 903} 904 905/* Register to register move using ADDI (move to/from SP). */ 906static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 907{ 908 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 909} 910 911/* This function is used for the Logical (immediate) instruction group. 912 The value of LIMM must satisfy IS_LIMM. See the comment above about 913 only supporting simplified logical immediates. 
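   For example, limm == 0x0ff0 (the 0..01..10..0 form) gives h == 52 and
   l == 4 below, hence r == 60 and c == 7: an 8-bit run of ones rotated
   right by 60, which DecodeBitMasks expands back into 0x0ff0.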
*/ 914static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 915 TCGReg rd, TCGReg rn, uint64_t limm) 916{ 917 unsigned h, l, r, c; 918 919 tcg_debug_assert(is_limm(limm)); 920 921 h = clz64(limm); 922 l = ctz64(limm); 923 if (l == 0) { 924 r = 0; /* form 0....01....1 */ 925 c = ctz64(~limm) - 1; 926 if (h == 0) { 927 r = clz64(~limm); /* form 1..10..01..1 */ 928 c += r; 929 } 930 } else { 931 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 932 c = r - h - 1; 933 } 934 if (ext == TCG_TYPE_I32) { 935 r &= 31; 936 c &= 31; 937 } 938 939 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 940} 941 942static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 943 TCGReg rd, int64_t v64) 944{ 945 bool q = type == TCG_TYPE_V128; 946 int cmode, imm8, i; 947 948 /* Test all bytes equal first. */ 949 if (vece == MO_8) { 950 imm8 = (uint8_t)v64; 951 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 952 return; 953 } 954 955 /* 956 * Test all bytes 0x00 or 0xff second. This can match cases that 957 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 958 */ 959 for (i = imm8 = 0; i < 8; i++) { 960 uint8_t byte = v64 >> (i * 8); 961 if (byte == 0xff) { 962 imm8 |= 1 << i; 963 } else if (byte != 0) { 964 goto fail_bytes; 965 } 966 } 967 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 968 return; 969 fail_bytes: 970 971 /* 972 * Tests for various replications. For each element width, if we 973 * cannot find an expansion there's no point checking a larger 974 * width because we already know by replication it cannot match. 975 */ 976 if (vece == MO_16) { 977 uint16_t v16 = v64; 978 979 if (is_shimm16(v16, &cmode, &imm8)) { 980 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 981 return; 982 } 983 if (is_shimm16(~v16, &cmode, &imm8)) { 984 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 985 return; 986 } 987 988 /* 989 * Otherwise, all remaining constants can be loaded in two insns: 990 * rd = v16 & 0xff, rd |= v16 & 0xff00. 991 */ 992 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 993 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 994 return; 995 } else if (vece == MO_32) { 996 uint32_t v32 = v64; 997 uint32_t n32 = ~v32; 998 999 if (is_shimm32(v32, &cmode, &imm8) || 1000 is_soimm32(v32, &cmode, &imm8) || 1001 is_fimm32(v32, &cmode, &imm8)) { 1002 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 1003 return; 1004 } 1005 if (is_shimm32(n32, &cmode, &imm8) || 1006 is_soimm32(n32, &cmode, &imm8)) { 1007 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 1008 return; 1009 } 1010 1011 /* 1012 * Restrict the set of constants to those we can load with 1013 * two instructions. Others we load from the pool. 1014 */ 1015 i = is_shimm32_pair(v32, &cmode, &imm8); 1016 if (i) { 1017 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 1018 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 1019 return; 1020 } 1021 i = is_shimm32_pair(n32, &cmode, &imm8); 1022 if (i) { 1023 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 1024 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 1025 return; 1026 } 1027 } else if (is_fimm64(v64, &cmode, &imm8)) { 1028 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 1029 return; 1030 } 1031 1032 /* 1033 * As a last resort, load from the constant pool. Sadly there 1034 * is no LD1R (literal), so store the full 16-byte vector. 
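     * The pool entry is referenced with R_AARCH64_CONDBR19 because LDR
     * (literal) keeps its 19-bit offset in the same field at bit 5 that
     * reloc_pc19() above patches for conditional branches.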
1035 */ 1036 if (type == TCG_TYPE_V128) { 1037 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 1038 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 1039 } else { 1040 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 1041 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 1042 } 1043} 1044 1045static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 1046 TCGReg rd, TCGReg rs) 1047{ 1048 int is_q = type - TCG_TYPE_V64; 1049 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 1050 return true; 1051} 1052 1053static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 1054 TCGReg r, TCGReg base, intptr_t offset) 1055{ 1056 TCGReg temp = TCG_REG_TMP0; 1057 1058 if (offset < -0xffffff || offset > 0xffffff) { 1059 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 1060 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 1061 base = temp; 1062 } else { 1063 AArch64Insn add_insn = I3401_ADDI; 1064 1065 if (offset < 0) { 1066 add_insn = I3401_SUBI; 1067 offset = -offset; 1068 } 1069 if (offset & 0xfff000) { 1070 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1071 base = temp; 1072 } 1073 if (offset & 0xfff) { 1074 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1075 base = temp; 1076 } 1077 } 1078 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1079 return true; 1080} 1081 1082static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1083 tcg_target_long value) 1084{ 1085 tcg_target_long svalue = value; 1086 tcg_target_long ivalue = ~value; 1087 tcg_target_long t0, t1, t2; 1088 int s0, s1; 1089 AArch64Insn opc; 1090 1091 switch (type) { 1092 case TCG_TYPE_I32: 1093 case TCG_TYPE_I64: 1094 tcg_debug_assert(rd < 32); 1095 break; 1096 default: 1097 g_assert_not_reached(); 1098 } 1099 1100 /* For 32-bit values, discard potential garbage in value. For 64-bit 1101 values within [2**31, 2**32-1], we can create smaller sequences by 1102 interpreting this as a negative 32-bit number, while ensuring that 1103 the high 32 bits are cleared by setting SF=0. */ 1104 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1105 svalue = (int32_t)value; 1106 value = (uint32_t)value; 1107 ivalue = (uint32_t)ivalue; 1108 type = TCG_TYPE_I32; 1109 } 1110 1111 /* Speed things up by handling the common case of small positive 1112 and negative values specially. */ 1113 if ((value & ~0xffffull) == 0) { 1114 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1115 return; 1116 } else if ((ivalue & ~0xffffull) == 0) { 1117 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1118 return; 1119 } 1120 1121 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1122 use the sign-extended value. That lets us match rotated values such 1123 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1124 if (is_limm(svalue)) { 1125 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1126 return; 1127 } 1128 1129 /* Look for host pointer values within 4G of the PC. This happens 1130 often when loading pointers to QEMU's own data structures. 
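       Values within +/-1MiB of the PC are reached with a single ADR;
       otherwise, when the 4KiB page of the value is within +/-4GiB,
       ADRP forms the page base and an ADDI supplies the low 12 bits.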
*/ 1131 if (type == TCG_TYPE_I64) { 1132 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1133 tcg_target_long disp = value - src_rx; 1134 if (disp == sextract64(disp, 0, 21)) { 1135 tcg_out_insn(s, 3406, ADR, rd, disp); 1136 return; 1137 } 1138 disp = (value >> 12) - (src_rx >> 12); 1139 if (disp == sextract64(disp, 0, 21)) { 1140 tcg_out_insn(s, 3406, ADRP, rd, disp); 1141 if (value & 0xfff) { 1142 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1143 } 1144 return; 1145 } 1146 } 1147 1148 /* Would it take fewer insns to begin with MOVN? */ 1149 if (ctpop64(value) >= 32) { 1150 t0 = ivalue; 1151 opc = I3405_MOVN; 1152 } else { 1153 t0 = value; 1154 opc = I3405_MOVZ; 1155 } 1156 s0 = ctz64(t0) & (63 & -16); 1157 t1 = t0 & ~(0xffffull << s0); 1158 s1 = ctz64(t1) & (63 & -16); 1159 t2 = t1 & ~(0xffffull << s1); 1160 if (t2 == 0) { 1161 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1162 if (t1 != 0) { 1163 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1164 } 1165 return; 1166 } 1167 1168 /* For more than 2 insns, dump it into the constant pool. */ 1169 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1170 tcg_out_insn(s, 3305, LDR, 0, rd); 1171} 1172 1173static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1174{ 1175 return false; 1176} 1177 1178static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1179 tcg_target_long imm) 1180{ 1181 /* This function is only used for passing structs by reference. */ 1182 g_assert_not_reached(); 1183} 1184 1185/* Define something more legible for general use. */ 1186#define tcg_out_ldst_r tcg_out_insn_3310 1187 1188static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1189 TCGReg rn, intptr_t offset, int lgsize) 1190{ 1191 /* If the offset is naturally aligned and in range, then we can 1192 use the scaled uimm12 encoding */ 1193 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1194 uintptr_t scaled_uimm = offset >> lgsize; 1195 if (scaled_uimm <= 0xfff) { 1196 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1197 return; 1198 } 1199 } 1200 1201 /* Small signed offsets can use the unscaled encoding. */ 1202 if (offset >= -256 && offset < 256) { 1203 tcg_out_insn_3312(s, insn, rd, rn, offset); 1204 return; 1205 } 1206 1207 /* Worst-case scenario, move offset to temp register, use reg offset. 
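       For example, an 8-byte load at offset 0x7ff8 uses the scaled
       uimm12 form above (scaled_uimm == 0xfff), offset -8 uses the
       unscaled 9-bit form, and an unaligned offset such as 0x12345
       falls through to the movi-plus-register-offset path below.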
 */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 4 << type, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (is_tst_cond(cond)) {
        if (!const_b) {
            tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_debug_assert(is_limm(b));
            tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
        }
    } else {
        if (!const_b) {
            tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
        } else if (b >= 0) {
            tcg_debug_assert(is_aimm(b));
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_debug_assert(is_aimm(-b));
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    int tbit = -1;
    bool need_cmp = true;

    switch (c) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* cmp xN,0; b.ne L -> cbnz xN,L */
        if (b_const && b == 0) {
            need_cmp = false;
        }
        break;
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
        if (b_const && b == 0) {
            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
            tbit = ext ?
63 : 31; 1461 need_cmp = false; 1462 } 1463 break; 1464 case TCG_COND_TSTEQ: 1465 case TCG_COND_TSTNE: 1466 /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */ 1467 if (b_const && is_power_of_2(b)) { 1468 tbit = ctz64(b); 1469 need_cmp = false; 1470 } 1471 break; 1472 default: 1473 break; 1474 } 1475 1476 if (need_cmp) { 1477 tcg_out_cmp(s, ext, c, a, b, b_const); 1478 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1479 tcg_out_insn(s, 3202, B_C, c, 0); 1480 return; 1481 } 1482 1483 if (tbit >= 0) { 1484 tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0); 1485 switch (c) { 1486 case TCG_COND_TSTEQ: 1487 tcg_out_insn(s, 3205, TBZ, a, tbit, 0); 1488 break; 1489 case TCG_COND_TSTNE: 1490 tcg_out_insn(s, 3205, TBNZ, a, tbit, 0); 1491 break; 1492 default: 1493 g_assert_not_reached(); 1494 } 1495 } else { 1496 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1497 switch (c) { 1498 case TCG_COND_EQ: 1499 tcg_out_insn(s, 3201, CBZ, ext, a, 0); 1500 break; 1501 case TCG_COND_NE: 1502 tcg_out_insn(s, 3201, CBNZ, ext, a, 0); 1503 break; 1504 default: 1505 g_assert_not_reached(); 1506 } 1507 } 1508} 1509 1510static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, 1511 TCGReg rd, TCGReg rn) 1512{ 1513 /* REV, REV16, REV32 */ 1514 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); 1515} 1516 1517static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1518 TCGReg rd, TCGReg rn) 1519{ 1520 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1521 int bits = (8 << s_bits) - 1; 1522 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1523} 1524 1525static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn) 1526{ 1527 tcg_out_sxt(s, type, MO_8, rd, rn); 1528} 1529 1530static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn) 1531{ 1532 tcg_out_sxt(s, type, MO_16, rd, rn); 1533} 1534 1535static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn) 1536{ 1537 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn); 1538} 1539 1540static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn) 1541{ 1542 tcg_out_ext32s(s, rd, rn); 1543} 1544 1545static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1546 TCGReg rd, TCGReg rn) 1547{ 1548 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1549 int bits = (8 << s_bits) - 1; 1550 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1551} 1552 1553static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn) 1554{ 1555 tcg_out_uxt(s, MO_8, rd, rn); 1556} 1557 1558static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn) 1559{ 1560 tcg_out_uxt(s, MO_16, rd, rn); 1561} 1562 1563static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn) 1564{ 1565 tcg_out_movr(s, TCG_TYPE_I32, rd, rn); 1566} 1567 1568static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn) 1569{ 1570 tcg_out_ext32u(s, rd, rn); 1571} 1572 1573static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) 1574{ 1575 tcg_out_mov(s, TCG_TYPE_I32, rd, rn); 1576} 1577 1578static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1579 TCGReg rn, int64_t aimm) 1580{ 1581 if (aimm >= 0) { 1582 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1583 } else { 1584 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1585 } 1586} 1587 1588static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1589 TCGReg rh, TCGReg al, TCGReg ah, 1590 tcg_target_long bl, tcg_target_long bh, 1591 bool const_bl, bool const_bh, bool sub) 1592{ 1593 TCGReg orig_rl = rl; 1594 AArch64Insn insn; 1595 1596 if (rl 
== ah || (!const_bh && rl == bh)) { 1597 rl = TCG_REG_TMP0; 1598 } 1599 1600 if (const_bl) { 1601 if (bl < 0) { 1602 bl = -bl; 1603 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1604 } else { 1605 insn = sub ? I3401_SUBSI : I3401_ADDSI; 1606 } 1607 1608 if (unlikely(al == TCG_REG_XZR)) { 1609 /* ??? We want to allow al to be zero for the benefit of 1610 negation via subtraction. However, that leaves open the 1611 possibility of adding 0+const in the low part, and the 1612 immediate add instructions encode XSP not XZR. Don't try 1613 anything more elaborate here than loading another zero. */ 1614 al = TCG_REG_TMP0; 1615 tcg_out_movi(s, ext, al, 0); 1616 } 1617 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1618 } else { 1619 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1620 } 1621 1622 insn = I3503_ADC; 1623 if (const_bh) { 1624 /* Note that the only two constants we support are 0 and -1, and 1625 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1626 if ((bh != 0) ^ sub) { 1627 insn = I3503_SBC; 1628 } 1629 bh = TCG_REG_XZR; 1630 } else if (sub) { 1631 insn = I3503_SBC; 1632 } 1633 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1634 1635 tcg_out_mov(s, ext, orig_rl, rl); 1636} 1637 1638static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1639{ 1640 static const uint32_t sync[] = { 1641 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1642 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1643 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1644 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1645 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1646 }; 1647 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1648} 1649 1650static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1651 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1652{ 1653 TCGReg a1 = a0; 1654 if (is_ctz) { 1655 a1 = TCG_REG_TMP0; 1656 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1657 } 1658 if (const_b && b == (ext ? 
64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, TCG_COND_NE, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
    }
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/* We expect to use a 7-bit scaled negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -512

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        TCGReg addr_adj;
        TCGType mask_type;
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}.  */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

        /* Extract the TLB index from the address into TMP0.
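           The AND_LSR form folds the mask and the shift into a single
           instruction: TMP0 still holds the byte-offset mask loaded by
           the LDP above, and the address shifted right by
           (page_bits - CPU_TLB_ENTRY_BITS) supplies the index, so TMP0
           ends up holding the entry's byte offset into the TLB table,
           which the ADD below turns into the CPUTLBEntry address.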
*/ 1773 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1774 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg, 1775 s->page_bits - CPU_TLB_ENTRY_BITS); 1776 1777 /* Add the tlb_table pointer, forming the CPUTLBEntry address. */ 1778 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0); 1779 1780 /* Load the tlb comparator into TMP0, and the fast path addend. */ 1781 QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN); 1782 tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1, 1783 is_ld ? offsetof(CPUTLBEntry, addr_read) 1784 : offsetof(CPUTLBEntry, addr_write)); 1785 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 1786 offsetof(CPUTLBEntry, addend)); 1787 1788 /* 1789 * For aligned accesses, we check the first byte and include 1790 * the alignment bits within the address. For unaligned access, 1791 * we check that we don't cross pages using the address of the 1792 * last byte of the access. 1793 */ 1794 if (a_mask >= s_mask) { 1795 addr_adj = addr_reg; 1796 } else { 1797 addr_adj = TCG_REG_TMP2; 1798 tcg_out_insn(s, 3401, ADDI, addr_type, 1799 addr_adj, addr_reg, s_mask - a_mask); 1800 } 1801 compare_mask = (uint64_t)s->page_mask | a_mask; 1802 1803 /* Store the page mask part of the address into TMP2. */ 1804 tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2, 1805 addr_adj, compare_mask); 1806 1807 /* Perform the address comparison. */ 1808 tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0); 1809 1810 /* If not equal, we jump to the slow path. */ 1811 ldst->label_ptr[0] = s->code_ptr; 1812 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1813 1814 h->base = TCG_REG_TMP1; 1815 h->index = addr_reg; 1816 h->index_ext = addr_type; 1817 } else { 1818 if (a_mask) { 1819 ldst = new_ldst_label(s); 1820 1821 ldst->is_ld = is_ld; 1822 ldst->oi = oi; 1823 ldst->addrlo_reg = addr_reg; 1824 1825 /* tst addr, #mask */ 1826 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); 1827 1828 /* b.ne slow_path */ 1829 ldst->label_ptr[0] = s->code_ptr; 1830 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1831 } 1832 1833 if (guest_base || addr_type == TCG_TYPE_I32) { 1834 h->base = TCG_REG_GUEST_BASE; 1835 h->index = addr_reg; 1836 h->index_ext = addr_type; 1837 } else { 1838 h->base = addr_reg; 1839 h->index = TCG_REG_XZR; 1840 h->index_ext = TCG_TYPE_I64; 1841 } 1842 } 1843 1844 return ldst; 1845} 1846 1847static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1848 TCGReg data_r, HostAddress h) 1849{ 1850 switch (memop & MO_SSIZE) { 1851 case MO_UB: 1852 tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index); 1853 break; 1854 case MO_SB: 1855 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1856 data_r, h.base, h.index_ext, h.index); 1857 break; 1858 case MO_UW: 1859 tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index); 1860 break; 1861 case MO_SW: 1862 tcg_out_ldst_r(s, (ext ? 
I3312_LDRSHX : I3312_LDRSHW), 1863 data_r, h.base, h.index_ext, h.index); 1864 break; 1865 case MO_UL: 1866 tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index); 1867 break; 1868 case MO_SL: 1869 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index); 1870 break; 1871 case MO_UQ: 1872 tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index); 1873 break; 1874 default: 1875 g_assert_not_reached(); 1876 } 1877} 1878 1879static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1880 TCGReg data_r, HostAddress h) 1881{ 1882 switch (memop & MO_SIZE) { 1883 case MO_8: 1884 tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index); 1885 break; 1886 case MO_16: 1887 tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index); 1888 break; 1889 case MO_32: 1890 tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index); 1891 break; 1892 case MO_64: 1893 tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index); 1894 break; 1895 default: 1896 g_assert_not_reached(); 1897 } 1898} 1899 1900static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1901 MemOpIdx oi, TCGType data_type) 1902{ 1903 TCGLabelQemuLdst *ldst; 1904 HostAddress h; 1905 1906 ldst = prepare_host_addr(s, &h, addr_reg, oi, true); 1907 tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h); 1908 1909 if (ldst) { 1910 ldst->type = data_type; 1911 ldst->datalo_reg = data_reg; 1912 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1913 } 1914} 1915 1916static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1917 MemOpIdx oi, TCGType data_type) 1918{ 1919 TCGLabelQemuLdst *ldst; 1920 HostAddress h; 1921 1922 ldst = prepare_host_addr(s, &h, addr_reg, oi, false); 1923 tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); 1924 1925 if (ldst) { 1926 ldst->type = data_type; 1927 ldst->datalo_reg = data_reg; 1928 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1929 } 1930} 1931 1932static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, 1933 TCGReg addr_reg, MemOpIdx oi, bool is_ld) 1934{ 1935 TCGLabelQemuLdst *ldst; 1936 HostAddress h; 1937 TCGReg base; 1938 bool use_pair; 1939 1940 ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld); 1941 1942 /* Compose the final address, as LDP/STP have no indexing. */ 1943 if (h.index == TCG_REG_XZR) { 1944 base = h.base; 1945 } else { 1946 base = TCG_REG_TMP2; 1947 if (h.index_ext == TCG_TYPE_I32) { 1948 /* add base, base, index, uxtw */ 1949 tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base, 1950 h.base, h.index, MO_32, 0); 1951 } else { 1952 /* add base, base, index */ 1953 tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index); 1954 } 1955 } 1956 1957 use_pair = h.aa.atom < MO_128 || have_lse2; 1958 1959 if (!use_pair) { 1960 tcg_insn_unit *branch = NULL; 1961 TCGReg ll, lh, sl, sh; 1962 1963 /* 1964 * If we have already checked for 16-byte alignment, that's all 1965 * we need. Otherwise we have determined that misaligned atomicity 1966 * may be handled with two 8-byte loads. 1967 */ 1968 if (h.aa.align < MO_128) { 1969 /* 1970 * TODO: align should be MO_64, so we only need test bit 3, 1971 * which means we could use TBNZ instead of ANDS+B_C. 
1972 */ 1973 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15); 1974 branch = s->code_ptr; 1975 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1976 use_pair = true; 1977 } 1978 1979 if (is_ld) { 1980 /* 1981 * 16-byte atomicity without LSE2 requires LDXP+STXP loop: 1982 * ldxp lo, hi, [base] 1983 * stxp t0, lo, hi, [base] 1984 * cbnz t0, .-8 1985 * Require no overlap between data{lo,hi} and base. 1986 */ 1987 if (datalo == base || datahi == base) { 1988 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base); 1989 base = TCG_REG_TMP2; 1990 } 1991 ll = sl = datalo; 1992 lh = sh = datahi; 1993 } else { 1994 /* 1995 * 16-byte atomicity without LSE2 requires LDXP+STXP loop: 1996 * 1: ldxp t0, t1, [base] 1997 * stxp t0, lo, hi, [base] 1998 * cbnz t0, 1b 1999 */ 2000 tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1); 2001 ll = TCG_REG_TMP0; 2002 lh = TCG_REG_TMP1; 2003 sl = datalo; 2004 sh = datahi; 2005 } 2006 2007 tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base); 2008 tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base); 2009 tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2); 2010 2011 if (use_pair) { 2012 /* "b .+8", branching across the one insn of use_pair. */ 2013 tcg_out_insn(s, 3206, B, 2); 2014 reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr)); 2015 } 2016 } 2017 2018 if (use_pair) { 2019 if (is_ld) { 2020 tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0); 2021 } else { 2022 tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0); 2023 } 2024 } 2025 2026 if (ldst) { 2027 ldst->type = TCG_TYPE_I128; 2028 ldst->datalo_reg = datalo; 2029 ldst->datahi_reg = datahi; 2030 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2031 } 2032} 2033 2034static const tcg_insn_unit *tb_ret_addr; 2035 2036static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 2037{ 2038 const tcg_insn_unit *target; 2039 ptrdiff_t offset; 2040 2041 /* Reuse the zeroing that exists for goto_ptr. */ 2042 if (a0 == 0) { 2043 target = tcg_code_gen_epilogue; 2044 } else { 2045 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 2046 target = tb_ret_addr; 2047 } 2048 2049 offset = tcg_pcrel_diff(s, target) >> 2; 2050 if (offset == sextract64(offset, 0, 26)) { 2051 tcg_out_insn(s, 3206, B, offset); 2052 } else { 2053 /* 2054 * Only x16/x17 generate BTI type Jump (2), 2055 * other registers generate BTI type Jump|Call (3). 2056 */ 2057 QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16); 2058 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target); 2059 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0); 2060 } 2061} 2062 2063static void tcg_out_goto_tb(TCGContext *s, int which) 2064{ 2065 /* 2066 * Direct branch, or indirect address load, will be patched 2067 * by tb_target_set_jmp_target. Assert indirect load offset 2068 * in range early, regardless of direct branch distance. 2069 */ 2070 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); 2071 tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); 2072 2073 set_jmp_insn_offset(s, which); 2074 tcg_out32(s, I3206_B); 2075 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0); 2076 set_jmp_reset_offset(s, which); 2077 tcg_out_bti(s, BTI_J); 2078} 2079 2080void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2081 uintptr_t jmp_rx, uintptr_t jmp_rw) 2082{ 2083 uintptr_t d_addr = tb->jmp_target_addr[n]; 2084 ptrdiff_t d_offset = d_addr - jmp_rx; 2085 tcg_insn_unit insn; 2086 2087 /* Either directly branch, or indirect branch load. 
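       A direct "b <tb>" reaches +/-128MB (26-bit immediate scaled by 4);
       when the destination is further away we instead patch in
       "ldr x16, <literal at jmp_target_addr>", so that the "br x16"
       emitted right after this insn by tcg_out_goto_tb does the jump.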
*/ 2088 if (d_offset == sextract64(d_offset, 0, 28)) { 2089 insn = deposit32(I3206_B, 0, 26, d_offset >> 2); 2090 } else { 2091 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 2092 ptrdiff_t i_offset = i_addr - jmp_rx; 2093 2094 /* Note that we asserted this in range in tcg_out_goto_tb. */ 2095 insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2); 2096 } 2097 qatomic_set((uint32_t *)jmp_rw, insn); 2098 flush_idcache_range(jmp_rx, jmp_rw, 4); 2099} 2100 2101static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2102 const TCGArg args[TCG_MAX_OP_ARGS], 2103 const int const_args[TCG_MAX_OP_ARGS]) 2104{ 2105 /* 99% of the time, we can signal the use of extension registers 2106 by looking to see if the opcode handles 64-bit data. */ 2107 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 2108 2109 /* Hoist the loads of the most common arguments. */ 2110 TCGArg a0 = args[0]; 2111 TCGArg a1 = args[1]; 2112 TCGArg a2 = args[2]; 2113 int c2 = const_args[2]; 2114 2115 /* Some operands are defined with "rZ" constraint, a register or 2116 the zero register. These need not actually test args[I] == 0. */ 2117#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 2118 2119 switch (opc) { 2120 case INDEX_op_goto_ptr: 2121 tcg_out_insn(s, 3207, BR, a0); 2122 break; 2123 2124 case INDEX_op_br: 2125 tcg_out_goto_label(s, arg_label(a0)); 2126 break; 2127 2128 case INDEX_op_ld8u_i32: 2129 case INDEX_op_ld8u_i64: 2130 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 2131 break; 2132 case INDEX_op_ld8s_i32: 2133 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 2134 break; 2135 case INDEX_op_ld8s_i64: 2136 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 2137 break; 2138 case INDEX_op_ld16u_i32: 2139 case INDEX_op_ld16u_i64: 2140 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 2141 break; 2142 case INDEX_op_ld16s_i32: 2143 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 2144 break; 2145 case INDEX_op_ld16s_i64: 2146 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 2147 break; 2148 case INDEX_op_ld_i32: 2149 case INDEX_op_ld32u_i64: 2150 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 2151 break; 2152 case INDEX_op_ld32s_i64: 2153 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 2154 break; 2155 case INDEX_op_ld_i64: 2156 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 2157 break; 2158 2159 case INDEX_op_st8_i32: 2160 case INDEX_op_st8_i64: 2161 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 2162 break; 2163 case INDEX_op_st16_i32: 2164 case INDEX_op_st16_i64: 2165 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 2166 break; 2167 case INDEX_op_st_i32: 2168 case INDEX_op_st32_i64: 2169 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 2170 break; 2171 case INDEX_op_st_i64: 2172 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 2173 break; 2174 2175 case INDEX_op_add_i32: 2176 a2 = (int32_t)a2; 2177 /* FALLTHRU */ 2178 case INDEX_op_add_i64: 2179 if (c2) { 2180 tcg_out_addsubi(s, ext, a0, a1, a2); 2181 } else { 2182 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 2183 } 2184 break; 2185 2186 case INDEX_op_sub_i32: 2187 a2 = (int32_t)a2; 2188 /* FALLTHRU */ 2189 case INDEX_op_sub_i64: 2190 if (c2) { 2191 tcg_out_addsubi(s, ext, a0, a1, -a2); 2192 } else { 2193 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 2194 } 2195 break; 2196 2197 case INDEX_op_neg_i64: 2198 case INDEX_op_neg_i32: 2199 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 2200 break; 2201 2202 case INDEX_op_and_i32: 2203 a2 = (int32_t)a2; 2204 /* FALLTHRU */ 2205 case INDEX_op_and_i64: 2206 if (c2) { 2207 tcg_out_logicali(s, I3404_ANDI, ext, a0, 
a1, a2); 2208 } else { 2209 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 2210 } 2211 break; 2212 2213 case INDEX_op_andc_i32: 2214 a2 = (int32_t)a2; 2215 /* FALLTHRU */ 2216 case INDEX_op_andc_i64: 2217 if (c2) { 2218 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2219 } else { 2220 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2221 } 2222 break; 2223 2224 case INDEX_op_or_i32: 2225 a2 = (int32_t)a2; 2226 /* FALLTHRU */ 2227 case INDEX_op_or_i64: 2228 if (c2) { 2229 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2230 } else { 2231 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2232 } 2233 break; 2234 2235 case INDEX_op_orc_i32: 2236 a2 = (int32_t)a2; 2237 /* FALLTHRU */ 2238 case INDEX_op_orc_i64: 2239 if (c2) { 2240 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); 2241 } else { 2242 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2243 } 2244 break; 2245 2246 case INDEX_op_xor_i32: 2247 a2 = (int32_t)a2; 2248 /* FALLTHRU */ 2249 case INDEX_op_xor_i64: 2250 if (c2) { 2251 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2252 } else { 2253 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2254 } 2255 break; 2256 2257 case INDEX_op_eqv_i32: 2258 a2 = (int32_t)a2; 2259 /* FALLTHRU */ 2260 case INDEX_op_eqv_i64: 2261 if (c2) { 2262 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2263 } else { 2264 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2265 } 2266 break; 2267 2268 case INDEX_op_not_i64: 2269 case INDEX_op_not_i32: 2270 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2271 break; 2272 2273 case INDEX_op_mul_i64: 2274 case INDEX_op_mul_i32: 2275 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2276 break; 2277 2278 case INDEX_op_div_i64: 2279 case INDEX_op_div_i32: 2280 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2281 break; 2282 case INDEX_op_divu_i64: 2283 case INDEX_op_divu_i32: 2284 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2285 break; 2286 2287 case INDEX_op_rem_i64: 2288 case INDEX_op_rem_i32: 2289 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2); 2290 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); 2291 break; 2292 case INDEX_op_remu_i64: 2293 case INDEX_op_remu_i32: 2294 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2); 2295 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); 2296 break; 2297 2298 case INDEX_op_shl_i64: 2299 case INDEX_op_shl_i32: 2300 if (c2) { 2301 tcg_out_shl(s, ext, a0, a1, a2); 2302 } else { 2303 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2304 } 2305 break; 2306 2307 case INDEX_op_shr_i64: 2308 case INDEX_op_shr_i32: 2309 if (c2) { 2310 tcg_out_shr(s, ext, a0, a1, a2); 2311 } else { 2312 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2313 } 2314 break; 2315 2316 case INDEX_op_sar_i64: 2317 case INDEX_op_sar_i32: 2318 if (c2) { 2319 tcg_out_sar(s, ext, a0, a1, a2); 2320 } else { 2321 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2322 } 2323 break; 2324 2325 case INDEX_op_rotr_i64: 2326 case INDEX_op_rotr_i32: 2327 if (c2) { 2328 tcg_out_rotr(s, ext, a0, a1, a2); 2329 } else { 2330 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2331 } 2332 break; 2333 2334 case INDEX_op_rotl_i64: 2335 case INDEX_op_rotl_i32: 2336 if (c2) { 2337 tcg_out_rotl(s, ext, a0, a1, a2); 2338 } else { 2339 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2); 2340 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0); 2341 } 2342 break; 2343 2344 case INDEX_op_clz_i64: 2345 case INDEX_op_clz_i32: 2346 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2347 break; 2348 case INDEX_op_ctz_i64: 2349 case INDEX_op_ctz_i32: 2350 
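        /*
         * Both lower through tcg_out_cltz() earlier in this file; the final
         * argument selects count-trailing-zeros, presumably implemented by
         * bit-reversing the input (RBIT) before the CLZ, with a2/c2
         * supplying the result to use for a zero input.
         */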
tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2351 break; 2352 2353 case INDEX_op_brcond_i32: 2354 a1 = (int32_t)a1; 2355 /* FALLTHRU */ 2356 case INDEX_op_brcond_i64: 2357 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2358 break; 2359 2360 case INDEX_op_setcond_i32: 2361 a2 = (int32_t)a2; 2362 /* FALLTHRU */ 2363 case INDEX_op_setcond_i64: 2364 tcg_out_cmp(s, ext, args[3], a1, a2, c2); 2365 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2366 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2367 TCG_REG_XZR, tcg_invert_cond(args[3])); 2368 break; 2369 2370 case INDEX_op_negsetcond_i32: 2371 a2 = (int32_t)a2; 2372 /* FALLTHRU */ 2373 case INDEX_op_negsetcond_i64: 2374 tcg_out_cmp(s, ext, args[3], a1, a2, c2); 2375 /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */ 2376 tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR, 2377 TCG_REG_XZR, tcg_invert_cond(args[3])); 2378 break; 2379 2380 case INDEX_op_movcond_i32: 2381 a2 = (int32_t)a2; 2382 /* FALLTHRU */ 2383 case INDEX_op_movcond_i64: 2384 tcg_out_cmp(s, ext, args[5], a1, a2, c2); 2385 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2386 break; 2387 2388 case INDEX_op_qemu_ld_a32_i32: 2389 case INDEX_op_qemu_ld_a64_i32: 2390 case INDEX_op_qemu_ld_a32_i64: 2391 case INDEX_op_qemu_ld_a64_i64: 2392 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2393 break; 2394 case INDEX_op_qemu_st_a32_i32: 2395 case INDEX_op_qemu_st_a64_i32: 2396 case INDEX_op_qemu_st_a32_i64: 2397 case INDEX_op_qemu_st_a64_i64: 2398 tcg_out_qemu_st(s, REG0(0), a1, a2, ext); 2399 break; 2400 case INDEX_op_qemu_ld_a32_i128: 2401 case INDEX_op_qemu_ld_a64_i128: 2402 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); 2403 break; 2404 case INDEX_op_qemu_st_a32_i128: 2405 case INDEX_op_qemu_st_a64_i128: 2406 tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false); 2407 break; 2408 2409 case INDEX_op_bswap64_i64: 2410 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2411 break; 2412 case INDEX_op_bswap32_i64: 2413 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2414 if (a2 & TCG_BSWAP_OS) { 2415 tcg_out_ext32s(s, a0, a0); 2416 } 2417 break; 2418 case INDEX_op_bswap32_i32: 2419 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2420 break; 2421 case INDEX_op_bswap16_i64: 2422 case INDEX_op_bswap16_i32: 2423 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2424 if (a2 & TCG_BSWAP_OS) { 2425 /* Output must be sign-extended. */ 2426 tcg_out_ext16s(s, ext, a0, a0); 2427 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2428 /* Output must be zero-extended, but input isn't. 
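               REV16 swaps bytes within each 16-bit lane, so bits 16..31 of
               the output are a byte-swap of bits 16..31 of the input,
               not zero.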
*/ 2429 tcg_out_ext16u(s, a0, a0); 2430 } 2431 break; 2432 2433 case INDEX_op_deposit_i64: 2434 case INDEX_op_deposit_i32: 2435 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2436 break; 2437 2438 case INDEX_op_extract_i64: 2439 case INDEX_op_extract_i32: 2440 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2441 break; 2442 2443 case INDEX_op_sextract_i64: 2444 case INDEX_op_sextract_i32: 2445 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2446 break; 2447 2448 case INDEX_op_extract2_i64: 2449 case INDEX_op_extract2_i32: 2450 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2451 break; 2452 2453 case INDEX_op_add2_i32: 2454 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2455 (int32_t)args[4], args[5], const_args[4], 2456 const_args[5], false); 2457 break; 2458 case INDEX_op_add2_i64: 2459 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2460 args[5], const_args[4], const_args[5], false); 2461 break; 2462 case INDEX_op_sub2_i32: 2463 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2464 (int32_t)args[4], args[5], const_args[4], 2465 const_args[5], true); 2466 break; 2467 case INDEX_op_sub2_i64: 2468 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2469 args[5], const_args[4], const_args[5], true); 2470 break; 2471 2472 case INDEX_op_muluh_i64: 2473 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2474 break; 2475 case INDEX_op_mulsh_i64: 2476 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2477 break; 2478 2479 case INDEX_op_mb: 2480 tcg_out_mb(s, a0); 2481 break; 2482 2483 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2484 case INDEX_op_mov_i64: 2485 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2486 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2487 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 2488 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
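           The ext/extu opcodes below are likewise expanded via
           tcg_out_ext8s() and friends, so none of them should reach this
           switch; they share g_assert_not_reached() with the default case.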
*/ 2489 case INDEX_op_ext8s_i64: 2490 case INDEX_op_ext8u_i32: 2491 case INDEX_op_ext8u_i64: 2492 case INDEX_op_ext16s_i64: 2493 case INDEX_op_ext16s_i32: 2494 case INDEX_op_ext16u_i64: 2495 case INDEX_op_ext16u_i32: 2496 case INDEX_op_ext32s_i64: 2497 case INDEX_op_ext32u_i64: 2498 case INDEX_op_ext_i32_i64: 2499 case INDEX_op_extu_i32_i64: 2500 case INDEX_op_extrl_i64_i32: 2501 default: 2502 g_assert_not_reached(); 2503 } 2504 2505#undef REG0 2506} 2507 2508static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2509 unsigned vecl, unsigned vece, 2510 const TCGArg args[TCG_MAX_OP_ARGS], 2511 const int const_args[TCG_MAX_OP_ARGS]) 2512{ 2513 static const AArch64Insn cmp_vec_insn[16] = { 2514 [TCG_COND_EQ] = I3616_CMEQ, 2515 [TCG_COND_GT] = I3616_CMGT, 2516 [TCG_COND_GE] = I3616_CMGE, 2517 [TCG_COND_GTU] = I3616_CMHI, 2518 [TCG_COND_GEU] = I3616_CMHS, 2519 }; 2520 static const AArch64Insn cmp_scalar_insn[16] = { 2521 [TCG_COND_EQ] = I3611_CMEQ, 2522 [TCG_COND_GT] = I3611_CMGT, 2523 [TCG_COND_GE] = I3611_CMGE, 2524 [TCG_COND_GTU] = I3611_CMHI, 2525 [TCG_COND_GEU] = I3611_CMHS, 2526 }; 2527 static const AArch64Insn cmp0_vec_insn[16] = { 2528 [TCG_COND_EQ] = I3617_CMEQ0, 2529 [TCG_COND_GT] = I3617_CMGT0, 2530 [TCG_COND_GE] = I3617_CMGE0, 2531 [TCG_COND_LT] = I3617_CMLT0, 2532 [TCG_COND_LE] = I3617_CMLE0, 2533 }; 2534 static const AArch64Insn cmp0_scalar_insn[16] = { 2535 [TCG_COND_EQ] = I3612_CMEQ0, 2536 [TCG_COND_GT] = I3612_CMGT0, 2537 [TCG_COND_GE] = I3612_CMGE0, 2538 [TCG_COND_LT] = I3612_CMLT0, 2539 [TCG_COND_LE] = I3612_CMLE0, 2540 }; 2541 2542 TCGType type = vecl + TCG_TYPE_V64; 2543 unsigned is_q = vecl; 2544 bool is_scalar = !is_q && vece == MO_64; 2545 TCGArg a0, a1, a2, a3; 2546 int cmode, imm8; 2547 2548 a0 = args[0]; 2549 a1 = args[1]; 2550 a2 = args[2]; 2551 2552 switch (opc) { 2553 case INDEX_op_ld_vec: 2554 tcg_out_ld(s, type, a0, a1, a2); 2555 break; 2556 case INDEX_op_st_vec: 2557 tcg_out_st(s, type, a0, a1, a2); 2558 break; 2559 case INDEX_op_dupm_vec: 2560 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2561 break; 2562 case INDEX_op_add_vec: 2563 if (is_scalar) { 2564 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2565 } else { 2566 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2567 } 2568 break; 2569 case INDEX_op_sub_vec: 2570 if (is_scalar) { 2571 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2572 } else { 2573 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2574 } 2575 break; 2576 case INDEX_op_mul_vec: 2577 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2578 break; 2579 case INDEX_op_neg_vec: 2580 if (is_scalar) { 2581 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2582 } else { 2583 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2584 } 2585 break; 2586 case INDEX_op_abs_vec: 2587 if (is_scalar) { 2588 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2589 } else { 2590 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2591 } 2592 break; 2593 case INDEX_op_and_vec: 2594 if (const_args[2]) { 2595 is_shimm1632(~a2, &cmode, &imm8); 2596 if (a0 == a1) { 2597 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2598 return; 2599 } 2600 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2601 a2 = a0; 2602 } 2603 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2604 break; 2605 case INDEX_op_or_vec: 2606 if (const_args[2]) { 2607 is_shimm1632(a2, &cmode, &imm8); 2608 if (a0 == a1) { 2609 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2610 return; 2611 } 2612 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2613 a2 = a0; 2614 } 2615 tcg_out_insn(s, 3616, ORR, 
is_q, 0, a0, a1, a2); 2616 break; 2617 case INDEX_op_andc_vec: 2618 if (const_args[2]) { 2619 is_shimm1632(a2, &cmode, &imm8); 2620 if (a0 == a1) { 2621 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2622 return; 2623 } 2624 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2625 a2 = a0; 2626 } 2627 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2628 break; 2629 case INDEX_op_orc_vec: 2630 if (const_args[2]) { 2631 is_shimm1632(~a2, &cmode, &imm8); 2632 if (a0 == a1) { 2633 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2634 return; 2635 } 2636 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2637 a2 = a0; 2638 } 2639 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2640 break; 2641 case INDEX_op_xor_vec: 2642 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2643 break; 2644 case INDEX_op_ssadd_vec: 2645 if (is_scalar) { 2646 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2647 } else { 2648 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2649 } 2650 break; 2651 case INDEX_op_sssub_vec: 2652 if (is_scalar) { 2653 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2654 } else { 2655 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2656 } 2657 break; 2658 case INDEX_op_usadd_vec: 2659 if (is_scalar) { 2660 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2661 } else { 2662 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2663 } 2664 break; 2665 case INDEX_op_ussub_vec: 2666 if (is_scalar) { 2667 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2668 } else { 2669 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2670 } 2671 break; 2672 case INDEX_op_smax_vec: 2673 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2674 break; 2675 case INDEX_op_smin_vec: 2676 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2677 break; 2678 case INDEX_op_umax_vec: 2679 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2680 break; 2681 case INDEX_op_umin_vec: 2682 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2683 break; 2684 case INDEX_op_not_vec: 2685 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2686 break; 2687 case INDEX_op_shli_vec: 2688 if (is_scalar) { 2689 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2690 } else { 2691 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2692 } 2693 break; 2694 case INDEX_op_shri_vec: 2695 if (is_scalar) { 2696 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2697 } else { 2698 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2699 } 2700 break; 2701 case INDEX_op_sari_vec: 2702 if (is_scalar) { 2703 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2704 } else { 2705 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2706 } 2707 break; 2708 case INDEX_op_aa64_sli_vec: 2709 if (is_scalar) { 2710 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2711 } else { 2712 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2713 } 2714 break; 2715 case INDEX_op_shlv_vec: 2716 if (is_scalar) { 2717 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2718 } else { 2719 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2720 } 2721 break; 2722 case INDEX_op_aa64_sshl_vec: 2723 if (is_scalar) { 2724 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2725 } else { 2726 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2727 } 2728 break; 2729 case INDEX_op_cmp_vec: 2730 { 2731 TCGCond cond = args[3]; 2732 AArch64Insn insn; 2733 2734 if (cond == TCG_COND_NE) { 2735 if (const_args[2]) { 2736 if (is_scalar) { 2737 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 
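                        /*
                         * CMTST d, n, m sets a lane to all-ones iff
                         * (n & m) != 0; with m == n that is exactly
                         * "lane != 0", i.e. NE against zero.
                         */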
2738 } else { 2739 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2740 } 2741 } else { 2742 if (is_scalar) { 2743 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2744 } else { 2745 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2746 } 2747 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2748 } 2749 } else { 2750 if (const_args[2]) { 2751 if (is_scalar) { 2752 insn = cmp0_scalar_insn[cond]; 2753 if (insn) { 2754 tcg_out_insn_3612(s, insn, vece, a0, a1); 2755 break; 2756 } 2757 } else { 2758 insn = cmp0_vec_insn[cond]; 2759 if (insn) { 2760 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2761 break; 2762 } 2763 } 2764 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0); 2765 a2 = TCG_VEC_TMP0; 2766 } 2767 if (is_scalar) { 2768 insn = cmp_scalar_insn[cond]; 2769 if (insn == 0) { 2770 TCGArg t; 2771 t = a1, a1 = a2, a2 = t; 2772 cond = tcg_swap_cond(cond); 2773 insn = cmp_scalar_insn[cond]; 2774 tcg_debug_assert(insn != 0); 2775 } 2776 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2777 } else { 2778 insn = cmp_vec_insn[cond]; 2779 if (insn == 0) { 2780 TCGArg t; 2781 t = a1, a1 = a2, a2 = t; 2782 cond = tcg_swap_cond(cond); 2783 insn = cmp_vec_insn[cond]; 2784 tcg_debug_assert(insn != 0); 2785 } 2786 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2787 } 2788 } 2789 } 2790 break; 2791 2792 case INDEX_op_bitsel_vec: 2793 a3 = args[3]; 2794 if (a0 == a3) { 2795 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2796 } else if (a0 == a2) { 2797 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2798 } else { 2799 if (a0 != a1) { 2800 tcg_out_mov(s, type, a0, a1); 2801 } 2802 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2803 } 2804 break; 2805 2806 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2807 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2808 default: 2809 g_assert_not_reached(); 2810 } 2811} 2812 2813int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2814{ 2815 switch (opc) { 2816 case INDEX_op_add_vec: 2817 case INDEX_op_sub_vec: 2818 case INDEX_op_and_vec: 2819 case INDEX_op_or_vec: 2820 case INDEX_op_xor_vec: 2821 case INDEX_op_andc_vec: 2822 case INDEX_op_orc_vec: 2823 case INDEX_op_neg_vec: 2824 case INDEX_op_abs_vec: 2825 case INDEX_op_not_vec: 2826 case INDEX_op_cmp_vec: 2827 case INDEX_op_shli_vec: 2828 case INDEX_op_shri_vec: 2829 case INDEX_op_sari_vec: 2830 case INDEX_op_ssadd_vec: 2831 case INDEX_op_sssub_vec: 2832 case INDEX_op_usadd_vec: 2833 case INDEX_op_ussub_vec: 2834 case INDEX_op_shlv_vec: 2835 case INDEX_op_bitsel_vec: 2836 return 1; 2837 case INDEX_op_rotli_vec: 2838 case INDEX_op_shrv_vec: 2839 case INDEX_op_sarv_vec: 2840 case INDEX_op_rotlv_vec: 2841 case INDEX_op_rotrv_vec: 2842 return -1; 2843 case INDEX_op_mul_vec: 2844 case INDEX_op_smax_vec: 2845 case INDEX_op_smin_vec: 2846 case INDEX_op_umax_vec: 2847 case INDEX_op_umin_vec: 2848 return vece < MO_64; 2849 2850 default: 2851 return 0; 2852 } 2853} 2854 2855void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2856 TCGArg a0, ...) 
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64.
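           Rotate-right is therefore open-coded below as
               t1 = x << -n               (= x >> n)
               t2 = x << (lanebits - n)
               d  = t1 | t2
           using only the USHL-backed shlv primitive.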
*/ 2912 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2913 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2914 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2915 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2916 tcg_gen_or_vec(vece, v0, t1, t2); 2917 tcg_temp_free_vec(t1); 2918 tcg_temp_free_vec(t2); 2919 break; 2920 2921 default: 2922 g_assert_not_reached(); 2923 } 2924} 2925 2926static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2927{ 2928 switch (op) { 2929 case INDEX_op_goto_ptr: 2930 return C_O0_I1(r); 2931 2932 case INDEX_op_ld8u_i32: 2933 case INDEX_op_ld8s_i32: 2934 case INDEX_op_ld16u_i32: 2935 case INDEX_op_ld16s_i32: 2936 case INDEX_op_ld_i32: 2937 case INDEX_op_ld8u_i64: 2938 case INDEX_op_ld8s_i64: 2939 case INDEX_op_ld16u_i64: 2940 case INDEX_op_ld16s_i64: 2941 case INDEX_op_ld32u_i64: 2942 case INDEX_op_ld32s_i64: 2943 case INDEX_op_ld_i64: 2944 case INDEX_op_neg_i32: 2945 case INDEX_op_neg_i64: 2946 case INDEX_op_not_i32: 2947 case INDEX_op_not_i64: 2948 case INDEX_op_bswap16_i32: 2949 case INDEX_op_bswap32_i32: 2950 case INDEX_op_bswap16_i64: 2951 case INDEX_op_bswap32_i64: 2952 case INDEX_op_bswap64_i64: 2953 case INDEX_op_ext8s_i32: 2954 case INDEX_op_ext16s_i32: 2955 case INDEX_op_ext8u_i32: 2956 case INDEX_op_ext16u_i32: 2957 case INDEX_op_ext8s_i64: 2958 case INDEX_op_ext16s_i64: 2959 case INDEX_op_ext32s_i64: 2960 case INDEX_op_ext8u_i64: 2961 case INDEX_op_ext16u_i64: 2962 case INDEX_op_ext32u_i64: 2963 case INDEX_op_ext_i32_i64: 2964 case INDEX_op_extu_i32_i64: 2965 case INDEX_op_extract_i32: 2966 case INDEX_op_extract_i64: 2967 case INDEX_op_sextract_i32: 2968 case INDEX_op_sextract_i64: 2969 return C_O1_I1(r, r); 2970 2971 case INDEX_op_st8_i32: 2972 case INDEX_op_st16_i32: 2973 case INDEX_op_st_i32: 2974 case INDEX_op_st8_i64: 2975 case INDEX_op_st16_i64: 2976 case INDEX_op_st32_i64: 2977 case INDEX_op_st_i64: 2978 return C_O0_I2(rZ, r); 2979 2980 case INDEX_op_add_i32: 2981 case INDEX_op_add_i64: 2982 case INDEX_op_sub_i32: 2983 case INDEX_op_sub_i64: 2984 return C_O1_I2(r, r, rA); 2985 2986 case INDEX_op_setcond_i32: 2987 case INDEX_op_setcond_i64: 2988 case INDEX_op_negsetcond_i32: 2989 case INDEX_op_negsetcond_i64: 2990 return C_O1_I2(r, r, rC); 2991 2992 case INDEX_op_mul_i32: 2993 case INDEX_op_mul_i64: 2994 case INDEX_op_div_i32: 2995 case INDEX_op_div_i64: 2996 case INDEX_op_divu_i32: 2997 case INDEX_op_divu_i64: 2998 case INDEX_op_rem_i32: 2999 case INDEX_op_rem_i64: 3000 case INDEX_op_remu_i32: 3001 case INDEX_op_remu_i64: 3002 case INDEX_op_muluh_i64: 3003 case INDEX_op_mulsh_i64: 3004 return C_O1_I2(r, r, r); 3005 3006 case INDEX_op_and_i32: 3007 case INDEX_op_and_i64: 3008 case INDEX_op_or_i32: 3009 case INDEX_op_or_i64: 3010 case INDEX_op_xor_i32: 3011 case INDEX_op_xor_i64: 3012 case INDEX_op_andc_i32: 3013 case INDEX_op_andc_i64: 3014 case INDEX_op_orc_i32: 3015 case INDEX_op_orc_i64: 3016 case INDEX_op_eqv_i32: 3017 case INDEX_op_eqv_i64: 3018 return C_O1_I2(r, r, rL); 3019 3020 case INDEX_op_shl_i32: 3021 case INDEX_op_shr_i32: 3022 case INDEX_op_sar_i32: 3023 case INDEX_op_rotl_i32: 3024 case INDEX_op_rotr_i32: 3025 case INDEX_op_shl_i64: 3026 case INDEX_op_shr_i64: 3027 case INDEX_op_sar_i64: 3028 case INDEX_op_rotl_i64: 3029 case INDEX_op_rotr_i64: 3030 return C_O1_I2(r, r, ri); 3031 3032 case INDEX_op_clz_i32: 3033 case INDEX_op_ctz_i32: 3034 case INDEX_op_clz_i64: 3035 case INDEX_op_ctz_i64: 3036 return C_O1_I2(r, r, rAL); 3037 3038 case INDEX_op_brcond_i32: 3039 case INDEX_op_brcond_i64: 3040 return C_O0_I2(r, rC); 3041 
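    /*
     * A key to the constraint letters used here: they map to the
     * TCG_CT_CONST_* flags near the top of this file (via
     * tcg-target-con-str.h).  'r' general register, 'w' vector register,
     * 'i' any constant, 'Z' the constant zero (XZR), 'A' a valid
     * arithmetic immediate, 'L' a valid logical immediate, 'C' a valid
     * compare immediate, 'M' the constant -1, 'O' a vector ORR immediate,
     * 'N' a vector AND/BIC immediate.
     */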
3042 case INDEX_op_movcond_i32: 3043 case INDEX_op_movcond_i64: 3044 return C_O1_I4(r, r, rC, rZ, rZ); 3045 3046 case INDEX_op_qemu_ld_a32_i32: 3047 case INDEX_op_qemu_ld_a64_i32: 3048 case INDEX_op_qemu_ld_a32_i64: 3049 case INDEX_op_qemu_ld_a64_i64: 3050 return C_O1_I1(r, r); 3051 case INDEX_op_qemu_ld_a32_i128: 3052 case INDEX_op_qemu_ld_a64_i128: 3053 return C_O2_I1(r, r, r); 3054 case INDEX_op_qemu_st_a32_i32: 3055 case INDEX_op_qemu_st_a64_i32: 3056 case INDEX_op_qemu_st_a32_i64: 3057 case INDEX_op_qemu_st_a64_i64: 3058 return C_O0_I2(rZ, r); 3059 case INDEX_op_qemu_st_a32_i128: 3060 case INDEX_op_qemu_st_a64_i128: 3061 return C_O0_I3(rZ, rZ, r); 3062 3063 case INDEX_op_deposit_i32: 3064 case INDEX_op_deposit_i64: 3065 return C_O1_I2(r, 0, rZ); 3066 3067 case INDEX_op_extract2_i32: 3068 case INDEX_op_extract2_i64: 3069 return C_O1_I2(r, rZ, rZ); 3070 3071 case INDEX_op_add2_i32: 3072 case INDEX_op_add2_i64: 3073 case INDEX_op_sub2_i32: 3074 case INDEX_op_sub2_i64: 3075 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 3076 3077 case INDEX_op_add_vec: 3078 case INDEX_op_sub_vec: 3079 case INDEX_op_mul_vec: 3080 case INDEX_op_xor_vec: 3081 case INDEX_op_ssadd_vec: 3082 case INDEX_op_sssub_vec: 3083 case INDEX_op_usadd_vec: 3084 case INDEX_op_ussub_vec: 3085 case INDEX_op_smax_vec: 3086 case INDEX_op_smin_vec: 3087 case INDEX_op_umax_vec: 3088 case INDEX_op_umin_vec: 3089 case INDEX_op_shlv_vec: 3090 case INDEX_op_shrv_vec: 3091 case INDEX_op_sarv_vec: 3092 case INDEX_op_aa64_sshl_vec: 3093 return C_O1_I2(w, w, w); 3094 case INDEX_op_not_vec: 3095 case INDEX_op_neg_vec: 3096 case INDEX_op_abs_vec: 3097 case INDEX_op_shli_vec: 3098 case INDEX_op_shri_vec: 3099 case INDEX_op_sari_vec: 3100 return C_O1_I1(w, w); 3101 case INDEX_op_ld_vec: 3102 case INDEX_op_dupm_vec: 3103 return C_O1_I1(w, r); 3104 case INDEX_op_st_vec: 3105 return C_O0_I2(w, r); 3106 case INDEX_op_dup_vec: 3107 return C_O1_I1(w, wr); 3108 case INDEX_op_or_vec: 3109 case INDEX_op_andc_vec: 3110 return C_O1_I2(w, w, wO); 3111 case INDEX_op_and_vec: 3112 case INDEX_op_orc_vec: 3113 return C_O1_I2(w, w, wN); 3114 case INDEX_op_cmp_vec: 3115 return C_O1_I2(w, w, wZ); 3116 case INDEX_op_bitsel_vec: 3117 return C_O1_I3(w, w, w, w); 3118 case INDEX_op_aa64_sli_vec: 3119 return C_O1_I2(w, 0, w); 3120 3121 default: 3122 g_assert_not_reached(); 3123 } 3124} 3125 3126static void tcg_target_init(TCGContext *s) 3127{ 3128 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 3129 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 3130 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 3131 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 3132 3133 tcg_target_call_clobber_regs = -1ull; 3134 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 3135 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 3136 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 3137 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 3138 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 3139 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 3140 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 3141 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 3142 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 3143 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 3144 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 3145 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    tcg_out_bti(s, BTI_C);

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    if (!tcg_use_softmmu) {
        /*
         * Note that XZR cannot be encoded in the address base register slot,
         * as that actually encodes SP.  Depending on the guest, we may need
         * to zero-extend the guest address via the address index register slot,
         * therefore we need to load even a zero guest base into a register.
         */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);

    /* Remove TCG locals stack space.
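       (This undoes the SUBI from the prologue; the build-time assert above
       guarantees that FRAME_SIZE - PUSH_SIZE fits a single ADDI immediate.)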
     */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_tb_start(TCGContext *s)
{
    tcg_out_bti(s, BTI_J);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
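
/*
 * For reference, the frame laid out by tcg_target_qemu_prologue() and
 * described by debug_frame above, assuming the usual values from tcg.h
 * (TCG_STATIC_CALL_ARGS_SIZE == 128, CPU_TEMP_BUF_NLONGS == 128), which
 * make FRAME_SIZE 1248 bytes:
 *
 *   [CFA -   96, CFA)           fp, lr, x19..x28      (PUSH_SIZE bytes)
 *   [CFA - 1120, CFA -   96)    TCG temp buffer
 *   [CFA - 1248, CFA - 1120)    static call args area
 *   sp == CFA - FRAME_SIZE
 */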