/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0  TCG_REG_X16
#define TCG_REG_TMP1  TCG_REG_X17
#define TCG_REG_TMP2  TCG_REG_X30
#define TCG_VEC_TMP0  TCG_REG_V31

#define TCG_REG_GUEST_BASE  TCG_REG_X28
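/*
 * Note that TCG_REG_TMP2 aliases X30, the link register: it is kept out
 * of the allocation order above, and since BLR clobbers X30 it is only
 * usable as a scratch within emitted sequences that contain no call.
 */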
static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.  */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
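/*
 * Worked examples for the matchers above: is_limm(0x0ff0) holds, since
 * val += val & -val yields 0x1000, a power of two, while is_limm(0x0ff1)
 * fails (0x0ff2 is not a power of two).  For the vector immediates,
 * 0x00ff0000 satisfies is_shimm32 with cmode 0x4, imm8 0xff, and
 * 0x00ffffff satisfies is_soimm32 with cmode 0xd, imm8 0xff.
 */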
/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}
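/*
 * For example, is_shimm32_pair(0x00ff00ff, ...) stops at i = 4: masking
 * byte 2 leaves 0x000000ff for MOVI (cmode 0x0), and the return value 4
 * is the ORR cmode that adds back the 0x00ff0000 byte.
 */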
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,      /* store */
    LDST_LD = 1,      /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive.  */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions.  */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,

    BTI_C           = 0xd503245f,
    BTI_J           = 0xd503249f,
    BTI_JC          = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
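/*
 * E.g. tcg_out_insn(s, 3401, ADDI, ext, rd, rn, imm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, imm); pairing an opcode
 * with an emitter from a different format group fails to compile, since
 * the glued I<FMT>_<OP> name would not exist.
 */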
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}
/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}
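/*
 * Note there is no plain multiply in this group: MUL rd, rn, rm is the
 * architectural alias of MADD rd, rn, rm, xzr, and a negated multiply
 * can likewise be had from MSUB with ra = xzr.
 */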
static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift).  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
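/*
 * Example: limm = 0x0000000000ff0000 has h = 40 leading and l = 16
 * trailing zeros, so r = 64 - 16 = 48 and c = 48 - 40 - 1 = 7: a run
 * of c + 1 = 8 ones rotated right by 48, which DecodeBitMasks expands
 * back into 0xff0000.
 */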
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff.  */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
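    /* E.g. for a target ~19MB away (disp 0x1234567), ADR's +-1MB reach
       is insufficient, but ADRP covers +-4GB in 4KiB pages and the ADDI
       below supplies the low 12 bits.  */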
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}
static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        /* The offset emitted here is a placeholder; reloc_pc19 patches
           the imm19 field once the label has been resolved.  */
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}
static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}
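/*
 * The double-word add/sub below computes rl = al + bl (setting carry)
 * then rh = ah + bh + carry.  A constant high part of 0 or -1 folds
 * into ADC/SBC against xzr, since SBC computes rn + ~rm + c and
 * ~xzr = -1.
 */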
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP0;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
    }
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/* We expect to use a 7-bit scaled negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -512
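/*
 * The LDP used to fetch {mask,table} below takes a signed 7-bit offset
 * scaled by 8, i.e. a range of -512..504 (which tcg_out_insn_3314
 * asserts), hence the -512 bound above.
 */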
/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        TCGReg addr_adj;
        TCGType mask_type;
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}.  */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

        /* Extract the TLB index from the address into X0.  */
        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                     s->page_bits - CPU_TLB_ENTRY_BITS);

        /* Add the tlb_table pointer, forming the CPUTLBEntry address.  */
        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

        /* Load the tlb comparator into TMP0, and the fast path addend.  */
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                         : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                   offsetof(CPUTLBEntry, addend));

        /*
         * For aligned accesses, we check the first byte and include
         * the alignment bits within the address.  For unaligned access,
         * we check that we don't cross pages using the address of the
         * last byte of the access.
         */
        if (a_mask >= s_mask) {
            addr_adj = addr_reg;
        } else {
            addr_adj = TCG_REG_TMP2;
            tcg_out_insn(s, 3401, ADDI, addr_type,
                         addr_adj, addr_reg, s_mask - a_mask);
        }
        compare_mask = (uint64_t)s->page_mask | a_mask;

        /* Store the page mask part of the address into TMP2.  */
        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                         addr_adj, compare_mask);

        /* Perform the address comparison.  */
        tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);

        /* If not equal, we jump to the slow path.  */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        h->base = TCG_REG_TMP1;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        if (a_mask) {
            ldst = new_ldst_label(s);

            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addrlo_reg = addr_reg;

            /* tst addr, #mask */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

            /* b.ne slow_path */
            ldst->label_ptr[0] = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
        }

        if (guest_base || addr_type == TCG_TYPE_I32) {
            h->base = TCG_REG_GUEST_BASE;
            h->index = addr_reg;
            h->index_ext = addr_type;
        } else {
            h->base = addr_reg;
            h->index = TCG_REG_XZR;
            h->index_ext = TCG_TYPE_I64;
        }
    }

    return ldst;
}
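/*
 * The softmmu fast path above is thus: LDP of {mask,table}, AND+LSR to
 * index the TLB, ADD to form the CPUTLBEntry address, two loads for the
 * comparator and addend, ANDI to mask the page bits, CMP and B.NE to
 * the slow path; the guest access itself then uses the addend as base
 * and the guest address as index.
 */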
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
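/*
 * For 16-byte accesses, LDP/STP is used directly when FEAT_LSE2
 * guarantees the required atomicity (or none is required); otherwise
 * the LDXP/STXP loop below provides it.
 */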
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    TCGReg base;
    bool use_pair;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing.  */
    if (h.index == TCG_REG_XZR) {
        base = h.base;
    } else {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
        } else {
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
        }
    }

    use_pair = h.aa.atom < MO_128 || have_lse2;

    if (!use_pair) {
        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
            branch = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
            use_pair = true;
        }

        if (is_ld) {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             *    ldxp lo, hi, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, .-8
             * Require no overlap between data{lo,hi} and base.
             */
            if (datalo == base || datahi == base) {
                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
                base = TCG_REG_TMP2;
            }
            ll = sl = datalo;
            lh = sh = datahi;
        } else {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             * 1: ldxp t0, t1, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, 1b
             */
            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
            ll = TCG_REG_TMP0;
            lh = TCG_REG_TMP1;
            sl = datalo;
            sh = datahi;
        }

        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);

        if (use_pair) {
            /* "b .+8", branching across the one insn of use_pair.  */
            tcg_out_insn(s, 3206, B, 2);
            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
        }
    }

    if (use_pair) {
        if (is_ld) {
            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
        } else {
            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    const tcg_insn_unit *target;
    ptrdiff_t offset;

    /* Reuse the zeroing that exists for goto_ptr.  */
    if (a0 == 0) {
        target = tcg_code_gen_epilogue;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        target = tb_ret_addr;
    }

    offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /*
         * Only x16/x17 generate BTI type Jump (2),
         * other registers generate BTI type Jump|Call (3).
         */
        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    }
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    set_jmp_reset_offset(s, which);
    tcg_out_bti(s, BTI_J);
}
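/*
 * tcg_out_goto_tb above emits "B <placeholder>; BR x16".  The patcher
 * below rewrites the first insn either into a direct B, which skips the
 * BR, or into "LDR x16, <jmp_target_addr>", which falls through to it.
 */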
     */
    if (d_offset == sextract64(d_offset, 0, 28)) {
        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
    } else {
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        ptrdiff_t i_offset = i_addr - jmp_rx;

        /* Note that we asserted this in range in tcg_out_goto_tb. */
        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
    }
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data. */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments. */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0. */
#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0,
                             a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
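        /* AArch64 has no ctz insn; tcg_out_cltz bit-reverses the input
           with RBIT and reuses the clz path. */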
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_negsetcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_negsetcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */
        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
        break;
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
        break;
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_ext16s(s, ext, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /*
             * Output must be zero-extended, but input isn't.
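             * (REV16 swaps the bytes within each halfword in place, so
             * bits 16..31 still hold data from the input's high half.)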
             */
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov. */
    case INDEX_op_mov_i64:
    case INDEX_op_call:      /* Always emitted via tcg_out_call. */
    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb. */
    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb. */
    case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op.
                              */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR,
                     is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
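                        /* CMTST Rd, Rn, Rn sets a lane to -1 iff that lane
                           of Rn is nonzero, i.e. computes a1 != 0. */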
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
                    a2 = TCG_VEC_TMP0;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov. */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec. */
    default:
        g_assert_not_reached();
    }
}

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;

    default:
        return 0;
    }
}

void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
2772{ 2773 va_list va; 2774 TCGv_vec v0, v1, v2, t1, t2, c1; 2775 TCGArg a2; 2776 2777 va_start(va, a0); 2778 v0 = temp_tcgv_vec(arg_temp(a0)); 2779 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2780 a2 = va_arg(va, TCGArg); 2781 va_end(va); 2782 2783 switch (opc) { 2784 case INDEX_op_rotli_vec: 2785 t1 = tcg_temp_new_vec(type); 2786 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2787 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2788 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2789 tcg_temp_free_vec(t1); 2790 break; 2791 2792 case INDEX_op_shrv_vec: 2793 case INDEX_op_sarv_vec: 2794 /* Right shifts are negative left shifts for AArch64. */ 2795 v2 = temp_tcgv_vec(arg_temp(a2)); 2796 t1 = tcg_temp_new_vec(type); 2797 tcg_gen_neg_vec(vece, t1, v2); 2798 opc = (opc == INDEX_op_shrv_vec 2799 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2800 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2801 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2802 tcg_temp_free_vec(t1); 2803 break; 2804 2805 case INDEX_op_rotlv_vec: 2806 v2 = temp_tcgv_vec(arg_temp(a2)); 2807 t1 = tcg_temp_new_vec(type); 2808 c1 = tcg_constant_vec(type, vece, 8 << vece); 2809 tcg_gen_sub_vec(vece, t1, v2, c1); 2810 /* Right shifts are negative left shifts for AArch64. */ 2811 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2812 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2813 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2814 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2815 tcg_gen_or_vec(vece, v0, v0, t1); 2816 tcg_temp_free_vec(t1); 2817 break; 2818 2819 case INDEX_op_rotrv_vec: 2820 v2 = temp_tcgv_vec(arg_temp(a2)); 2821 t1 = tcg_temp_new_vec(type); 2822 t2 = tcg_temp_new_vec(type); 2823 c1 = tcg_constant_vec(type, vece, 8 << vece); 2824 tcg_gen_neg_vec(vece, t1, v2); 2825 tcg_gen_sub_vec(vece, t2, c1, v2); 2826 /* Right shifts are negative left shifts for AArch64. 
         */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        return C_O2_I1(r, r, r);
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return C_O0_I2(rZ, r);
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return C_O0_I3(rZ, rZ, r);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn. */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    tcg_out_bti(s, BTI_C);

    /* Push (FP, LR) and allocate space for all saved registers. */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding. */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals. */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    if (!tcg_use_softmmu) {
        /*
         * Note that XZR cannot be encoded in the address base register slot,
         * as that actually encodes SP.  Depending on the guest, we may need
         * to zero-extend the guest address via the address index register
         * slot, therefore we need to load even a zero guest base into a
         * register.
         */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr.  Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);

    /*
     * Remove TCG locals stack space.
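     * (This ADDI mirrors the SUBI that allocated the space in the
     * prologue.)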
     */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame. */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_tb_start(TCGContext *s)
{
    tcg_out_bti(s, BTI_J);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}