1/* 2 * Initial TCG Implementation for aarch64 3 * 4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH 5 * Written by Claudio Fontana 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or 8 * (at your option) any later version. 9 * 10 * See the COPYING file in the top-level directory for details. 11 */ 12 13#include "../tcg-pool.c.inc" 14#include "qemu/bitops.h" 15 16/* We're going to re-use TCGType in setting of the SF bit, which controls 17 the size of the operation performed. If we know the values match, it 18 makes things much cleaner. */ 19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); 20 21#ifdef CONFIG_DEBUG_TCG 22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp", 27 28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31", 32}; 33#endif /* CONFIG_DEBUG_TCG */ 34 35static const int tcg_target_reg_alloc_order[] = { 36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, 37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, 38 TCG_REG_X28, /* we will reserve this for guest_base if configured */ 39 40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, 41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, 42 TCG_REG_X16, TCG_REG_X17, 43 44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, 46 47 /* X18 reserved by system */ 48 /* X19 reserved for AREG0 */ 49 /* X29 reserved as fp */ 50 /* X30 reserved as temporary */ 51 52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, 53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, 54 /* V8 - V15 are call-saved, and skipped. 
*/ 55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, 56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, 57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, 58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, 59}; 60 61static const int tcg_target_call_iarg_regs[8] = { 62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 64}; 65static const int tcg_target_call_oarg_regs[1] = { 66 TCG_REG_X0 67}; 68 69#define TCG_REG_TMP TCG_REG_X30 70#define TCG_VEC_TMP TCG_REG_V31 71 72#ifndef CONFIG_SOFTMMU 73/* Note that XZR cannot be encoded in the address base register slot, 74 as that actaully encodes SP. So if we need to zero-extend the guest 75 address, via the address index register slot, we need to load even 76 a zero guest base into a register. */ 77#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) 78#define TCG_REG_GUEST_BASE TCG_REG_X28 79#endif 80 81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 82{ 83 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 84 ptrdiff_t offset = target - src_rx; 85 86 if (offset == sextract64(offset, 0, 26)) { 87 /* read instruction, mask away previous PC_REL26 parameter contents, 88 set the proper offset, then write back the instruction. 
*/ 89 *src_rw = deposit32(*src_rw, 0, 26, offset); 90 return true; 91 } 92 return false; 93} 94 95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 96{ 97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 98 ptrdiff_t offset = target - src_rx; 99 100 if (offset == sextract64(offset, 0, 19)) { 101 *src_rw = deposit32(*src_rw, 5, 19, offset); 102 return true; 103 } 104 return false; 105} 106 107static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 108 intptr_t value, intptr_t addend) 109{ 110 tcg_debug_assert(addend == 0); 111 switch (type) { 112 case R_AARCH64_JUMP26: 113 case R_AARCH64_CALL26: 114 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 115 case R_AARCH64_CONDBR19: 116 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 117 default: 118 g_assert_not_reached(); 119 } 120} 121 122#define TCG_CT_CONST_AIMM 0x100 123#define TCG_CT_CONST_LIMM 0x200 124#define TCG_CT_CONST_ZERO 0x400 125#define TCG_CT_CONST_MONE 0x800 126#define TCG_CT_CONST_ORRI 0x1000 127#define TCG_CT_CONST_ANDI 0x2000 128 129#define ALL_GENERAL_REGS 0xffffffffu 130#define ALL_VECTOR_REGS 0xffffffff00000000ull 131 132#ifdef CONFIG_SOFTMMU 133#define ALL_QLDST_REGS \ 134 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ 135 (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) 136#else 137#define ALL_QLDST_REGS ALL_GENERAL_REGS 138#endif 139 140/* Match a constant valid for addition (12-bit, optionally shifted). */ 141static inline bool is_aimm(uint64_t val) 142{ 143 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 144} 145 146/* Match a constant valid for logical operations. */ 147static inline bool is_limm(uint64_t val) 148{ 149 /* Taking a simplified view of the logical immediates for now, ignoring 150 the replication that can happen across the field. Match bit patterns 151 of the forms 152 0....01....1 153 0..01..10..0 154 and their inverses. */ 155 156 /* Make things easier below, by testing the form with msb clear. 
*/ 157 if ((int64_t)val < 0) { 158 val = ~val; 159 } 160 if (val == 0) { 161 return false; 162 } 163 val += val & -val; 164 return (val & (val - 1)) == 0; 165} 166 167/* Return true if v16 is a valid 16-bit shifted immediate. */ 168static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 169{ 170 if (v16 == (v16 & 0xff)) { 171 *cmode = 0x8; 172 *imm8 = v16 & 0xff; 173 return true; 174 } else if (v16 == (v16 & 0xff00)) { 175 *cmode = 0xa; 176 *imm8 = v16 >> 8; 177 return true; 178 } 179 return false; 180} 181 182/* Return true if v32 is a valid 32-bit shifted immediate. */ 183static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 184{ 185 if (v32 == (v32 & 0xff)) { 186 *cmode = 0x0; 187 *imm8 = v32 & 0xff; 188 return true; 189 } else if (v32 == (v32 & 0xff00)) { 190 *cmode = 0x2; 191 *imm8 = (v32 >> 8) & 0xff; 192 return true; 193 } else if (v32 == (v32 & 0xff0000)) { 194 *cmode = 0x4; 195 *imm8 = (v32 >> 16) & 0xff; 196 return true; 197 } else if (v32 == (v32 & 0xff000000)) { 198 *cmode = 0x6; 199 *imm8 = v32 >> 24; 200 return true; 201 } 202 return false; 203} 204 205/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 206static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 207{ 208 if ((v32 & 0xffff00ff) == 0xff) { 209 *cmode = 0xc; 210 *imm8 = (v32 >> 8) & 0xff; 211 return true; 212 } else if ((v32 & 0xff00ffff) == 0xffff) { 213 *cmode = 0xd; 214 *imm8 = (v32 >> 16) & 0xff; 215 return true; 216 } 217 return false; 218} 219 220/* Return true if v32 is a valid float32 immediate. */ 221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 222{ 223 if (extract32(v32, 0, 19) == 0 224 && (extract32(v32, 25, 6) == 0x20 225 || extract32(v32, 25, 6) == 0x1f)) { 226 *cmode = 0xf; 227 *imm8 = (extract32(v32, 31, 1) << 7) 228 | (extract32(v32, 25, 1) << 6) 229 | extract32(v32, 19, 6); 230 return true; 231 } 232 return false; 233} 234 235/* Return true if v64 is a valid float64 immediate. 
*/ 236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 237{ 238 if (extract64(v64, 0, 48) == 0 239 && (extract64(v64, 54, 9) == 0x100 240 || extract64(v64, 54, 9) == 0x0ff)) { 241 *cmode = 0xf; 242 *imm8 = (extract64(v64, 63, 1) << 7) 243 | (extract64(v64, 54, 1) << 6) 244 | extract64(v64, 48, 6); 245 return true; 246 } 247 return false; 248} 249 250/* 251 * Return non-zero if v32 can be formed by MOVI+ORR. 252 * Place the parameters for MOVI in (cmode, imm8). 253 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 254 */ 255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 256{ 257 int i; 258 259 for (i = 6; i > 0; i -= 2) { 260 /* Mask out one byte we can add with ORR. */ 261 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 262 if (is_shimm32(tmp, cmode, imm8) || 263 is_soimm32(tmp, cmode, imm8)) { 264 break; 265 } 266 } 267 return i; 268} 269 270/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 272{ 273 if (v32 == deposit32(v32, 16, 16, v32)) { 274 return is_shimm16(v32, cmode, imm8); 275 } else { 276 return is_shimm32(v32, cmode, imm8); 277 } 278} 279 280static int tcg_target_const_match(tcg_target_long val, TCGType type, 281 const TCGArgConstraint *arg_ct) 282{ 283 int ct = arg_ct->ct; 284 285 if (ct & TCG_CT_CONST) { 286 return 1; 287 } 288 if (type == TCG_TYPE_I32) { 289 val = (int32_t)val; 290 } 291 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 292 return 1; 293 } 294 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 295 return 1; 296 } 297 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 298 return 1; 299 } 300 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 301 return 1; 302 } 303 304 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 305 case 0: 306 break; 307 case TCG_CT_CONST_ANDI: 308 val = ~val; 309 /* fallthru */ 310 case TCG_CT_CONST_ORRI: 311 if (val == deposit64(val, 32, 32, val)) { 312 int cmode, imm8; 313 
return is_shimm1632(val, &cmode, &imm8); 314 } 315 break; 316 default: 317 /* Both bits should not be set for the same insn. */ 318 g_assert_not_reached(); 319 } 320 321 return 0; 322} 323 324enum aarch64_cond_code { 325 COND_EQ = 0x0, 326 COND_NE = 0x1, 327 COND_CS = 0x2, /* Unsigned greater or equal */ 328 COND_HS = COND_CS, /* ALIAS greater or equal */ 329 COND_CC = 0x3, /* Unsigned less than */ 330 COND_LO = COND_CC, /* ALIAS Lower */ 331 COND_MI = 0x4, /* Negative */ 332 COND_PL = 0x5, /* Zero or greater */ 333 COND_VS = 0x6, /* Overflow */ 334 COND_VC = 0x7, /* No overflow */ 335 COND_HI = 0x8, /* Unsigned greater than */ 336 COND_LS = 0x9, /* Unsigned less or equal */ 337 COND_GE = 0xa, 338 COND_LT = 0xb, 339 COND_GT = 0xc, 340 COND_LE = 0xd, 341 COND_AL = 0xe, 342 COND_NV = 0xf, /* behaves like COND_AL here */ 343}; 344 345static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 346 [TCG_COND_EQ] = COND_EQ, 347 [TCG_COND_NE] = COND_NE, 348 [TCG_COND_LT] = COND_LT, 349 [TCG_COND_GE] = COND_GE, 350 [TCG_COND_LE] = COND_LE, 351 [TCG_COND_GT] = COND_GT, 352 /* unsigned */ 353 [TCG_COND_LTU] = COND_LO, 354 [TCG_COND_GTU] = COND_HI, 355 [TCG_COND_GEU] = COND_HS, 356 [TCG_COND_LEU] = COND_LS, 357}; 358 359typedef enum { 360 LDST_ST = 0, /* store */ 361 LDST_LD = 1, /* load */ 362 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 363 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 364} AArch64LdstType; 365 366/* We encode the format of the insn into the beginning of the name, so that 367 we can have the preprocessor help "typecheck" the insn vs the output 368 function. Arm didn't provide us with nice names for the formats, so we 369 use the section number of the architecture reference manual in which the 370 instruction group is described. */ 371typedef enum { 372 /* Compare and branch (immediate). */ 373 I3201_CBZ = 0x34000000, 374 I3201_CBNZ = 0x35000000, 375 376 /* Conditional branch (immediate). 
*/ 377 I3202_B_C = 0x54000000, 378 379 /* Unconditional branch (immediate). */ 380 I3206_B = 0x14000000, 381 I3206_BL = 0x94000000, 382 383 /* Unconditional branch (register). */ 384 I3207_BR = 0xd61f0000, 385 I3207_BLR = 0xd63f0000, 386 I3207_RET = 0xd65f0000, 387 388 /* AdvSIMD load/store single structure. */ 389 I3303_LD1R = 0x0d40c000, 390 391 /* Load literal for loading the address at pc-relative offset */ 392 I3305_LDR = 0x58000000, 393 I3305_LDR_v64 = 0x5c000000, 394 I3305_LDR_v128 = 0x9c000000, 395 396 /* Load/store register. Described here as 3.3.12, but the helper 397 that emits them can transform to 3.3.10 or 3.3.13. */ 398 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 399 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 400 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 401 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 402 403 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 404 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 405 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 406 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 407 408 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 409 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 410 411 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 412 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 413 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 414 415 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 416 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 417 418 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 419 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 420 421 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 422 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 423 424 I3312_TO_I3310 = 0x00200800, 425 I3312_TO_I3313 = 0x01000000, 426 427 /* Load/store register pair instructions. 
*/ 428 I3314_LDP = 0x28400000, 429 I3314_STP = 0x28000000, 430 431 /* Add/subtract immediate instructions. */ 432 I3401_ADDI = 0x11000000, 433 I3401_ADDSI = 0x31000000, 434 I3401_SUBI = 0x51000000, 435 I3401_SUBSI = 0x71000000, 436 437 /* Bitfield instructions. */ 438 I3402_BFM = 0x33000000, 439 I3402_SBFM = 0x13000000, 440 I3402_UBFM = 0x53000000, 441 442 /* Extract instruction. */ 443 I3403_EXTR = 0x13800000, 444 445 /* Logical immediate instructions. */ 446 I3404_ANDI = 0x12000000, 447 I3404_ORRI = 0x32000000, 448 I3404_EORI = 0x52000000, 449 450 /* Move wide immediate instructions. */ 451 I3405_MOVN = 0x12800000, 452 I3405_MOVZ = 0x52800000, 453 I3405_MOVK = 0x72800000, 454 455 /* PC relative addressing instructions. */ 456 I3406_ADR = 0x10000000, 457 I3406_ADRP = 0x90000000, 458 459 /* Add/subtract shifted register instructions (without a shift). */ 460 I3502_ADD = 0x0b000000, 461 I3502_ADDS = 0x2b000000, 462 I3502_SUB = 0x4b000000, 463 I3502_SUBS = 0x6b000000, 464 465 /* Add/subtract shifted register instructions (with a shift). */ 466 I3502S_ADD_LSL = I3502_ADD, 467 468 /* Add/subtract with carry instructions. */ 469 I3503_ADC = 0x1a000000, 470 I3503_SBC = 0x5a000000, 471 472 /* Conditional select instructions. */ 473 I3506_CSEL = 0x1a800000, 474 I3506_CSINC = 0x1a800400, 475 I3506_CSINV = 0x5a800000, 476 I3506_CSNEG = 0x5a800400, 477 478 /* Data-processing (1 source) instructions. */ 479 I3507_CLZ = 0x5ac01000, 480 I3507_RBIT = 0x5ac00000, 481 I3507_REV16 = 0x5ac00400, 482 I3507_REV32 = 0x5ac00800, 483 I3507_REV64 = 0x5ac00c00, 484 485 /* Data-processing (2 source) instructions. */ 486 I3508_LSLV = 0x1ac02000, 487 I3508_LSRV = 0x1ac02400, 488 I3508_ASRV = 0x1ac02800, 489 I3508_RORV = 0x1ac02c00, 490 I3508_SMULH = 0x9b407c00, 491 I3508_UMULH = 0x9bc07c00, 492 I3508_UDIV = 0x1ac00800, 493 I3508_SDIV = 0x1ac00c00, 494 495 /* Data-processing (3 source) instructions. 
*/ 496 I3509_MADD = 0x1b000000, 497 I3509_MSUB = 0x1b008000, 498 499 /* Logical shifted register instructions (without a shift). */ 500 I3510_AND = 0x0a000000, 501 I3510_BIC = 0x0a200000, 502 I3510_ORR = 0x2a000000, 503 I3510_ORN = 0x2a200000, 504 I3510_EOR = 0x4a000000, 505 I3510_EON = 0x4a200000, 506 I3510_ANDS = 0x6a000000, 507 508 /* Logical shifted register instructions (with a shift). */ 509 I3502S_AND_LSR = I3510_AND | (1 << 22), 510 511 /* AdvSIMD copy */ 512 I3605_DUP = 0x0e000400, 513 I3605_INS = 0x4e001c00, 514 I3605_UMOV = 0x0e003c00, 515 516 /* AdvSIMD modified immediate */ 517 I3606_MOVI = 0x0f000400, 518 I3606_MVNI = 0x2f000400, 519 I3606_BIC = 0x2f001400, 520 I3606_ORR = 0x0f001400, 521 522 /* AdvSIMD shift by immediate */ 523 I3614_SSHR = 0x0f000400, 524 I3614_SSRA = 0x0f001400, 525 I3614_SHL = 0x0f005400, 526 I3614_SLI = 0x2f005400, 527 I3614_USHR = 0x2f000400, 528 I3614_USRA = 0x2f001400, 529 530 /* AdvSIMD three same. */ 531 I3616_ADD = 0x0e208400, 532 I3616_AND = 0x0e201c00, 533 I3616_BIC = 0x0e601c00, 534 I3616_BIF = 0x2ee01c00, 535 I3616_BIT = 0x2ea01c00, 536 I3616_BSL = 0x2e601c00, 537 I3616_EOR = 0x2e201c00, 538 I3616_MUL = 0x0e209c00, 539 I3616_ORR = 0x0ea01c00, 540 I3616_ORN = 0x0ee01c00, 541 I3616_SUB = 0x2e208400, 542 I3616_CMGT = 0x0e203400, 543 I3616_CMGE = 0x0e203c00, 544 I3616_CMTST = 0x0e208c00, 545 I3616_CMHI = 0x2e203400, 546 I3616_CMHS = 0x2e203c00, 547 I3616_CMEQ = 0x2e208c00, 548 I3616_SMAX = 0x0e206400, 549 I3616_SMIN = 0x0e206c00, 550 I3616_SSHL = 0x0e204400, 551 I3616_SQADD = 0x0e200c00, 552 I3616_SQSUB = 0x0e202c00, 553 I3616_UMAX = 0x2e206400, 554 I3616_UMIN = 0x2e206c00, 555 I3616_UQADD = 0x2e200c00, 556 I3616_UQSUB = 0x2e202c00, 557 I3616_USHL = 0x2e204400, 558 559 /* AdvSIMD two-reg misc. 
*/ 560 I3617_CMGT0 = 0x0e208800, 561 I3617_CMEQ0 = 0x0e209800, 562 I3617_CMLT0 = 0x0e20a800, 563 I3617_CMGE0 = 0x2e208800, 564 I3617_CMLE0 = 0x2e20a800, 565 I3617_NOT = 0x2e205800, 566 I3617_ABS = 0x0e20b800, 567 I3617_NEG = 0x2e20b800, 568 569 /* System instructions. */ 570 NOP = 0xd503201f, 571 DMB_ISH = 0xd50338bf, 572 DMB_LD = 0x00000100, 573 DMB_ST = 0x00000200, 574} AArch64Insn; 575 576static inline uint32_t tcg_in32(TCGContext *s) 577{ 578 uint32_t v = *(uint32_t *)s->code_ptr; 579 return v; 580} 581 582/* Emit an opcode with "type-checking" of the format. */ 583#define tcg_out_insn(S, FMT, OP, ...) \ 584 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 585 586static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 587 TCGReg rt, TCGReg rn, unsigned size) 588{ 589 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 590} 591 592static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 593 int imm19, TCGReg rt) 594{ 595 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 596} 597 598static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 599 TCGReg rt, int imm19) 600{ 601 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 602} 603 604static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 605 TCGCond c, int imm19) 606{ 607 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 608} 609 610static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 611{ 612 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 613} 614 615static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 616{ 617 tcg_out32(s, insn | rn << 5); 618} 619 620static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 621 TCGReg r1, TCGReg r2, TCGReg rn, 622 tcg_target_long ofs, bool pre, bool w) 623{ 624 insn |= 1u << 31; /* ext */ 625 insn |= pre << 24; 626 insn |= w << 23; 627 628 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 629 
insn |= (ofs & (0x7f << 3)) << (15 - 3); 630 631 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 632} 633 634static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 635 TCGReg rd, TCGReg rn, uint64_t aimm) 636{ 637 if (aimm > 0xfff) { 638 tcg_debug_assert((aimm & 0xfff) == 0); 639 aimm >>= 12; 640 tcg_debug_assert(aimm <= 0xfff); 641 aimm |= 1 << 12; /* apply LSL 12 */ 642 } 643 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 644} 645 646/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 647 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 648 that feed the DecodeBitMasks pseudo function. */ 649static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 650 TCGReg rd, TCGReg rn, int n, int immr, int imms) 651{ 652 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 653 | rn << 5 | rd); 654} 655 656#define tcg_out_insn_3404 tcg_out_insn_3402 657 658static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 659 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 660{ 661 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 662 | rn << 5 | rd); 663} 664 665/* This function is used for the Move (wide immediate) instruction group. 666 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 667static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 668 TCGReg rd, uint16_t half, unsigned shift) 669{ 670 tcg_debug_assert((shift & ~0x30) == 0); 671 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 672} 673 674static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 675 TCGReg rd, int64_t disp) 676{ 677 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 678} 679 680/* This function is for both 3.5.2 (Add/Subtract shifted register), for 681 the rare occasion when we actually want to supply a shift amount. 
*/ 682static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, 683 TCGType ext, TCGReg rd, TCGReg rn, 684 TCGReg rm, int imm6) 685{ 686 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); 687} 688 689/* This function is for 3.5.2 (Add/subtract shifted register), 690 and 3.5.10 (Logical shifted register), for the vast majorty of cases 691 when we don't want to apply a shift. Thus it can also be used for 692 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */ 693static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, 694 TCGReg rd, TCGReg rn, TCGReg rm) 695{ 696 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); 697} 698 699#define tcg_out_insn_3503 tcg_out_insn_3502 700#define tcg_out_insn_3508 tcg_out_insn_3502 701#define tcg_out_insn_3510 tcg_out_insn_3502 702 703static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, 704 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) 705{ 706 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd 707 | tcg_cond_to_aarch64[c] << 12); 708} 709 710static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, 711 TCGReg rd, TCGReg rn) 712{ 713 tcg_out32(s, insn | ext << 31 | rn << 5 | rd); 714} 715 716static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, 717 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) 718{ 719 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); 720} 721 722static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q, 723 TCGReg rd, TCGReg rn, int dst_idx, int src_idx) 724{ 725 /* Note that bit 11 set means general register input. Therefore 726 we can handle both register sets with one function. 
*/ 727 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11) 728 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5); 729} 730 731static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, 732 TCGReg rd, bool op, int cmode, uint8_t imm8) 733{ 734 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f) 735 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); 736} 737 738static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, 739 TCGReg rd, TCGReg rn, unsigned immhb) 740{ 741 tcg_out32(s, insn | q << 30 | immhb << 16 742 | (rn & 0x1f) << 5 | (rd & 0x1f)); 743} 744 745static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q, 746 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 747{ 748 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16 749 | (rn & 0x1f) << 5 | (rd & 0x1f)); 750} 751 752static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q, 753 unsigned size, TCGReg rd, TCGReg rn) 754{ 755 tcg_out32(s, insn | q << 30 | (size << 22) 756 | (rn & 0x1f) << 5 | (rd & 0x1f)); 757} 758 759static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, 760 TCGReg rd, TCGReg base, TCGType ext, 761 TCGReg regoff) 762{ 763 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 764 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 765 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 766} 767 768static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 769 TCGReg rd, TCGReg rn, intptr_t offset) 770{ 771 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 772} 773 774static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 775 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 776{ 777 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 778 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 779 | rn << 5 | (rd & 0x1f)); 780} 781 782/* Register to register move using ORR (shifted register with no shift). 
*/ 783static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 784{ 785 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 786} 787 788/* Register to register move using ADDI (move to/from SP). */ 789static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 790{ 791 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 792} 793 794/* This function is used for the Logical (immediate) instruction group. 795 The value of LIMM must satisfy IS_LIMM. See the comment above about 796 only supporting simplified logical immediates. */ 797static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 798 TCGReg rd, TCGReg rn, uint64_t limm) 799{ 800 unsigned h, l, r, c; 801 802 tcg_debug_assert(is_limm(limm)); 803 804 h = clz64(limm); 805 l = ctz64(limm); 806 if (l == 0) { 807 r = 0; /* form 0....01....1 */ 808 c = ctz64(~limm) - 1; 809 if (h == 0) { 810 r = clz64(~limm); /* form 1..10..01..1 */ 811 c += r; 812 } 813 } else { 814 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 815 c = r - h - 1; 816 } 817 if (ext == TCG_TYPE_I32) { 818 r &= 31; 819 c &= 31; 820 } 821 822 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 823} 824 825static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 826 TCGReg rd, int64_t v64) 827{ 828 bool q = type == TCG_TYPE_V128; 829 int cmode, imm8, i; 830 831 /* Test all bytes equal first. */ 832 if (vece == MO_8) { 833 imm8 = (uint8_t)v64; 834 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 835 return; 836 } 837 838 /* 839 * Test all bytes 0x00 or 0xff second. This can match cases that 840 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 841 */ 842 for (i = imm8 = 0; i < 8; i++) { 843 uint8_t byte = v64 >> (i * 8); 844 if (byte == 0xff) { 845 imm8 |= 1 << i; 846 } else if (byte != 0) { 847 goto fail_bytes; 848 } 849 } 850 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 851 return; 852 fail_bytes: 853 854 /* 855 * Tests for various replications. 
For each element width, if we 856 * cannot find an expansion there's no point checking a larger 857 * width because we already know by replication it cannot match. 858 */ 859 if (vece == MO_16) { 860 uint16_t v16 = v64; 861 862 if (is_shimm16(v16, &cmode, &imm8)) { 863 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 864 return; 865 } 866 if (is_shimm16(~v16, &cmode, &imm8)) { 867 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 868 return; 869 } 870 871 /* 872 * Otherwise, all remaining constants can be loaded in two insns: 873 * rd = v16 & 0xff, rd |= v16 & 0xff00. 874 */ 875 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 876 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 877 return; 878 } else if (vece == MO_32) { 879 uint32_t v32 = v64; 880 uint32_t n32 = ~v32; 881 882 if (is_shimm32(v32, &cmode, &imm8) || 883 is_soimm32(v32, &cmode, &imm8) || 884 is_fimm32(v32, &cmode, &imm8)) { 885 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 886 return; 887 } 888 if (is_shimm32(n32, &cmode, &imm8) || 889 is_soimm32(n32, &cmode, &imm8)) { 890 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 891 return; 892 } 893 894 /* 895 * Restrict the set of constants to those we can load with 896 * two instructions. Others we load from the pool. 897 */ 898 i = is_shimm32_pair(v32, &cmode, &imm8); 899 if (i) { 900 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 901 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 902 return; 903 } 904 i = is_shimm32_pair(n32, &cmode, &imm8); 905 if (i) { 906 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 907 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 908 return; 909 } 910 } else if (is_fimm64(v64, &cmode, &imm8)) { 911 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 912 return; 913 } 914 915 /* 916 * As a last resort, load from the constant pool. Sadly there 917 * is no LD1R (literal), so store the full 16-byte vector. 
918 */ 919 if (type == TCG_TYPE_V128) { 920 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 921 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 922 } else { 923 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 924 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 925 } 926} 927 928static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 929 TCGReg rd, TCGReg rs) 930{ 931 int is_q = type - TCG_TYPE_V64; 932 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 933 return true; 934} 935 936static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 937 TCGReg r, TCGReg base, intptr_t offset) 938{ 939 TCGReg temp = TCG_REG_TMP; 940 941 if (offset < -0xffffff || offset > 0xffffff) { 942 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 943 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 944 base = temp; 945 } else { 946 AArch64Insn add_insn = I3401_ADDI; 947 948 if (offset < 0) { 949 add_insn = I3401_SUBI; 950 offset = -offset; 951 } 952 if (offset & 0xfff000) { 953 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 954 base = temp; 955 } 956 if (offset & 0xfff) { 957 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 958 base = temp; 959 } 960 } 961 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 962 return true; 963} 964 965static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 966 tcg_target_long value) 967{ 968 tcg_target_long svalue = value; 969 tcg_target_long ivalue = ~value; 970 tcg_target_long t0, t1, t2; 971 int s0, s1; 972 AArch64Insn opc; 973 974 switch (type) { 975 case TCG_TYPE_I32: 976 case TCG_TYPE_I64: 977 tcg_debug_assert(rd < 32); 978 break; 979 default: 980 g_assert_not_reached(); 981 } 982 983 /* For 32-bit values, discard potential garbage in value. For 64-bit 984 values within [2**31, 2**32-1], we can create smaller sequences by 985 interpreting this as a negative 32-bit number, while ensuring that 986 the high 32 bits are cleared by setting SF=0. 
*/
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  A value (or its inverse) that fits
       in the low 16 bits needs a single MOVZ (or MOVN).  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        /* Byte displacement that passes the signed 21-bit check: one ADR. */
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        /* Otherwise try a 4K-page displacement (ADRP), adding the low
           12 bits of the value afterwards when they are non-zero.  */
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    /* s0/s1 are the bit positions (multiples of 16, at most 48) of the two
       lowest non-zero 16-bit chunks of t0; t2 is what remains of t0 once
       both chunks are cleared.
       NOTE(review): 0xffffUL is only 32 bits wide on hosts where
       unsigned long is 32-bit, in which case shifting it by 32 or 48 would
       be undefined -- confirm the supported hosts or widen the constant.  */
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        /* At most two chunks: MOVZ/MOVN, optionally followed by one MOVK
           that inserts the second chunk of the real (non-inverted) value. */
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

/*
 * Emit the load/store INSN of register RD at address RN + OFFSET.
 * LGSIZE is the log2 of the alignment required for the scaled form
 * (callers in this file pass the log2 of the access size).
 *
 * Three encodings are tried in order: the scaled unsigned 12-bit immediate,
 * the unscaled immediate for offsets in [-256, 255], and finally a register
 * offset, which loads OFFSET into TCG_REG_TMP first.
 */
static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}

/*
 * Emit a register-to-register move of TYPE from ARG to RET.
 * Nothing is emitted when RET == ARG.  Register numbers below 32 take the
 * general-register paths; numbers of 32 and above take the vector paths.
 * Always returns true.
 */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            /* general <- general */
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            /* general <- vector element 0 */
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            /* vector element 0 <- general */
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* Both registers are vector registers: move as a 64-bit vector. */
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

/*
 * Emit a load of TYPE from BASE + OFS into RET.  For the integer types the
 * opcode depends on whether RET is numbered below 32 (general register
 * load) or not (vector register load of the same width).
 */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;   /* log2 of the access size, for the scaled-offset form */

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

/*
 * Emit a store of TYPE from SRC to BASE + OFS.  Mirrors tcg_out_ld():
 * the opcode for the integer types depends on whether SRC is numbered
 * below 32.
 */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;   /* log2 of the access size, for the scaled-offset form */

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

/*
 * Try to store the constant VAL directly.  Only zero of an integer type is
 * handled, by storing TCG_REG_XZR; returns true when the store was emitted
 * and false when nothing was emitted.
 */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

/* Emit BFM rd, rn, #a, #b; EXT is passed both as the size flag and as
   the following encoding argument.  */
static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

/* Emit UBFM rd, rn, #a, #b (same argument convention as tcg_out_bfm).  */
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

/* Emit SBFM rd, rn, #a, #b (same argument convention as tcg_out_bfm).  */
static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

/* Emit EXTR rd, rn, rm, #a.  */
static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

/* Shift left by the constant M (taken modulo the operand width),
   expressed as a UBFM.  */
static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

/* Logical shift right by the constant M (taken modulo the operand width),
   expressed as a UBFM.  */
static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

/* Arithmetic shift right by the constant M (taken modulo the operand
   width), expressed as an SBFM.  */
static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

/* Rotate right by the constant M (taken modulo the operand width):
   EXTR with the same register as both sources.  */
static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

/* Rotate left by the constant M, emitted as an EXTR with immediate
   (width - M), M taken modulo the operand width.  */
static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

/* Deposit the low WIDTH bits of RN into RD at bit position LSB, via BFM
   with immediates ((size - lsb) mod size) and (width - 1).  */
static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

/*
 * Emit a flag-setting comparison of register A against B.  When CONST_B is
 * true B is an immediate (negative values are compared by adding -B);
 * otherwise B is a register number.  The result register is XZR.
 */
static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

/* Emit a direct branch B to TARGET.  The displacement, counted in
   instructions, is only checked by a debug assertion.  */
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

/* Branch to TARGET: a direct B when the displacement passes the signed
   26-bit check, otherwise load the address into TCG_REG_TMP and BR.  */
static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

/* Emit an indirect call (BLR) through REG.  */
static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

1295static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) 1296{ 1297 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1298 if (offset == sextract64(offset, 0, 26)) { 1299 tcg_out_insn(s, 3206, BL, offset); 1300 } else { 1301 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1302 tcg_out_callr(s, TCG_REG_TMP); 1303 } 1304} 1305 1306void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, 1307 uintptr_t jmp_rw, uintptr_t addr) 1308{ 1309 tcg_insn_unit i1, i2; 1310 TCGType rt = TCG_TYPE_I64; 1311 TCGReg rd = TCG_REG_TMP; 1312 uint64_t pair; 1313 1314 ptrdiff_t offset = addr - jmp_rx; 1315 1316 if (offset == sextract64(offset, 0, 26)) { 1317 i1 = I3206_B | ((offset >> 2) & 0x3ffffff); 1318 i2 = NOP; 1319 } else { 1320 offset = (addr >> 12) - (jmp_rx >> 12); 1321 1322 /* patch ADRP */ 1323 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; 1324 /* patch ADDI */ 1325 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; 1326 } 1327 pair = (uint64_t)i2 << 32 | i1; 1328 qatomic_set((uint64_t *)jmp_rw, pair); 1329 flush_idcache_range(jmp_rx, jmp_rw, 8); 1330} 1331 1332static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1333{ 1334 if (!l->has_value) { 1335 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1336 tcg_out_insn(s, 3206, B, 0); 1337 } else { 1338 tcg_out_goto(s, l->u.value_ptr); 1339 } 1340} 1341 1342static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1343 TCGArg b, bool b_const, TCGLabel *l) 1344{ 1345 intptr_t offset; 1346 bool need_cmp; 1347 1348 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1349 need_cmp = false; 1350 } else { 1351 need_cmp = true; 1352 tcg_out_cmp(s, ext, a, b, b_const); 1353 } 1354 1355 if (!l->has_value) { 1356 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1357 offset = tcg_in32(s) >> 5; 1358 } else { 1359 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1360 tcg_debug_assert(offset == 
sextract64(offset, 0, 19)); 1361 } 1362 1363 if (need_cmp) { 1364 tcg_out_insn(s, 3202, B_C, c, offset); 1365 } else if (c == TCG_COND_EQ) { 1366 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1367 } else { 1368 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1369 } 1370} 1371 1372static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) 1373{ 1374 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); 1375} 1376 1377static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) 1378{ 1379 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); 1380} 1381 1382static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) 1383{ 1384 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); 1385} 1386 1387static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1388 TCGReg rd, TCGReg rn) 1389{ 1390 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1391 int bits = (8 << s_bits) - 1; 1392 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1393} 1394 1395static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1396 TCGReg rd, TCGReg rn) 1397{ 1398 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1399 int bits = (8 << s_bits) - 1; 1400 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1401} 1402 1403static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1404 TCGReg rn, int64_t aimm) 1405{ 1406 if (aimm >= 0) { 1407 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1408 } else { 1409 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1410 } 1411} 1412 1413static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1414 TCGReg rh, TCGReg al, TCGReg ah, 1415 tcg_target_long bl, tcg_target_long bh, 1416 bool const_bl, bool const_bh, bool sub) 1417{ 1418 TCGReg orig_rl = rl; 1419 AArch64Insn insn; 1420 1421 if (rl == ah || (!const_bh && rl == bh)) { 1422 rl = TCG_REG_TMP; 1423 } 1424 1425 if (const_bl) { 1426 insn = I3401_ADDSI; 1427 if ((bl < 0) ^ sub) { 1428 insn = I3401_SUBSI; 1429 bl = -bl; 1430 } 1431 if (unlikely(al == 
TCG_REG_XZR)) { 1432 /* ??? We want to allow al to be zero for the benefit of 1433 negation via subtraction. However, that leaves open the 1434 possibility of adding 0+const in the low part, and the 1435 immediate add instructions encode XSP not XZR. Don't try 1436 anything more elaborate here than loading another zero. */ 1437 al = TCG_REG_TMP; 1438 tcg_out_movi(s, ext, al, 0); 1439 } 1440 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1441 } else { 1442 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1443 } 1444 1445 insn = I3503_ADC; 1446 if (const_bh) { 1447 /* Note that the only two constants we support are 0 and -1, and 1448 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1449 if ((bh != 0) ^ sub) { 1450 insn = I3503_SBC; 1451 } 1452 bh = TCG_REG_XZR; 1453 } else if (sub) { 1454 insn = I3503_SBC; 1455 } 1456 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1457 1458 tcg_out_mov(s, ext, orig_rl, rl); 1459} 1460 1461static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1462{ 1463 static const uint32_t sync[] = { 1464 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1465 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1466 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1467 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1468 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1469 }; 1470 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1471} 1472 1473static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1474 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1475{ 1476 TCGReg a1 = a0; 1477 if (is_ctz) { 1478 a1 = TCG_REG_TMP; 1479 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1480 } 1481 if (const_b && b == (ext ? 
64 : 32)) { 1482 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1483 } else { 1484 AArch64Insn sel = I3506_CSEL; 1485 1486 tcg_out_cmp(s, ext, a0, 0, 1); 1487 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1488 1489 if (const_b) { 1490 if (b == -1) { 1491 b = TCG_REG_XZR; 1492 sel = I3506_CSINV; 1493 } else if (b == 0) { 1494 b = TCG_REG_XZR; 1495 } else { 1496 tcg_out_movi(s, ext, d, b); 1497 b = d; 1498 } 1499 } 1500 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1501 } 1502} 1503 1504#ifdef CONFIG_SOFTMMU 1505#include "../tcg-ldst.c.inc" 1506 1507/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1508 * TCGMemOpIdx oi, uintptr_t ra) 1509 */ 1510static void * const qemu_ld_helpers[16] = { 1511 [MO_UB] = helper_ret_ldub_mmu, 1512 [MO_LEUW] = helper_le_lduw_mmu, 1513 [MO_LEUL] = helper_le_ldul_mmu, 1514 [MO_LEQ] = helper_le_ldq_mmu, 1515 [MO_BEUW] = helper_be_lduw_mmu, 1516 [MO_BEUL] = helper_be_ldul_mmu, 1517 [MO_BEQ] = helper_be_ldq_mmu, 1518}; 1519 1520/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1521 * uintxx_t val, TCGMemOpIdx oi, 1522 * uintptr_t ra) 1523 */ 1524static void * const qemu_st_helpers[16] = { 1525 [MO_UB] = helper_ret_stb_mmu, 1526 [MO_LEUW] = helper_le_stw_mmu, 1527 [MO_LEUL] = helper_le_stl_mmu, 1528 [MO_LEQ] = helper_le_stq_mmu, 1529 [MO_BEUW] = helper_be_stw_mmu, 1530 [MO_BEUL] = helper_be_stl_mmu, 1531 [MO_BEQ] = helper_be_stq_mmu, 1532}; 1533 1534static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) 1535{ 1536 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1537 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1538 tcg_out_insn(s, 3406, ADR, rd, offset); 1539} 1540 1541static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1542{ 1543 TCGMemOpIdx oi = lb->oi; 1544 MemOp opc = get_memop(oi); 1545 MemOp size = opc & MO_SIZE; 1546 1547 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1548 return false; 1549 } 1550 
1551 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1552 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1553 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1554 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1555 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1556 if (opc & MO_SIGN) { 1557 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1558 } else { 1559 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1560 } 1561 1562 tcg_out_goto(s, lb->raddr); 1563 return true; 1564} 1565 1566static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1567{ 1568 TCGMemOpIdx oi = lb->oi; 1569 MemOp opc = get_memop(oi); 1570 MemOp size = opc & MO_SIZE; 1571 1572 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1573 return false; 1574 } 1575 1576 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1577 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1578 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1579 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1580 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1581 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1582 tcg_out_goto(s, lb->raddr); 1583 return true; 1584} 1585 1586static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, 1587 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1588 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1589{ 1590 TCGLabelQemuLdst *label = new_ldst_label(s); 1591 1592 label->is_ld = is_ld; 1593 label->oi = oi; 1594 label->type = ext; 1595 label->datalo_reg = data_reg; 1596 label->addrlo_reg = addr_reg; 1597 label->raddr = tcg_splitwx_to_rx(raddr); 1598 label->label_ptr[0] = label_ptr; 1599} 1600 1601/* We expect to use a 7-bit scaled negative offset from ENV. */ 1602QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1603QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1604 1605/* These offsets are built into the LDP below. 
*/
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP.

   ADDR_REG holds the guest address, OPC the memory operation, MEM_INDEX
   selects the TLB, and IS_READ chooses between the addr_read and
   addr_write comparators.  The location of the emitted branch is stored
   in *LABEL_PTR.  */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* Use a 64-bit AND for the index only when page bits plus the maximum
       dynamic TLB bits exceed 32.  */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}

#endif /* CONFIG_SOFTMMU */

/*
 * Emit the host load for a guest load of kind MEMOP into DATA_R, using
 * the register-offset form with base ADDR_R and offset OFF_R of type
 * OTYPE.  EXT selects the 64-bit sign-extending forms.  When MEMOP
 * requests a byte swap, the value is loaded unsigned, byte-reversed, and
 * sign-extended afterwards when the operation is signed.
 */
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const MemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev16(s, data_r, data_r);
        }
        break;
    case MO_SW:
        if (bswap) {
            /* Swap first, then sign-extend the swapped halfword. */
            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
            tcg_out_rev16(s, data_r, data_r);
            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                           data_r, addr_r, otype, off_r);
        }
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev32(s, data_r, data_r);
        }
        break;
    case MO_SL:
        if (bswap) {
            /* Swap first, then sign-extend the swapped word. */
            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
            tcg_out_rev32(s, data_r, data_r);
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
        }
        break;
    case MO_Q:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev64(s, data_r, data_r);
        }
        break;
    default:
        tcg_abort();
    }
}

/*
 * Emit the host store for a guest store of kind MEMOP from DATA_R, using
 * the register-offset form with base ADDR_R and offset OFF_R of type
 * OTYPE.  When MEMOP requests a byte swap, the value is first reversed
 * into TCG_REG_TMP; the zero register is stored as-is.
 */
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const MemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
        break;
    case MO_16:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev16(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
        break;
    case MO_32:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev32(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
        break;
    case MO_64:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev64(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
        break;
    default:
        tcg_abort();
    }
}

/*
 * Emit a guest load described by OI from the guest address in ADDR_REG
 * into DATA_REG.  With CONFIG_SOFTMMU the TLB lookup is emitted first, the
 * access uses X1 as base with the guest address as offset, and a slow-path
 * label is recorded.  Otherwise the access goes either through
 * TCG_REG_GUEST_BASE plus the guest address, or directly through the guest
 * address with XZR as offset.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi, TCGType ext)
{
    MemOp memop = get_memop(oi);
    /* Type of the offset register: follows the guest address width. */
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

/*
 * Emit a guest store described by OI of DATA_REG to the guest address in
 * ADDR_REG.  Structured like tcg_out_qemu_ld(); the type recorded in the
 * slow-path label is 64-bit only for MO_64 stores.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi)
{
    MemOp memop = get_memop(oi);
    /* Type of the offset register: follows the guest address width. */
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

/* Branch target used by exit_tb when the return value is non-zero; it is
   assigned outside this part of the file.  */
static const tcg_insn_unit *tb_ret_addr;

/*
 * Emit host code for one integer TCG opcode OPC.  ARGS holds the operands
 * and CONST_ARGS[i] is non-zero when ARGS[i] is a constant rather than a
 * register.  Moves and calls never reach this function (see the end of
 * the switch).
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        if (a0 == 0) {
            tcg_out_goto_long(s, tcg_code_gen_epilogue);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
            tcg_out_goto_long(s, tb_ret_addr);
        }
        break;

    case INDEX_op_goto_tb:
        if (s->tb_jmp_insn_offset != NULL) {
            /* TCG_TARGET_HAS_direct_jump */
            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
               write can be used to patch the target address. */
            if ((uintptr_t)s->code_ptr & 7) {
                tcg_out32(s, NOP);
            }
            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
            /* actual branch destination will be patched by
               tb_target_set_jmp_target later. */
            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
        } else {
            /* !TCG_TARGET_HAS_direct_jump */
            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            /* Load the destination PC-relative from the jump target slot. */
            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
        }
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    /* Host loads: a0 = destination, a1 = base, a2 = offset; the last
       argument of tcg_out_ldst() is the log2 of the access size.  */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    /* Host stores: the stored value may be the zero register.  */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    /* In the 32-bit variants below, the operand that may be a constant is
       first truncated to 32 bits and sign-extended, then handled by the
       shared 64-bit code.  */
    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    /* The complemented logical ops (andc, orc, eqv) use the plain
       immediate instruction with the inverted constant.  */
    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    /* Remainder: quotient into TMP, then MSUB with TMP, a2 and a1. */
    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            /* Rotate left by a register amount: rotate right by the
               negated amount, computed into TMP.  */
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        /* Operands: a0, a1 are compared, a2 is the condition, args[3]
           the label.  */
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev64(s, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev32(s, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, a0, a1);
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        /* Zero-extension from 32 bits is emitted as a 32-bit move. */
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    /* extract/sextract: a2 is the starting bit and args[3] the length,
       so the last bit of the field is a2 + args[3] - 1.  */
    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    /* Double-word add/subtract: outputs a0 (low), a1 (high); inputs
       args[2..5] as low/high pairs.  The 32-bit variants sign-extend the
       low constant operand.  */
    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg *args, const int *const_args)
{
    static const AArch64Insn cmp_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp0_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_sub_vec:
        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        break;
    case INDEX_op_abs_vec:
        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2297 break; 2298 case INDEX_op_or_vec: 2299 if (const_args[2]) { 2300 is_shimm1632(a2, &cmode, &imm8); 2301 if (a0 == a1) { 2302 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2303 return; 2304 } 2305 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2306 a2 = a0; 2307 } 2308 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2309 break; 2310 case INDEX_op_andc_vec: 2311 if (const_args[2]) { 2312 is_shimm1632(a2, &cmode, &imm8); 2313 if (a0 == a1) { 2314 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2315 return; 2316 } 2317 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2318 a2 = a0; 2319 } 2320 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2321 break; 2322 case INDEX_op_orc_vec: 2323 if (const_args[2]) { 2324 is_shimm1632(~a2, &cmode, &imm8); 2325 if (a0 == a1) { 2326 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2327 return; 2328 } 2329 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2330 a2 = a0; 2331 } 2332 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2333 break; 2334 case INDEX_op_xor_vec: 2335 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2336 break; 2337 case INDEX_op_ssadd_vec: 2338 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2339 break; 2340 case INDEX_op_sssub_vec: 2341 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2342 break; 2343 case INDEX_op_usadd_vec: 2344 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2345 break; 2346 case INDEX_op_ussub_vec: 2347 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2348 break; 2349 case INDEX_op_smax_vec: 2350 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2351 break; 2352 case INDEX_op_smin_vec: 2353 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2354 break; 2355 case INDEX_op_umax_vec: 2356 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2357 break; 2358 case INDEX_op_umin_vec: 2359 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2360 break; 2361 case 
INDEX_op_not_vec: 2362 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2363 break; 2364 case INDEX_op_shli_vec: 2365 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2366 break; 2367 case INDEX_op_shri_vec: 2368 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2369 break; 2370 case INDEX_op_sari_vec: 2371 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2372 break; 2373 case INDEX_op_aa64_sli_vec: 2374 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2375 break; 2376 case INDEX_op_shlv_vec: 2377 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2378 break; 2379 case INDEX_op_aa64_sshl_vec: 2380 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2381 break; 2382 case INDEX_op_cmp_vec: 2383 { 2384 TCGCond cond = args[3]; 2385 AArch64Insn insn; 2386 2387 if (cond == TCG_COND_NE) { 2388 if (const_args[2]) { 2389 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2390 } else { 2391 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2392 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2393 } 2394 } else { 2395 if (const_args[2]) { 2396 insn = cmp0_insn[cond]; 2397 if (insn) { 2398 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2399 break; 2400 } 2401 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2402 a2 = TCG_VEC_TMP; 2403 } 2404 insn = cmp_insn[cond]; 2405 if (insn == 0) { 2406 TCGArg t; 2407 t = a1, a1 = a2, a2 = t; 2408 cond = tcg_swap_cond(cond); 2409 insn = cmp_insn[cond]; 2410 tcg_debug_assert(insn != 0); 2411 } 2412 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2413 } 2414 } 2415 break; 2416 2417 case INDEX_op_bitsel_vec: 2418 a3 = args[3]; 2419 if (a0 == a3) { 2420 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2421 } else if (a0 == a2) { 2422 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2423 } else { 2424 if (a0 != a1) { 2425 tcg_out_mov(s, type, a0, a1); 2426 } 2427 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2428 } 2429 break; 2430 2431 case INDEX_op_mov_vec: /* Always emitted via 
tcg_out_mov. */ 2432 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2433 default: 2434 g_assert_not_reached(); 2435 } 2436} 2437 2438int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2439{ 2440 switch (opc) { 2441 case INDEX_op_add_vec: 2442 case INDEX_op_sub_vec: 2443 case INDEX_op_and_vec: 2444 case INDEX_op_or_vec: 2445 case INDEX_op_xor_vec: 2446 case INDEX_op_andc_vec: 2447 case INDEX_op_orc_vec: 2448 case INDEX_op_neg_vec: 2449 case INDEX_op_abs_vec: 2450 case INDEX_op_not_vec: 2451 case INDEX_op_cmp_vec: 2452 case INDEX_op_shli_vec: 2453 case INDEX_op_shri_vec: 2454 case INDEX_op_sari_vec: 2455 case INDEX_op_ssadd_vec: 2456 case INDEX_op_sssub_vec: 2457 case INDEX_op_usadd_vec: 2458 case INDEX_op_ussub_vec: 2459 case INDEX_op_shlv_vec: 2460 case INDEX_op_bitsel_vec: 2461 return 1; 2462 case INDEX_op_rotli_vec: 2463 case INDEX_op_shrv_vec: 2464 case INDEX_op_sarv_vec: 2465 case INDEX_op_rotlv_vec: 2466 case INDEX_op_rotrv_vec: 2467 return -1; 2468 case INDEX_op_mul_vec: 2469 case INDEX_op_smax_vec: 2470 case INDEX_op_smin_vec: 2471 case INDEX_op_umax_vec: 2472 case INDEX_op_umin_vec: 2473 return vece < MO_64; 2474 2475 default: 2476 return 0; 2477 } 2478} 2479 2480void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2481 TCGArg a0, ...) 
2482{ 2483 va_list va; 2484 TCGv_vec v0, v1, v2, t1, t2, c1; 2485 TCGArg a2; 2486 2487 va_start(va, a0); 2488 v0 = temp_tcgv_vec(arg_temp(a0)); 2489 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2490 a2 = va_arg(va, TCGArg); 2491 v2 = temp_tcgv_vec(arg_temp(a2)); 2492 2493 switch (opc) { 2494 case INDEX_op_rotli_vec: 2495 t1 = tcg_temp_new_vec(type); 2496 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2497 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2498 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2499 tcg_temp_free_vec(t1); 2500 break; 2501 2502 case INDEX_op_shrv_vec: 2503 case INDEX_op_sarv_vec: 2504 /* Right shifts are negative left shifts for AArch64. */ 2505 t1 = tcg_temp_new_vec(type); 2506 tcg_gen_neg_vec(vece, t1, v2); 2507 opc = (opc == INDEX_op_shrv_vec 2508 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2509 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2510 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2511 tcg_temp_free_vec(t1); 2512 break; 2513 2514 case INDEX_op_rotlv_vec: 2515 t1 = tcg_temp_new_vec(type); 2516 c1 = tcg_constant_vec(type, vece, 8 << vece); 2517 tcg_gen_sub_vec(vece, t1, v2, c1); 2518 /* Right shifts are negative left shifts for AArch64. */ 2519 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2520 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2521 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2522 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2523 tcg_gen_or_vec(vece, v0, v0, t1); 2524 tcg_temp_free_vec(t1); 2525 break; 2526 2527 case INDEX_op_rotrv_vec: 2528 t1 = tcg_temp_new_vec(type); 2529 t2 = tcg_temp_new_vec(type); 2530 c1 = tcg_constant_vec(type, vece, 8 << vece); 2531 tcg_gen_neg_vec(vece, t1, v2); 2532 tcg_gen_sub_vec(vece, t2, c1, v2); 2533 /* Right shifts are negative left shifts for AArch64. 
*/ 2534 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2535 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2536 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2537 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2538 tcg_gen_or_vec(vece, v0, t1, t2); 2539 tcg_temp_free_vec(t1); 2540 tcg_temp_free_vec(t2); 2541 break; 2542 2543 default: 2544 g_assert_not_reached(); 2545 } 2546 2547 va_end(va); 2548} 2549 2550static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2551{ 2552 switch (op) { 2553 case INDEX_op_goto_ptr: 2554 return C_O0_I1(r); 2555 2556 case INDEX_op_ld8u_i32: 2557 case INDEX_op_ld8s_i32: 2558 case INDEX_op_ld16u_i32: 2559 case INDEX_op_ld16s_i32: 2560 case INDEX_op_ld_i32: 2561 case INDEX_op_ld8u_i64: 2562 case INDEX_op_ld8s_i64: 2563 case INDEX_op_ld16u_i64: 2564 case INDEX_op_ld16s_i64: 2565 case INDEX_op_ld32u_i64: 2566 case INDEX_op_ld32s_i64: 2567 case INDEX_op_ld_i64: 2568 case INDEX_op_neg_i32: 2569 case INDEX_op_neg_i64: 2570 case INDEX_op_not_i32: 2571 case INDEX_op_not_i64: 2572 case INDEX_op_bswap16_i32: 2573 case INDEX_op_bswap32_i32: 2574 case INDEX_op_bswap16_i64: 2575 case INDEX_op_bswap32_i64: 2576 case INDEX_op_bswap64_i64: 2577 case INDEX_op_ext8s_i32: 2578 case INDEX_op_ext16s_i32: 2579 case INDEX_op_ext8u_i32: 2580 case INDEX_op_ext16u_i32: 2581 case INDEX_op_ext8s_i64: 2582 case INDEX_op_ext16s_i64: 2583 case INDEX_op_ext32s_i64: 2584 case INDEX_op_ext8u_i64: 2585 case INDEX_op_ext16u_i64: 2586 case INDEX_op_ext32u_i64: 2587 case INDEX_op_ext_i32_i64: 2588 case INDEX_op_extu_i32_i64: 2589 case INDEX_op_extract_i32: 2590 case INDEX_op_extract_i64: 2591 case INDEX_op_sextract_i32: 2592 case INDEX_op_sextract_i64: 2593 return C_O1_I1(r, r); 2594 2595 case INDEX_op_st8_i32: 2596 case INDEX_op_st16_i32: 2597 case INDEX_op_st_i32: 2598 case INDEX_op_st8_i64: 2599 case INDEX_op_st16_i64: 2600 case INDEX_op_st32_i64: 2601 case INDEX_op_st_i64: 2602 return C_O0_I2(rZ, r); 2603 2604 case INDEX_op_add_i32: 2605 case INDEX_op_add_i64: 
2606 case INDEX_op_sub_i32: 2607 case INDEX_op_sub_i64: 2608 case INDEX_op_setcond_i32: 2609 case INDEX_op_setcond_i64: 2610 return C_O1_I2(r, r, rA); 2611 2612 case INDEX_op_mul_i32: 2613 case INDEX_op_mul_i64: 2614 case INDEX_op_div_i32: 2615 case INDEX_op_div_i64: 2616 case INDEX_op_divu_i32: 2617 case INDEX_op_divu_i64: 2618 case INDEX_op_rem_i32: 2619 case INDEX_op_rem_i64: 2620 case INDEX_op_remu_i32: 2621 case INDEX_op_remu_i64: 2622 case INDEX_op_muluh_i64: 2623 case INDEX_op_mulsh_i64: 2624 return C_O1_I2(r, r, r); 2625 2626 case INDEX_op_and_i32: 2627 case INDEX_op_and_i64: 2628 case INDEX_op_or_i32: 2629 case INDEX_op_or_i64: 2630 case INDEX_op_xor_i32: 2631 case INDEX_op_xor_i64: 2632 case INDEX_op_andc_i32: 2633 case INDEX_op_andc_i64: 2634 case INDEX_op_orc_i32: 2635 case INDEX_op_orc_i64: 2636 case INDEX_op_eqv_i32: 2637 case INDEX_op_eqv_i64: 2638 return C_O1_I2(r, r, rL); 2639 2640 case INDEX_op_shl_i32: 2641 case INDEX_op_shr_i32: 2642 case INDEX_op_sar_i32: 2643 case INDEX_op_rotl_i32: 2644 case INDEX_op_rotr_i32: 2645 case INDEX_op_shl_i64: 2646 case INDEX_op_shr_i64: 2647 case INDEX_op_sar_i64: 2648 case INDEX_op_rotl_i64: 2649 case INDEX_op_rotr_i64: 2650 return C_O1_I2(r, r, ri); 2651 2652 case INDEX_op_clz_i32: 2653 case INDEX_op_ctz_i32: 2654 case INDEX_op_clz_i64: 2655 case INDEX_op_ctz_i64: 2656 return C_O1_I2(r, r, rAL); 2657 2658 case INDEX_op_brcond_i32: 2659 case INDEX_op_brcond_i64: 2660 return C_O0_I2(r, rA); 2661 2662 case INDEX_op_movcond_i32: 2663 case INDEX_op_movcond_i64: 2664 return C_O1_I4(r, r, rA, rZ, rZ); 2665 2666 case INDEX_op_qemu_ld_i32: 2667 case INDEX_op_qemu_ld_i64: 2668 return C_O1_I1(r, l); 2669 case INDEX_op_qemu_st_i32: 2670 case INDEX_op_qemu_st_i64: 2671 return C_O0_I2(lZ, l); 2672 2673 case INDEX_op_deposit_i32: 2674 case INDEX_op_deposit_i64: 2675 return C_O1_I2(r, 0, rZ); 2676 2677 case INDEX_op_extract2_i32: 2678 case INDEX_op_extract2_i64: 2679 return C_O1_I2(r, rZ, rZ); 2680 2681 case INDEX_op_add2_i32: 
2682 case INDEX_op_add2_i64: 2683 case INDEX_op_sub2_i32: 2684 case INDEX_op_sub2_i64: 2685 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 2686 2687 case INDEX_op_add_vec: 2688 case INDEX_op_sub_vec: 2689 case INDEX_op_mul_vec: 2690 case INDEX_op_xor_vec: 2691 case INDEX_op_ssadd_vec: 2692 case INDEX_op_sssub_vec: 2693 case INDEX_op_usadd_vec: 2694 case INDEX_op_ussub_vec: 2695 case INDEX_op_smax_vec: 2696 case INDEX_op_smin_vec: 2697 case INDEX_op_umax_vec: 2698 case INDEX_op_umin_vec: 2699 case INDEX_op_shlv_vec: 2700 case INDEX_op_shrv_vec: 2701 case INDEX_op_sarv_vec: 2702 case INDEX_op_aa64_sshl_vec: 2703 return C_O1_I2(w, w, w); 2704 case INDEX_op_not_vec: 2705 case INDEX_op_neg_vec: 2706 case INDEX_op_abs_vec: 2707 case INDEX_op_shli_vec: 2708 case INDEX_op_shri_vec: 2709 case INDEX_op_sari_vec: 2710 return C_O1_I1(w, w); 2711 case INDEX_op_ld_vec: 2712 case INDEX_op_dupm_vec: 2713 return C_O1_I1(w, r); 2714 case INDEX_op_st_vec: 2715 return C_O0_I2(w, r); 2716 case INDEX_op_dup_vec: 2717 return C_O1_I1(w, wr); 2718 case INDEX_op_or_vec: 2719 case INDEX_op_andc_vec: 2720 return C_O1_I2(w, w, wO); 2721 case INDEX_op_and_vec: 2722 case INDEX_op_orc_vec: 2723 return C_O1_I2(w, w, wN); 2724 case INDEX_op_cmp_vec: 2725 return C_O1_I2(w, w, wZ); 2726 case INDEX_op_bitsel_vec: 2727 return C_O1_I3(w, w, w, w); 2728 case INDEX_op_aa64_sli_vec: 2729 return C_O1_I2(w, 0, w); 2730 2731 default: 2732 g_assert_not_reached(); 2733 } 2734} 2735 2736static void tcg_target_init(TCGContext *s) 2737{ 2738 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2739 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2740 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2741 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2742 2743 tcg_target_call_clobber_regs = -1ull; 2744 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2745 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2746 
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 2747 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2748 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2749 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2750 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2751 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2752 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2753 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2754 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2755 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2756 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2757 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2758 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2759 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2760 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 2761 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 2762 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 2763 2764 s->reserved_regs = 0; 2765 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2766 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 2767 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 2768 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 2769 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 2770} 2771 2772/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 2773#define PUSH_SIZE ((30 - 19 + 1) * 8) 2774 2775#define FRAME_SIZE \ 2776 ((PUSH_SIZE \ 2777 + TCG_STATIC_CALL_ARGS_SIZE \ 2778 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 2779 + TCG_TARGET_STACK_ALIGN - 1) \ 2780 & ~(TCG_TARGET_STACK_ALIGN - 1)) 2781 2782/* We're expecting a 2 byte uleb128 encoded value. 
*/ 2783QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 2784 2785/* We're expecting to use a single ADDI insn. */ 2786QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 2787 2788static void tcg_target_qemu_prologue(TCGContext *s) 2789{ 2790 TCGReg r; 2791 2792 /* Push (FP, LR) and allocate space for all saved registers. */ 2793 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 2794 TCG_REG_SP, -PUSH_SIZE, 1, 1); 2795 2796 /* Set up frame pointer for canonical unwinding. */ 2797 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 2798 2799 /* Store callee-preserved regs x19..x28. */ 2800 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2801 int ofs = (r - TCG_REG_X19 + 2) * 8; 2802 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2803 } 2804 2805 /* Make stack space for TCG locals. */ 2806 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2807 FRAME_SIZE - PUSH_SIZE); 2808 2809 /* Inform TCG about how to find TCG locals with register, offset, size. */ 2810 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 2811 CPU_TEMP_BUF_NLONGS * sizeof(long)); 2812 2813#if !defined(CONFIG_SOFTMMU) 2814 if (USE_GUEST_BASE) { 2815 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 2816 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 2817 } 2818#endif 2819 2820 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2821 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 2822 2823 /* 2824 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 2825 * and fall through to the rest of the epilogue. 2826 */ 2827 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2828 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 2829 2830 /* TB epilogue */ 2831 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 2832 2833 /* Remove TCG locals stack space. */ 2834 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2835 FRAME_SIZE - PUSH_SIZE); 2836 2837 /* Restore registers x19..x28. 
*/ 2838 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2839 int ofs = (r - TCG_REG_X19 + 2) * 8; 2840 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2841 } 2842 2843 /* Pop (FP, LR), restore SP to previous frame. */ 2844 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 2845 TCG_REG_SP, PUSH_SIZE, 0, 1); 2846 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 2847} 2848 2849static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2850{ 2851 int i; 2852 for (i = 0; i < count; ++i) { 2853 p[i] = NOP; 2854 } 2855} 2856 2857typedef struct { 2858 DebugFrameHeader h; 2859 uint8_t fde_def_cfa[4]; 2860 uint8_t fde_reg_ofs[24]; 2861} DebugFrame; 2862 2863#define ELF_HOST_MACHINE EM_AARCH64 2864 2865static const DebugFrame debug_frame = { 2866 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 2867 .h.cie.id = -1, 2868 .h.cie.version = 1, 2869 .h.cie.code_align = 1, 2870 .h.cie.data_align = 0x78, /* sleb128 -8 */ 2871 .h.cie.return_column = TCG_REG_LR, 2872 2873 /* Total FDE size does not include the "len" member. */ 2874 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 2875 2876 .fde_def_cfa = { 2877 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 2878 (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ 2879 (FRAME_SIZE >> 7) 2880 }, 2881 .fde_reg_ofs = { 2882 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 2883 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 2884 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 2885 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 2886 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 2887 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 2888 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 2889 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 2890 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 2891 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ 2892 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 2893 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 2894 } 2895}; 2896 2897void tcg_register_jit(const void *buf, size_t buf_size) 2898{ 2899 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 2900} 2901