/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"
#ifdef __linux__
#include <asm/hwcap.h>
#endif
#ifdef CONFIG_DARWIN
#include <sys/sysctl.h>
#endif

/* We're going to re-use TCGType in setting the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

bool have_lse;
bool have_lse2;

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.
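   In particular, for a 32-bit guest (TARGET_LONG_BITS == 32) the guest
   address is zero-extended via the UXTW option of the register-offset
   addressing form, so the address occupies the index slot and the base
   slot must hold a real register even when guest_base is zero; hence
   the USE_GUEST_BASE condition below.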
*/ 91#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) 92#define TCG_REG_GUEST_BASE TCG_REG_X28 93#endif 94 95static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 96{ 97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 98 ptrdiff_t offset = target - src_rx; 99 100 if (offset == sextract64(offset, 0, 26)) { 101 /* read instruction, mask away previous PC_REL26 parameter contents, 102 set the proper offset, then write back the instruction. */ 103 *src_rw = deposit32(*src_rw, 0, 26, offset); 104 return true; 105 } 106 return false; 107} 108 109static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 110{ 111 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 112 ptrdiff_t offset = target - src_rx; 113 114 if (offset == sextract64(offset, 0, 19)) { 115 *src_rw = deposit32(*src_rw, 5, 19, offset); 116 return true; 117 } 118 return false; 119} 120 121static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 122 intptr_t value, intptr_t addend) 123{ 124 tcg_debug_assert(addend == 0); 125 switch (type) { 126 case R_AARCH64_JUMP26: 127 case R_AARCH64_CALL26: 128 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 129 case R_AARCH64_CONDBR19: 130 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 131 default: 132 g_assert_not_reached(); 133 } 134} 135 136#define TCG_CT_CONST_AIMM 0x100 137#define TCG_CT_CONST_LIMM 0x200 138#define TCG_CT_CONST_ZERO 0x400 139#define TCG_CT_CONST_MONE 0x800 140#define TCG_CT_CONST_ORRI 0x1000 141#define TCG_CT_CONST_ANDI 0x2000 142 143#define ALL_GENERAL_REGS 0xffffffffu 144#define ALL_VECTOR_REGS 0xffffffff00000000ull 145 146#ifdef CONFIG_SOFTMMU 147#define ALL_QLDST_REGS \ 148 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ 149 (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) 150#else 151#define ALL_QLDST_REGS ALL_GENERAL_REGS 152#endif 153 154/* Match a constant valid for addition (12-bit, optionally shifted). */ 155static inline bool is_aimm(uint64_t val) 156{ 157 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 158} 159 160/* Match a constant valid for logical operations. */ 161static inline bool is_limm(uint64_t val) 162{ 163 /* Taking a simplified view of the logical immediates for now, ignoring 164 the replication that can happen across the field. Match bit patterns 165 of the forms 166 0....01....1 167 0..01..10..0 168 and their inverses. */ 169 170 /* Make things easier below, by testing the form with msb clear. */ 171 if ((int64_t)val < 0) { 172 val = ~val; 173 } 174 if (val == 0) { 175 return false; 176 } 177 val += val & -val; 178 return (val & (val - 1)) == 0; 179} 180 181/* Return true if v16 is a valid 16-bit shifted immediate. */ 182static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 183{ 184 if (v16 == (v16 & 0xff)) { 185 *cmode = 0x8; 186 *imm8 = v16 & 0xff; 187 return true; 188 } else if (v16 == (v16 & 0xff00)) { 189 *cmode = 0xa; 190 *imm8 = v16 >> 8; 191 return true; 192 } 193 return false; 194} 195 196/* Return true if v32 is a valid 32-bit shifted immediate. 
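   That is, a single non-zero byte in one of the four byte positions of
   the word; the returned cmode encodes the byte position and imm8 the
   byte value, as used by the AdvSIMD modified-immediate (MOVI/ORR)
   encodings.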
*/ 197static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 198{ 199 if (v32 == (v32 & 0xff)) { 200 *cmode = 0x0; 201 *imm8 = v32 & 0xff; 202 return true; 203 } else if (v32 == (v32 & 0xff00)) { 204 *cmode = 0x2; 205 *imm8 = (v32 >> 8) & 0xff; 206 return true; 207 } else if (v32 == (v32 & 0xff0000)) { 208 *cmode = 0x4; 209 *imm8 = (v32 >> 16) & 0xff; 210 return true; 211 } else if (v32 == (v32 & 0xff000000)) { 212 *cmode = 0x6; 213 *imm8 = v32 >> 24; 214 return true; 215 } 216 return false; 217} 218 219/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 220static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 221{ 222 if ((v32 & 0xffff00ff) == 0xff) { 223 *cmode = 0xc; 224 *imm8 = (v32 >> 8) & 0xff; 225 return true; 226 } else if ((v32 & 0xff00ffff) == 0xffff) { 227 *cmode = 0xd; 228 *imm8 = (v32 >> 16) & 0xff; 229 return true; 230 } 231 return false; 232} 233 234/* Return true if v32 is a valid float32 immediate. */ 235static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 236{ 237 if (extract32(v32, 0, 19) == 0 238 && (extract32(v32, 25, 6) == 0x20 239 || extract32(v32, 25, 6) == 0x1f)) { 240 *cmode = 0xf; 241 *imm8 = (extract32(v32, 31, 1) << 7) 242 | (extract32(v32, 25, 1) << 6) 243 | extract32(v32, 19, 6); 244 return true; 245 } 246 return false; 247} 248 249/* Return true if v64 is a valid float64 immediate. */ 250static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 251{ 252 if (extract64(v64, 0, 48) == 0 253 && (extract64(v64, 54, 9) == 0x100 254 || extract64(v64, 54, 9) == 0x0ff)) { 255 *cmode = 0xf; 256 *imm8 = (extract64(v64, 63, 1) << 7) 257 | (extract64(v64, 54, 1) << 6) 258 | extract64(v64, 48, 6); 259 return true; 260 } 261 return false; 262} 263 264/* 265 * Return non-zero if v32 can be formed by MOVI+ORR. 266 * Place the parameters for MOVI in (cmode, imm8). 267 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 268 */ 269static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 270{ 271 int i; 272 273 for (i = 6; i > 0; i -= 2) { 274 /* Mask out one byte we can add with ORR. */ 275 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 276 if (is_shimm32(tmp, cmode, imm8) || 277 is_soimm32(tmp, cmode, imm8)) { 278 break; 279 } 280 } 281 return i; 282} 283 284/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 285static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 286{ 287 if (v32 == deposit32(v32, 16, 16, v32)) { 288 return is_shimm16(v32, cmode, imm8); 289 } else { 290 return is_shimm32(v32, cmode, imm8); 291 } 292} 293 294static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 295{ 296 if (ct & TCG_CT_CONST) { 297 return 1; 298 } 299 if (type == TCG_TYPE_I32) { 300 val = (int32_t)val; 301 } 302 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 303 return 1; 304 } 305 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 306 return 1; 307 } 308 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 309 return 1; 310 } 311 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 312 return 1; 313 } 314 315 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 316 case 0: 317 break; 318 case TCG_CT_CONST_ANDI: 319 val = ~val; 320 /* fallthru */ 321 case TCG_CT_CONST_ORRI: 322 if (val == deposit64(val, 32, 32, val)) { 323 int cmode, imm8; 324 return is_shimm1632(val, &cmode, &imm8); 325 } 326 break; 327 default: 328 /* Both bits should not be set for the same insn. 
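           A constraint may ask for an ORR-capable vector immediate or an
           AND-capable one (checked by inverting val above and reusing the
           same shifted-immediate matcher), but never both for one insn.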
*/ 329 g_assert_not_reached(); 330 } 331 332 return 0; 333} 334 335enum aarch64_cond_code { 336 COND_EQ = 0x0, 337 COND_NE = 0x1, 338 COND_CS = 0x2, /* Unsigned greater or equal */ 339 COND_HS = COND_CS, /* ALIAS greater or equal */ 340 COND_CC = 0x3, /* Unsigned less than */ 341 COND_LO = COND_CC, /* ALIAS Lower */ 342 COND_MI = 0x4, /* Negative */ 343 COND_PL = 0x5, /* Zero or greater */ 344 COND_VS = 0x6, /* Overflow */ 345 COND_VC = 0x7, /* No overflow */ 346 COND_HI = 0x8, /* Unsigned greater than */ 347 COND_LS = 0x9, /* Unsigned less or equal */ 348 COND_GE = 0xa, 349 COND_LT = 0xb, 350 COND_GT = 0xc, 351 COND_LE = 0xd, 352 COND_AL = 0xe, 353 COND_NV = 0xf, /* behaves like COND_AL here */ 354}; 355 356static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 357 [TCG_COND_EQ] = COND_EQ, 358 [TCG_COND_NE] = COND_NE, 359 [TCG_COND_LT] = COND_LT, 360 [TCG_COND_GE] = COND_GE, 361 [TCG_COND_LE] = COND_LE, 362 [TCG_COND_GT] = COND_GT, 363 /* unsigned */ 364 [TCG_COND_LTU] = COND_LO, 365 [TCG_COND_GTU] = COND_HI, 366 [TCG_COND_GEU] = COND_HS, 367 [TCG_COND_LEU] = COND_LS, 368}; 369 370typedef enum { 371 LDST_ST = 0, /* store */ 372 LDST_LD = 1, /* load */ 373 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 374 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 375} AArch64LdstType; 376 377/* We encode the format of the insn into the beginning of the name, so that 378 we can have the preprocessor help "typecheck" the insn vs the output 379 function. Arm didn't provide us with nice names for the formats, so we 380 use the section number of the architecture reference manual in which the 381 instruction group is described. */ 382typedef enum { 383 /* Compare and branch (immediate). */ 384 I3201_CBZ = 0x34000000, 385 I3201_CBNZ = 0x35000000, 386 387 /* Conditional branch (immediate). */ 388 I3202_B_C = 0x54000000, 389 390 /* Unconditional branch (immediate). */ 391 I3206_B = 0x14000000, 392 I3206_BL = 0x94000000, 393 394 /* Unconditional branch (register). */ 395 I3207_BR = 0xd61f0000, 396 I3207_BLR = 0xd63f0000, 397 I3207_RET = 0xd65f0000, 398 399 /* AdvSIMD load/store single structure. */ 400 I3303_LD1R = 0x0d40c000, 401 402 /* Load literal for loading the address at pc-relative offset */ 403 I3305_LDR = 0x58000000, 404 I3305_LDR_v64 = 0x5c000000, 405 I3305_LDR_v128 = 0x9c000000, 406 407 /* Load/store register. Described here as 3.3.12, but the helper 408 that emits them can transform to 3.3.10 or 3.3.13. 
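       3.3.10 is the register-offset form and 3.3.13 the scaled unsigned
       12-bit immediate form; the I3312_TO_I3310 and I3312_TO_I3313 deltas
       below are what tcg_out_insn_3310 and tcg_out_insn_3313 add to
       perform that conversion.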
*/ 409 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 410 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 411 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 412 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 413 414 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 415 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 416 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 417 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 418 419 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 420 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 421 422 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 423 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 424 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 425 426 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 427 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 428 429 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 430 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 431 432 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 433 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 434 435 I3312_TO_I3310 = 0x00200800, 436 I3312_TO_I3313 = 0x01000000, 437 438 /* Load/store register pair instructions. */ 439 I3314_LDP = 0x28400000, 440 I3314_STP = 0x28000000, 441 442 /* Add/subtract immediate instructions. */ 443 I3401_ADDI = 0x11000000, 444 I3401_ADDSI = 0x31000000, 445 I3401_SUBI = 0x51000000, 446 I3401_SUBSI = 0x71000000, 447 448 /* Bitfield instructions. */ 449 I3402_BFM = 0x33000000, 450 I3402_SBFM = 0x13000000, 451 I3402_UBFM = 0x53000000, 452 453 /* Extract instruction. */ 454 I3403_EXTR = 0x13800000, 455 456 /* Logical immediate instructions. */ 457 I3404_ANDI = 0x12000000, 458 I3404_ORRI = 0x32000000, 459 I3404_EORI = 0x52000000, 460 I3404_ANDSI = 0x72000000, 461 462 /* Move wide immediate instructions. */ 463 I3405_MOVN = 0x12800000, 464 I3405_MOVZ = 0x52800000, 465 I3405_MOVK = 0x72800000, 466 467 /* PC relative addressing instructions. */ 468 I3406_ADR = 0x10000000, 469 I3406_ADRP = 0x90000000, 470 471 /* Add/subtract shifted register instructions (without a shift). */ 472 I3502_ADD = 0x0b000000, 473 I3502_ADDS = 0x2b000000, 474 I3502_SUB = 0x4b000000, 475 I3502_SUBS = 0x6b000000, 476 477 /* Add/subtract shifted register instructions (with a shift). */ 478 I3502S_ADD_LSL = I3502_ADD, 479 480 /* Add/subtract with carry instructions. */ 481 I3503_ADC = 0x1a000000, 482 I3503_SBC = 0x5a000000, 483 484 /* Conditional select instructions. */ 485 I3506_CSEL = 0x1a800000, 486 I3506_CSINC = 0x1a800400, 487 I3506_CSINV = 0x5a800000, 488 I3506_CSNEG = 0x5a800400, 489 490 /* Data-processing (1 source) instructions. */ 491 I3507_CLZ = 0x5ac01000, 492 I3507_RBIT = 0x5ac00000, 493 I3507_REV = 0x5ac00000, /* + size << 10 */ 494 495 /* Data-processing (2 source) instructions. */ 496 I3508_LSLV = 0x1ac02000, 497 I3508_LSRV = 0x1ac02400, 498 I3508_ASRV = 0x1ac02800, 499 I3508_RORV = 0x1ac02c00, 500 I3508_SMULH = 0x9b407c00, 501 I3508_UMULH = 0x9bc07c00, 502 I3508_UDIV = 0x1ac00800, 503 I3508_SDIV = 0x1ac00c00, 504 505 /* Data-processing (3 source) instructions. */ 506 I3509_MADD = 0x1b000000, 507 I3509_MSUB = 0x1b008000, 508 509 /* Logical shifted register instructions (without a shift). 
*/ 510 I3510_AND = 0x0a000000, 511 I3510_BIC = 0x0a200000, 512 I3510_ORR = 0x2a000000, 513 I3510_ORN = 0x2a200000, 514 I3510_EOR = 0x4a000000, 515 I3510_EON = 0x4a200000, 516 I3510_ANDS = 0x6a000000, 517 518 /* Logical shifted register instructions (with a shift). */ 519 I3502S_AND_LSR = I3510_AND | (1 << 22), 520 521 /* AdvSIMD copy */ 522 I3605_DUP = 0x0e000400, 523 I3605_INS = 0x4e001c00, 524 I3605_UMOV = 0x0e003c00, 525 526 /* AdvSIMD modified immediate */ 527 I3606_MOVI = 0x0f000400, 528 I3606_MVNI = 0x2f000400, 529 I3606_BIC = 0x2f001400, 530 I3606_ORR = 0x0f001400, 531 532 /* AdvSIMD scalar shift by immediate */ 533 I3609_SSHR = 0x5f000400, 534 I3609_SSRA = 0x5f001400, 535 I3609_SHL = 0x5f005400, 536 I3609_USHR = 0x7f000400, 537 I3609_USRA = 0x7f001400, 538 I3609_SLI = 0x7f005400, 539 540 /* AdvSIMD scalar three same */ 541 I3611_SQADD = 0x5e200c00, 542 I3611_SQSUB = 0x5e202c00, 543 I3611_CMGT = 0x5e203400, 544 I3611_CMGE = 0x5e203c00, 545 I3611_SSHL = 0x5e204400, 546 I3611_ADD = 0x5e208400, 547 I3611_CMTST = 0x5e208c00, 548 I3611_UQADD = 0x7e200c00, 549 I3611_UQSUB = 0x7e202c00, 550 I3611_CMHI = 0x7e203400, 551 I3611_CMHS = 0x7e203c00, 552 I3611_USHL = 0x7e204400, 553 I3611_SUB = 0x7e208400, 554 I3611_CMEQ = 0x7e208c00, 555 556 /* AdvSIMD scalar two-reg misc */ 557 I3612_CMGT0 = 0x5e208800, 558 I3612_CMEQ0 = 0x5e209800, 559 I3612_CMLT0 = 0x5e20a800, 560 I3612_ABS = 0x5e20b800, 561 I3612_CMGE0 = 0x7e208800, 562 I3612_CMLE0 = 0x7e209800, 563 I3612_NEG = 0x7e20b800, 564 565 /* AdvSIMD shift by immediate */ 566 I3614_SSHR = 0x0f000400, 567 I3614_SSRA = 0x0f001400, 568 I3614_SHL = 0x0f005400, 569 I3614_SLI = 0x2f005400, 570 I3614_USHR = 0x2f000400, 571 I3614_USRA = 0x2f001400, 572 573 /* AdvSIMD three same. */ 574 I3616_ADD = 0x0e208400, 575 I3616_AND = 0x0e201c00, 576 I3616_BIC = 0x0e601c00, 577 I3616_BIF = 0x2ee01c00, 578 I3616_BIT = 0x2ea01c00, 579 I3616_BSL = 0x2e601c00, 580 I3616_EOR = 0x2e201c00, 581 I3616_MUL = 0x0e209c00, 582 I3616_ORR = 0x0ea01c00, 583 I3616_ORN = 0x0ee01c00, 584 I3616_SUB = 0x2e208400, 585 I3616_CMGT = 0x0e203400, 586 I3616_CMGE = 0x0e203c00, 587 I3616_CMTST = 0x0e208c00, 588 I3616_CMHI = 0x2e203400, 589 I3616_CMHS = 0x2e203c00, 590 I3616_CMEQ = 0x2e208c00, 591 I3616_SMAX = 0x0e206400, 592 I3616_SMIN = 0x0e206c00, 593 I3616_SSHL = 0x0e204400, 594 I3616_SQADD = 0x0e200c00, 595 I3616_SQSUB = 0x0e202c00, 596 I3616_UMAX = 0x2e206400, 597 I3616_UMIN = 0x2e206c00, 598 I3616_UQADD = 0x2e200c00, 599 I3616_UQSUB = 0x2e202c00, 600 I3616_USHL = 0x2e204400, 601 602 /* AdvSIMD two-reg misc. */ 603 I3617_CMGT0 = 0x0e208800, 604 I3617_CMEQ0 = 0x0e209800, 605 I3617_CMLT0 = 0x0e20a800, 606 I3617_CMGE0 = 0x2e208800, 607 I3617_CMLE0 = 0x2e209800, 608 I3617_NOT = 0x2e205800, 609 I3617_ABS = 0x0e20b800, 610 I3617_NEG = 0x2e20b800, 611 612 /* System instructions. */ 613 NOP = 0xd503201f, 614 DMB_ISH = 0xd50338bf, 615 DMB_LD = 0x00000100, 616 DMB_ST = 0x00000200, 617} AArch64Insn; 618 619static inline uint32_t tcg_in32(TCGContext *s) 620{ 621 uint32_t v = *(uint32_t *)s->code_ptr; 622 return v; 623} 624 625/* Emit an opcode with "type-checking" of the format. */ 626#define tcg_out_insn(S, FMT, OP, ...) 
\ 627 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 628 629static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 630 TCGReg rt, TCGReg rn, unsigned size) 631{ 632 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 633} 634 635static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 636 int imm19, TCGReg rt) 637{ 638 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 639} 640 641static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 642 TCGReg rt, int imm19) 643{ 644 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 645} 646 647static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 648 TCGCond c, int imm19) 649{ 650 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 651} 652 653static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 654{ 655 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 656} 657 658static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 659{ 660 tcg_out32(s, insn | rn << 5); 661} 662 663static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 664 TCGReg r1, TCGReg r2, TCGReg rn, 665 tcg_target_long ofs, bool pre, bool w) 666{ 667 insn |= 1u << 31; /* ext */ 668 insn |= pre << 24; 669 insn |= w << 23; 670 671 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 672 insn |= (ofs & (0x7f << 3)) << (15 - 3); 673 674 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 675} 676 677static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 678 TCGReg rd, TCGReg rn, uint64_t aimm) 679{ 680 if (aimm > 0xfff) { 681 tcg_debug_assert((aimm & 0xfff) == 0); 682 aimm >>= 12; 683 tcg_debug_assert(aimm <= 0xfff); 684 aimm |= 1 << 12; /* apply LSL 12 */ 685 } 686 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 687} 688 689/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 690 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 691 that feed the DecodeBitMasks pseudo function. */ 692static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 693 TCGReg rd, TCGReg rn, int n, int immr, int imms) 694{ 695 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 696 | rn << 5 | rd); 697} 698 699#define tcg_out_insn_3404 tcg_out_insn_3402 700 701static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 702 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 703{ 704 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 705 | rn << 5 | rd); 706} 707 708/* This function is used for the Move (wide immediate) instruction group. 709 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 710static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 711 TCGReg rd, uint16_t half, unsigned shift) 712{ 713 tcg_debug_assert((shift & ~0x30) == 0); 714 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 715} 716 717static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 718 TCGReg rd, int64_t disp) 719{ 720 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 721} 722 723/* This function is for both 3.5.2 (Add/Subtract shifted register), for 724 the rare occasion when we actually want to supply a shift amount. 
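   For example, the shifted AND (I3502S_AND_LSR) used by prepare_host_addr
   to extract the TLB index is emitted through this helper.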
 */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
825 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 826 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 827 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 828} 829 830static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 831 TCGReg rd, TCGReg rn, intptr_t offset) 832{ 833 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 834} 835 836static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 837 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 838{ 839 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 840 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 841 | rn << 5 | (rd & 0x1f)); 842} 843 844/* Register to register move using ORR (shifted register with no shift). */ 845static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 846{ 847 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 848} 849 850/* Register to register move using ADDI (move to/from SP). */ 851static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 852{ 853 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 854} 855 856/* This function is used for the Logical (immediate) instruction group. 857 The value of LIMM must satisfy IS_LIMM. See the comment above about 858 only supporting simplified logical immediates. */ 859static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 860 TCGReg rd, TCGReg rn, uint64_t limm) 861{ 862 unsigned h, l, r, c; 863 864 tcg_debug_assert(is_limm(limm)); 865 866 h = clz64(limm); 867 l = ctz64(limm); 868 if (l == 0) { 869 r = 0; /* form 0....01....1 */ 870 c = ctz64(~limm) - 1; 871 if (h == 0) { 872 r = clz64(~limm); /* form 1..10..01..1 */ 873 c += r; 874 } 875 } else { 876 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 877 c = r - h - 1; 878 } 879 if (ext == TCG_TYPE_I32) { 880 r &= 31; 881 c &= 31; 882 } 883 884 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 885} 886 887static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 888 TCGReg rd, int64_t v64) 889{ 890 bool q = type == TCG_TYPE_V128; 891 int cmode, imm8, i; 892 893 /* Test all bytes equal first. */ 894 if (vece == MO_8) { 895 imm8 = (uint8_t)v64; 896 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 897 return; 898 } 899 900 /* 901 * Test all bytes 0x00 or 0xff second. This can match cases that 902 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 903 */ 904 for (i = imm8 = 0; i < 8; i++) { 905 uint8_t byte = v64 >> (i * 8); 906 if (byte == 0xff) { 907 imm8 |= 1 << i; 908 } else if (byte != 0) { 909 goto fail_bytes; 910 } 911 } 912 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 913 return; 914 fail_bytes: 915 916 /* 917 * Tests for various replications. For each element width, if we 918 * cannot find an expansion there's no point checking a larger 919 * width because we already know by replication it cannot match. 920 */ 921 if (vece == MO_16) { 922 uint16_t v16 = v64; 923 924 if (is_shimm16(v16, &cmode, &imm8)) { 925 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 926 return; 927 } 928 if (is_shimm16(~v16, &cmode, &imm8)) { 929 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 930 return; 931 } 932 933 /* 934 * Otherwise, all remaining constants can be loaded in two insns: 935 * rd = v16 & 0xff, rd |= v16 & 0xff00. 
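     * For example, v16 = 0x1234 becomes MOVI (cmode 0x8) with imm8 0x34,
     * followed by ORR (cmode 0xa) with imm8 0x12, i.e. 0x12 << 8.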
936 */ 937 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 938 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 939 return; 940 } else if (vece == MO_32) { 941 uint32_t v32 = v64; 942 uint32_t n32 = ~v32; 943 944 if (is_shimm32(v32, &cmode, &imm8) || 945 is_soimm32(v32, &cmode, &imm8) || 946 is_fimm32(v32, &cmode, &imm8)) { 947 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 948 return; 949 } 950 if (is_shimm32(n32, &cmode, &imm8) || 951 is_soimm32(n32, &cmode, &imm8)) { 952 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 953 return; 954 } 955 956 /* 957 * Restrict the set of constants to those we can load with 958 * two instructions. Others we load from the pool. 959 */ 960 i = is_shimm32_pair(v32, &cmode, &imm8); 961 if (i) { 962 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 963 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 964 return; 965 } 966 i = is_shimm32_pair(n32, &cmode, &imm8); 967 if (i) { 968 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 969 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 970 return; 971 } 972 } else if (is_fimm64(v64, &cmode, &imm8)) { 973 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 974 return; 975 } 976 977 /* 978 * As a last resort, load from the constant pool. Sadly there 979 * is no LD1R (literal), so store the full 16-byte vector. 980 */ 981 if (type == TCG_TYPE_V128) { 982 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 983 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 984 } else { 985 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 986 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 987 } 988} 989 990static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 991 TCGReg rd, TCGReg rs) 992{ 993 int is_q = type - TCG_TYPE_V64; 994 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 995 return true; 996} 997 998static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 999 TCGReg r, TCGReg base, intptr_t offset) 1000{ 1001 TCGReg temp = TCG_REG_TMP; 1002 1003 if (offset < -0xffffff || offset > 0xffffff) { 1004 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 1005 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 1006 base = temp; 1007 } else { 1008 AArch64Insn add_insn = I3401_ADDI; 1009 1010 if (offset < 0) { 1011 add_insn = I3401_SUBI; 1012 offset = -offset; 1013 } 1014 if (offset & 0xfff000) { 1015 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1016 base = temp; 1017 } 1018 if (offset & 0xfff) { 1019 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1020 base = temp; 1021 } 1022 } 1023 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1024 return true; 1025} 1026 1027static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1028 tcg_target_long value) 1029{ 1030 tcg_target_long svalue = value; 1031 tcg_target_long ivalue = ~value; 1032 tcg_target_long t0, t1, t2; 1033 int s0, s1; 1034 AArch64Insn opc; 1035 1036 switch (type) { 1037 case TCG_TYPE_I32: 1038 case TCG_TYPE_I64: 1039 tcg_debug_assert(rd < 32); 1040 break; 1041 default: 1042 g_assert_not_reached(); 1043 } 1044 1045 /* For 32-bit values, discard potential garbage in value. For 64-bit 1046 values within [2**31, 2**32-1], we can create smaller sequences by 1047 interpreting this as a negative 32-bit number, while ensuring that 1048 the high 32 bits are cleared by setting SF=0. 
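       For example, 0xfffffffd fits in a single 32-bit MOVN of 2 with SF=0:
       writing the W register zero-extends into the X register, so no
       additional MOVK is needed.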
*/ 1049 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1050 svalue = (int32_t)value; 1051 value = (uint32_t)value; 1052 ivalue = (uint32_t)ivalue; 1053 type = TCG_TYPE_I32; 1054 } 1055 1056 /* Speed things up by handling the common case of small positive 1057 and negative values specially. */ 1058 if ((value & ~0xffffull) == 0) { 1059 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1060 return; 1061 } else if ((ivalue & ~0xffffull) == 0) { 1062 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1063 return; 1064 } 1065 1066 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1067 use the sign-extended value. That lets us match rotated values such 1068 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1069 if (is_limm(svalue)) { 1070 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1071 return; 1072 } 1073 1074 /* Look for host pointer values within 4G of the PC. This happens 1075 often when loading pointers to QEMU's own data structures. */ 1076 if (type == TCG_TYPE_I64) { 1077 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1078 tcg_target_long disp = value - src_rx; 1079 if (disp == sextract64(disp, 0, 21)) { 1080 tcg_out_insn(s, 3406, ADR, rd, disp); 1081 return; 1082 } 1083 disp = (value >> 12) - (src_rx >> 12); 1084 if (disp == sextract64(disp, 0, 21)) { 1085 tcg_out_insn(s, 3406, ADRP, rd, disp); 1086 if (value & 0xfff) { 1087 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1088 } 1089 return; 1090 } 1091 } 1092 1093 /* Would it take fewer insns to begin with MOVN? */ 1094 if (ctpop64(value) >= 32) { 1095 t0 = ivalue; 1096 opc = I3405_MOVN; 1097 } else { 1098 t0 = value; 1099 opc = I3405_MOVZ; 1100 } 1101 s0 = ctz64(t0) & (63 & -16); 1102 t1 = t0 & ~(0xffffull << s0); 1103 s1 = ctz64(t1) & (63 & -16); 1104 t2 = t1 & ~(0xffffull << s1); 1105 if (t2 == 0) { 1106 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1107 if (t1 != 0) { 1108 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1109 } 1110 return; 1111 } 1112 1113 /* For more than 2 insns, dump it into the constant pool. */ 1114 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1115 tcg_out_insn(s, 3305, LDR, 0, rd); 1116} 1117 1118static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1119{ 1120 return false; 1121} 1122 1123static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1124 tcg_target_long imm) 1125{ 1126 /* This function is only used for passing structs by reference. */ 1127 g_assert_not_reached(); 1128} 1129 1130/* Define something more legible for general use. */ 1131#define tcg_out_ldst_r tcg_out_insn_3310 1132 1133static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1134 TCGReg rn, intptr_t offset, int lgsize) 1135{ 1136 /* If the offset is naturally aligned and in range, then we can 1137 use the scaled uimm12 encoding */ 1138 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1139 uintptr_t scaled_uimm = offset >> lgsize; 1140 if (scaled_uimm <= 0xfff) { 1141 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1142 return; 1143 } 1144 } 1145 1146 /* Small signed offsets can use the unscaled encoding. */ 1147 if (offset >= -256 && offset < 256) { 1148 tcg_out_insn_3312(s, insn, rd, rn, offset); 1149 return; 1150 } 1151 1152 /* Worst-case scenario, move offset to temp register, use reg offset. 
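       The preferred forms above are the scaled unsigned 12-bit immediate
       (LDR/STR [Xn, #uimm12 << lgsize]) and the unscaled signed 9-bit
       immediate (LDUR/STUR); only offsets that fit neither end up here.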
*/ 1153 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1154 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1155} 1156 1157static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1158{ 1159 if (ret == arg) { 1160 return true; 1161 } 1162 switch (type) { 1163 case TCG_TYPE_I32: 1164 case TCG_TYPE_I64: 1165 if (ret < 32 && arg < 32) { 1166 tcg_out_movr(s, type, ret, arg); 1167 break; 1168 } else if (ret < 32) { 1169 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1170 break; 1171 } else if (arg < 32) { 1172 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1173 break; 1174 } 1175 /* FALLTHRU */ 1176 1177 case TCG_TYPE_V64: 1178 tcg_debug_assert(ret >= 32 && arg >= 32); 1179 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1180 break; 1181 case TCG_TYPE_V128: 1182 tcg_debug_assert(ret >= 32 && arg >= 32); 1183 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1184 break; 1185 1186 default: 1187 g_assert_not_reached(); 1188 } 1189 return true; 1190} 1191 1192static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1193 TCGReg base, intptr_t ofs) 1194{ 1195 AArch64Insn insn; 1196 int lgsz; 1197 1198 switch (type) { 1199 case TCG_TYPE_I32: 1200 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1201 lgsz = 2; 1202 break; 1203 case TCG_TYPE_I64: 1204 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1205 lgsz = 3; 1206 break; 1207 case TCG_TYPE_V64: 1208 insn = I3312_LDRVD; 1209 lgsz = 3; 1210 break; 1211 case TCG_TYPE_V128: 1212 insn = I3312_LDRVQ; 1213 lgsz = 4; 1214 break; 1215 default: 1216 g_assert_not_reached(); 1217 } 1218 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1219} 1220 1221static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1222 TCGReg base, intptr_t ofs) 1223{ 1224 AArch64Insn insn; 1225 int lgsz; 1226 1227 switch (type) { 1228 case TCG_TYPE_I32: 1229 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1230 lgsz = 2; 1231 break; 1232 case TCG_TYPE_I64: 1233 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1234 lgsz = 3; 1235 break; 1236 case TCG_TYPE_V64: 1237 insn = I3312_STRVD; 1238 lgsz = 3; 1239 break; 1240 case TCG_TYPE_V128: 1241 insn = I3312_STRVQ; 1242 lgsz = 4; 1243 break; 1244 default: 1245 g_assert_not_reached(); 1246 } 1247 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1248} 1249 1250static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1251 TCGReg base, intptr_t ofs) 1252{ 1253 if (type <= TCG_TYPE_I64 && val == 0) { 1254 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1255 return true; 1256 } 1257 return false; 1258} 1259 1260static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1261 TCGReg rn, unsigned int a, unsigned int b) 1262{ 1263 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1264} 1265 1266static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1267 TCGReg rn, unsigned int a, unsigned int b) 1268{ 1269 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1270} 1271 1272static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1273 TCGReg rn, unsigned int a, unsigned int b) 1274{ 1275 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1276} 1277 1278static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1279 TCGReg rn, TCGReg rm, unsigned int a) 1280{ 1281 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1282} 1283 1284static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1285 TCGReg rd, TCGReg rn, unsigned int m) 1286{ 1287 int bits = ext ? 
64 : 32; 1288 int max = bits - 1; 1289 tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max); 1290} 1291 1292static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1293 TCGReg rd, TCGReg rn, unsigned int m) 1294{ 1295 int max = ext ? 63 : 31; 1296 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1297} 1298 1299static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1300 TCGReg rd, TCGReg rn, unsigned int m) 1301{ 1302 int max = ext ? 63 : 31; 1303 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1304} 1305 1306static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1307 TCGReg rd, TCGReg rn, unsigned int m) 1308{ 1309 int max = ext ? 63 : 31; 1310 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1311} 1312 1313static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1314 TCGReg rd, TCGReg rn, unsigned int m) 1315{ 1316 int max = ext ? 63 : 31; 1317 tcg_out_extr(s, ext, rd, rn, rn, -m & max); 1318} 1319 1320static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1321 TCGReg rn, unsigned lsb, unsigned width) 1322{ 1323 unsigned size = ext ? 64 : 32; 1324 unsigned a = (size - lsb) & (size - 1); 1325 unsigned b = width - 1; 1326 tcg_out_bfm(s, ext, rd, rn, a, b); 1327} 1328 1329static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1330 tcg_target_long b, bool const_b) 1331{ 1332 if (const_b) { 1333 /* Using CMP or CMN aliases. */ 1334 if (b >= 0) { 1335 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1336 } else { 1337 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1338 } 1339 } else { 1340 /* Using CMP alias SUBS wzr, Wn, Wm */ 1341 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1342 } 1343} 1344 1345static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1346{ 1347 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1348 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1349 tcg_out_insn(s, 3206, B, offset); 1350} 1351 1352static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1353{ 1354 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1355 if (offset == sextract64(offset, 0, 26)) { 1356 tcg_out_insn(s, 3206, B, offset); 1357 } else { 1358 /* Choose X9 as a call-clobbered non-LR temporary. 
*/ 1359 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target); 1360 tcg_out_insn(s, 3207, BR, TCG_REG_X9); 1361 } 1362} 1363 1364static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) 1365{ 1366 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1367 if (offset == sextract64(offset, 0, 26)) { 1368 tcg_out_insn(s, 3206, BL, offset); 1369 } else { 1370 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1371 tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); 1372 } 1373} 1374 1375static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 1376 const TCGHelperInfo *info) 1377{ 1378 tcg_out_call_int(s, target); 1379} 1380 1381static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1382{ 1383 if (!l->has_value) { 1384 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1385 tcg_out_insn(s, 3206, B, 0); 1386 } else { 1387 tcg_out_goto(s, l->u.value_ptr); 1388 } 1389} 1390 1391static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1392 TCGArg b, bool b_const, TCGLabel *l) 1393{ 1394 intptr_t offset; 1395 bool need_cmp; 1396 1397 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1398 need_cmp = false; 1399 } else { 1400 need_cmp = true; 1401 tcg_out_cmp(s, ext, a, b, b_const); 1402 } 1403 1404 if (!l->has_value) { 1405 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1406 offset = tcg_in32(s) >> 5; 1407 } else { 1408 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1409 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1410 } 1411 1412 if (need_cmp) { 1413 tcg_out_insn(s, 3202, B_C, c, offset); 1414 } else if (c == TCG_COND_EQ) { 1415 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1416 } else { 1417 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1418 } 1419} 1420 1421static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, 1422 TCGReg rd, TCGReg rn) 1423{ 1424 /* REV, REV16, REV32 */ 1425 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); 1426} 1427 1428static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1429 TCGReg rd, TCGReg rn) 1430{ 1431 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1432 int bits = (8 << s_bits) - 1; 1433 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1434} 1435 1436static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn) 1437{ 1438 tcg_out_sxt(s, type, MO_8, rd, rn); 1439} 1440 1441static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn) 1442{ 1443 tcg_out_sxt(s, type, MO_16, rd, rn); 1444} 1445 1446static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn) 1447{ 1448 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn); 1449} 1450 1451static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn) 1452{ 1453 tcg_out_ext32s(s, rd, rn); 1454} 1455 1456static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1457 TCGReg rd, TCGReg rn) 1458{ 1459 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1460 int bits = (8 << s_bits) - 1; 1461 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1462} 1463 1464static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn) 1465{ 1466 tcg_out_uxt(s, MO_8, rd, rn); 1467} 1468 1469static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn) 1470{ 1471 tcg_out_uxt(s, MO_16, rd, rn); 1472} 1473 1474static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn) 1475{ 1476 tcg_out_movr(s, TCG_TYPE_I32, rd, rn); 1477} 1478 1479static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn) 1480{ 1481 tcg_out_ext32u(s, rd, rn); 1482} 1483 1484static void 
tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) 1485{ 1486 tcg_out_mov(s, TCG_TYPE_I32, rd, rn); 1487} 1488 1489static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1490 TCGReg rn, int64_t aimm) 1491{ 1492 if (aimm >= 0) { 1493 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1494 } else { 1495 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1496 } 1497} 1498 1499static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1500 TCGReg rh, TCGReg al, TCGReg ah, 1501 tcg_target_long bl, tcg_target_long bh, 1502 bool const_bl, bool const_bh, bool sub) 1503{ 1504 TCGReg orig_rl = rl; 1505 AArch64Insn insn; 1506 1507 if (rl == ah || (!const_bh && rl == bh)) { 1508 rl = TCG_REG_TMP; 1509 } 1510 1511 if (const_bl) { 1512 if (bl < 0) { 1513 bl = -bl; 1514 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1515 } else { 1516 insn = sub ? I3401_SUBSI : I3401_ADDSI; 1517 } 1518 1519 if (unlikely(al == TCG_REG_XZR)) { 1520 /* ??? We want to allow al to be zero for the benefit of 1521 negation via subtraction. However, that leaves open the 1522 possibility of adding 0+const in the low part, and the 1523 immediate add instructions encode XSP not XZR. Don't try 1524 anything more elaborate here than loading another zero. */ 1525 al = TCG_REG_TMP; 1526 tcg_out_movi(s, ext, al, 0); 1527 } 1528 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1529 } else { 1530 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1531 } 1532 1533 insn = I3503_ADC; 1534 if (const_bh) { 1535 /* Note that the only two constants we support are 0 and -1, and 1536 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1537 if ((bh != 0) ^ sub) { 1538 insn = I3503_SBC; 1539 } 1540 bh = TCG_REG_XZR; 1541 } else if (sub) { 1542 insn = I3503_SBC; 1543 } 1544 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1545 1546 tcg_out_mov(s, ext, orig_rl, rl); 1547} 1548 1549static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1550{ 1551 static const uint32_t sync[] = { 1552 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1553 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1554 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1555 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1556 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1557 }; 1558 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1559} 1560 1561static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1562 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1563{ 1564 TCGReg a1 = a0; 1565 if (is_ctz) { 1566 a1 = TCG_REG_TMP; 1567 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1568 } 1569 if (const_b && b == (ext ? 
64 : 32)) { 1570 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1571 } else { 1572 AArch64Insn sel = I3506_CSEL; 1573 1574 tcg_out_cmp(s, ext, a0, 0, 1); 1575 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1576 1577 if (const_b) { 1578 if (b == -1) { 1579 b = TCG_REG_XZR; 1580 sel = I3506_CSINV; 1581 } else if (b == 0) { 1582 b = TCG_REG_XZR; 1583 } else { 1584 tcg_out_movi(s, ext, d, b); 1585 b = d; 1586 } 1587 } 1588 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1589 } 1590} 1591 1592typedef struct { 1593 TCGReg base; 1594 TCGReg index; 1595 TCGType index_ext; 1596} HostAddress; 1597 1598bool tcg_target_has_memory_bswap(MemOp memop) 1599{ 1600 return false; 1601} 1602 1603static const TCGLdstHelperParam ldst_helper_param = { 1604 .ntmp = 1, .tmp = { TCG_REG_TMP } 1605}; 1606 1607static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1608{ 1609 MemOp opc = get_memop(lb->oi); 1610 1611 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1612 return false; 1613 } 1614 1615 tcg_out_ld_helper_args(s, lb, &ldst_helper_param); 1616 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); 1617 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); 1618 tcg_out_goto(s, lb->raddr); 1619 return true; 1620} 1621 1622static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1623{ 1624 MemOp opc = get_memop(lb->oi); 1625 1626 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1627 return false; 1628 } 1629 1630 tcg_out_st_helper_args(s, lb, &ldst_helper_param); 1631 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); 1632 tcg_out_goto(s, lb->raddr); 1633 return true; 1634} 1635 1636/* 1637 * For softmmu, perform the TLB load and compare. 1638 * For useronly, perform any required alignment tests. 1639 * In both cases, return a TCGLabelQemuLdst structure if the slow path 1640 * is required and fill in @h with the host address for the fast path. 1641 */ 1642static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 1643 TCGReg addr_reg, MemOpIdx oi, 1644 bool is_ld) 1645{ 1646 TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1647 TCGLabelQemuLdst *ldst = NULL; 1648 MemOp opc = get_memop(oi); 1649 unsigned a_bits = get_alignment_bits(opc); 1650 unsigned a_mask = (1u << a_bits) - 1; 1651 1652#ifdef CONFIG_SOFTMMU 1653 unsigned s_bits = opc & MO_SIZE; 1654 unsigned s_mask = (1u << s_bits) - 1; 1655 unsigned mem_index = get_mmuidx(oi); 1656 TCGReg x3; 1657 TCGType mask_type; 1658 uint64_t compare_mask; 1659 1660 ldst = new_ldst_label(s); 1661 ldst->is_ld = is_ld; 1662 ldst->oi = oi; 1663 ldst->addrlo_reg = addr_reg; 1664 1665 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1666 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1667 1668 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1669 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1670 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1671 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1672 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1673 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1674 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1675 1676 /* Extract the TLB index from the address into X0. */ 1677 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1678 TCG_REG_X0, TCG_REG_X0, addr_reg, 1679 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1680 1681 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. 
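       X0 holds the entry offset produced by the AND+LSR above (the page
       index scaled by the CPUTLBEntry size and masked by f.mask), and X1
       holds the table base loaded by the LDP, so the sum addresses the
       CPUTLBEntry for this access.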
*/ 1682 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1683 1684 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1685 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, 1686 is_ld ? offsetof(CPUTLBEntry, addr_read) 1687 : offsetof(CPUTLBEntry, addr_write)); 1688 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1689 offsetof(CPUTLBEntry, addend)); 1690 1691 /* 1692 * For aligned accesses, we check the first byte and include the alignment 1693 * bits within the address. For unaligned access, we check that we don't 1694 * cross pages using the address of the last byte of the access. 1695 */ 1696 if (a_bits >= s_bits) { 1697 x3 = addr_reg; 1698 } else { 1699 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1700 TCG_REG_X3, addr_reg, s_mask - a_mask); 1701 x3 = TCG_REG_X3; 1702 } 1703 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1704 1705 /* Store the page mask part of the address into X3. */ 1706 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1707 TCG_REG_X3, x3, compare_mask); 1708 1709 /* Perform the address comparison. */ 1710 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1711 1712 /* If not equal, we jump to the slow path. */ 1713 ldst->label_ptr[0] = s->code_ptr; 1714 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1715 1716 *h = (HostAddress){ 1717 .base = TCG_REG_X1, 1718 .index = addr_reg, 1719 .index_ext = addr_type 1720 }; 1721#else 1722 if (a_mask) { 1723 ldst = new_ldst_label(s); 1724 1725 ldst->is_ld = is_ld; 1726 ldst->oi = oi; 1727 ldst->addrlo_reg = addr_reg; 1728 1729 /* tst addr, #mask */ 1730 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); 1731 1732 /* b.ne slow_path */ 1733 ldst->label_ptr[0] = s->code_ptr; 1734 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1735 } 1736 1737 if (USE_GUEST_BASE) { 1738 *h = (HostAddress){ 1739 .base = TCG_REG_GUEST_BASE, 1740 .index = addr_reg, 1741 .index_ext = addr_type 1742 }; 1743 } else { 1744 *h = (HostAddress){ 1745 .base = addr_reg, 1746 .index = TCG_REG_XZR, 1747 .index_ext = TCG_TYPE_I64 1748 }; 1749 } 1750#endif 1751 1752 return ldst; 1753} 1754 1755static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1756 TCGReg data_r, HostAddress h) 1757{ 1758 switch (memop & MO_SSIZE) { 1759 case MO_UB: 1760 tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index); 1761 break; 1762 case MO_SB: 1763 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1764 data_r, h.base, h.index_ext, h.index); 1765 break; 1766 case MO_UW: 1767 tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index); 1768 break; 1769 case MO_SW: 1770 tcg_out_ldst_r(s, (ext ? 
I3312_LDRSHX : I3312_LDRSHW), 1771 data_r, h.base, h.index_ext, h.index); 1772 break; 1773 case MO_UL: 1774 tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index); 1775 break; 1776 case MO_SL: 1777 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index); 1778 break; 1779 case MO_UQ: 1780 tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index); 1781 break; 1782 default: 1783 g_assert_not_reached(); 1784 } 1785} 1786 1787static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1788 TCGReg data_r, HostAddress h) 1789{ 1790 switch (memop & MO_SIZE) { 1791 case MO_8: 1792 tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index); 1793 break; 1794 case MO_16: 1795 tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index); 1796 break; 1797 case MO_32: 1798 tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index); 1799 break; 1800 case MO_64: 1801 tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index); 1802 break; 1803 default: 1804 g_assert_not_reached(); 1805 } 1806} 1807 1808static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1809 MemOpIdx oi, TCGType data_type) 1810{ 1811 TCGLabelQemuLdst *ldst; 1812 HostAddress h; 1813 1814 ldst = prepare_host_addr(s, &h, addr_reg, oi, true); 1815 tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h); 1816 1817 if (ldst) { 1818 ldst->type = data_type; 1819 ldst->datalo_reg = data_reg; 1820 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1821 } 1822} 1823 1824static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1825 MemOpIdx oi, TCGType data_type) 1826{ 1827 TCGLabelQemuLdst *ldst; 1828 HostAddress h; 1829 1830 ldst = prepare_host_addr(s, &h, addr_reg, oi, false); 1831 tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); 1832 1833 if (ldst) { 1834 ldst->type = data_type; 1835 ldst->datalo_reg = data_reg; 1836 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1837 } 1838} 1839 1840static const tcg_insn_unit *tb_ret_addr; 1841 1842static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 1843{ 1844 /* Reuse the zeroing that exists for goto_ptr. */ 1845 if (a0 == 0) { 1846 tcg_out_goto_long(s, tcg_code_gen_epilogue); 1847 } else { 1848 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 1849 tcg_out_goto_long(s, tb_ret_addr); 1850 } 1851} 1852 1853static void tcg_out_goto_tb(TCGContext *s, int which) 1854{ 1855 /* 1856 * Direct branch, or indirect address load, will be patched 1857 * by tb_target_set_jmp_target. Assert indirect load offset 1858 * in range early, regardless of direct branch distance. 1859 */ 1860 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); 1861 tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); 1862 1863 set_jmp_insn_offset(s, which); 1864 tcg_out32(s, I3206_B); 1865 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1866 set_jmp_reset_offset(s, which); 1867} 1868 1869void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 1870 uintptr_t jmp_rx, uintptr_t jmp_rw) 1871{ 1872 uintptr_t d_addr = tb->jmp_target_addr[n]; 1873 ptrdiff_t d_offset = d_addr - jmp_rx; 1874 tcg_insn_unit insn; 1875 1876 /* Either directly branch, or indirect branch load. */ 1877 if (d_offset == sextract64(d_offset, 0, 28)) { 1878 insn = deposit32(I3206_B, 0, 26, d_offset >> 2); 1879 } else { 1880 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 1881 ptrdiff_t i_offset = i_addr - jmp_rx; 1882 1883 /* Note that we asserted this in range in tcg_out_goto_tb. 
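           When the destination is outside the +/-128MB range of a direct B,
           patch in an LDR (literal) that loads jmp_target_addr[n] into
           TCG_REG_TMP; the BR TCG_REG_TMP emitted by tcg_out_goto_tb then
           completes the jump.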
*/ 1884 insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2); 1885 } 1886 qatomic_set((uint32_t *)jmp_rw, insn); 1887 flush_idcache_range(jmp_rx, jmp_rw, 4); 1888} 1889 1890static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1891 const TCGArg args[TCG_MAX_OP_ARGS], 1892 const int const_args[TCG_MAX_OP_ARGS]) 1893{ 1894 /* 99% of the time, we can signal the use of extension registers 1895 by looking to see if the opcode handles 64-bit data. */ 1896 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 1897 1898 /* Hoist the loads of the most common arguments. */ 1899 TCGArg a0 = args[0]; 1900 TCGArg a1 = args[1]; 1901 TCGArg a2 = args[2]; 1902 int c2 = const_args[2]; 1903 1904 /* Some operands are defined with "rZ" constraint, a register or 1905 the zero register. These need not actually test args[I] == 0. */ 1906#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 1907 1908 switch (opc) { 1909 case INDEX_op_goto_ptr: 1910 tcg_out_insn(s, 3207, BR, a0); 1911 break; 1912 1913 case INDEX_op_br: 1914 tcg_out_goto_label(s, arg_label(a0)); 1915 break; 1916 1917 case INDEX_op_ld8u_i32: 1918 case INDEX_op_ld8u_i64: 1919 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 1920 break; 1921 case INDEX_op_ld8s_i32: 1922 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 1923 break; 1924 case INDEX_op_ld8s_i64: 1925 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 1926 break; 1927 case INDEX_op_ld16u_i32: 1928 case INDEX_op_ld16u_i64: 1929 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 1930 break; 1931 case INDEX_op_ld16s_i32: 1932 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 1933 break; 1934 case INDEX_op_ld16s_i64: 1935 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 1936 break; 1937 case INDEX_op_ld_i32: 1938 case INDEX_op_ld32u_i64: 1939 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 1940 break; 1941 case INDEX_op_ld32s_i64: 1942 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 1943 break; 1944 case INDEX_op_ld_i64: 1945 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 1946 break; 1947 1948 case INDEX_op_st8_i32: 1949 case INDEX_op_st8_i64: 1950 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 1951 break; 1952 case INDEX_op_st16_i32: 1953 case INDEX_op_st16_i64: 1954 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 1955 break; 1956 case INDEX_op_st_i32: 1957 case INDEX_op_st32_i64: 1958 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 1959 break; 1960 case INDEX_op_st_i64: 1961 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 1962 break; 1963 1964 case INDEX_op_add_i32: 1965 a2 = (int32_t)a2; 1966 /* FALLTHRU */ 1967 case INDEX_op_add_i64: 1968 if (c2) { 1969 tcg_out_addsubi(s, ext, a0, a1, a2); 1970 } else { 1971 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 1972 } 1973 break; 1974 1975 case INDEX_op_sub_i32: 1976 a2 = (int32_t)a2; 1977 /* FALLTHRU */ 1978 case INDEX_op_sub_i64: 1979 if (c2) { 1980 tcg_out_addsubi(s, ext, a0, a1, -a2); 1981 } else { 1982 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 1983 } 1984 break; 1985 1986 case INDEX_op_neg_i64: 1987 case INDEX_op_neg_i32: 1988 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 1989 break; 1990 1991 case INDEX_op_and_i32: 1992 a2 = (int32_t)a2; 1993 /* FALLTHRU */ 1994 case INDEX_op_and_i64: 1995 if (c2) { 1996 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); 1997 } else { 1998 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 1999 } 2000 break; 2001 2002 case INDEX_op_andc_i32: 2003 a2 = (int32_t)a2; 2004 /* FALLTHRU */ 2005 case INDEX_op_andc_i64: 2006 if (c2) { 2007 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2008 } else { 2009 
tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2010 } 2011 break; 2012 2013 case INDEX_op_or_i32: 2014 a2 = (int32_t)a2; 2015 /* FALLTHRU */ 2016 case INDEX_op_or_i64: 2017 if (c2) { 2018 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2019 } else { 2020 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2021 } 2022 break; 2023 2024 case INDEX_op_orc_i32: 2025 a2 = (int32_t)a2; 2026 /* FALLTHRU */ 2027 case INDEX_op_orc_i64: 2028 if (c2) { 2029 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); 2030 } else { 2031 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2032 } 2033 break; 2034 2035 case INDEX_op_xor_i32: 2036 a2 = (int32_t)a2; 2037 /* FALLTHRU */ 2038 case INDEX_op_xor_i64: 2039 if (c2) { 2040 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2041 } else { 2042 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2043 } 2044 break; 2045 2046 case INDEX_op_eqv_i32: 2047 a2 = (int32_t)a2; 2048 /* FALLTHRU */ 2049 case INDEX_op_eqv_i64: 2050 if (c2) { 2051 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2052 } else { 2053 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2054 } 2055 break; 2056 2057 case INDEX_op_not_i64: 2058 case INDEX_op_not_i32: 2059 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2060 break; 2061 2062 case INDEX_op_mul_i64: 2063 case INDEX_op_mul_i32: 2064 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2065 break; 2066 2067 case INDEX_op_div_i64: 2068 case INDEX_op_div_i32: 2069 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2070 break; 2071 case INDEX_op_divu_i64: 2072 case INDEX_op_divu_i32: 2073 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2074 break; 2075 2076 case INDEX_op_rem_i64: 2077 case INDEX_op_rem_i32: 2078 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); 2079 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2080 break; 2081 case INDEX_op_remu_i64: 2082 case INDEX_op_remu_i32: 2083 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); 2084 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2085 break; 2086 2087 case INDEX_op_shl_i64: 2088 case INDEX_op_shl_i32: 2089 if (c2) { 2090 tcg_out_shl(s, ext, a0, a1, a2); 2091 } else { 2092 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2093 } 2094 break; 2095 2096 case INDEX_op_shr_i64: 2097 case INDEX_op_shr_i32: 2098 if (c2) { 2099 tcg_out_shr(s, ext, a0, a1, a2); 2100 } else { 2101 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2102 } 2103 break; 2104 2105 case INDEX_op_sar_i64: 2106 case INDEX_op_sar_i32: 2107 if (c2) { 2108 tcg_out_sar(s, ext, a0, a1, a2); 2109 } else { 2110 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2111 } 2112 break; 2113 2114 case INDEX_op_rotr_i64: 2115 case INDEX_op_rotr_i32: 2116 if (c2) { 2117 tcg_out_rotr(s, ext, a0, a1, a2); 2118 } else { 2119 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2120 } 2121 break; 2122 2123 case INDEX_op_rotl_i64: 2124 case INDEX_op_rotl_i32: 2125 if (c2) { 2126 tcg_out_rotl(s, ext, a0, a1, a2); 2127 } else { 2128 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); 2129 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); 2130 } 2131 break; 2132 2133 case INDEX_op_clz_i64: 2134 case INDEX_op_clz_i32: 2135 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2136 break; 2137 case INDEX_op_ctz_i64: 2138 case INDEX_op_ctz_i32: 2139 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2140 break; 2141 2142 case INDEX_op_brcond_i32: 2143 a1 = (int32_t)a1; 2144 /* FALLTHRU */ 2145 case INDEX_op_brcond_i64: 2146 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2147 break; 2148 2149 case INDEX_op_setcond_i32: 2150 a2 = 
(int32_t)a2; 2151 /* FALLTHRU */ 2152 case INDEX_op_setcond_i64: 2153 tcg_out_cmp(s, ext, a1, a2, c2); 2154 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2155 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2156 TCG_REG_XZR, tcg_invert_cond(args[3])); 2157 break; 2158 2159 case INDEX_op_movcond_i32: 2160 a2 = (int32_t)a2; 2161 /* FALLTHRU */ 2162 case INDEX_op_movcond_i64: 2163 tcg_out_cmp(s, ext, a1, a2, c2); 2164 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2165 break; 2166 2167 case INDEX_op_qemu_ld_i32: 2168 case INDEX_op_qemu_ld_i64: 2169 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2170 break; 2171 case INDEX_op_qemu_st_i32: 2172 case INDEX_op_qemu_st_i64: 2173 tcg_out_qemu_st(s, REG0(0), a1, a2, ext); 2174 break; 2175 2176 case INDEX_op_bswap64_i64: 2177 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2178 break; 2179 case INDEX_op_bswap32_i64: 2180 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2181 if (a2 & TCG_BSWAP_OS) { 2182 tcg_out_ext32s(s, a0, a0); 2183 } 2184 break; 2185 case INDEX_op_bswap32_i32: 2186 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2187 break; 2188 case INDEX_op_bswap16_i64: 2189 case INDEX_op_bswap16_i32: 2190 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2191 if (a2 & TCG_BSWAP_OS) { 2192 /* Output must be sign-extended. */ 2193 tcg_out_ext16s(s, ext, a0, a0); 2194 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2195 /* Output must be zero-extended, but input isn't. */ 2196 tcg_out_ext16u(s, a0, a0); 2197 } 2198 break; 2199 2200 case INDEX_op_deposit_i64: 2201 case INDEX_op_deposit_i32: 2202 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2203 break; 2204 2205 case INDEX_op_extract_i64: 2206 case INDEX_op_extract_i32: 2207 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2208 break; 2209 2210 case INDEX_op_sextract_i64: 2211 case INDEX_op_sextract_i32: 2212 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2213 break; 2214 2215 case INDEX_op_extract2_i64: 2216 case INDEX_op_extract2_i32: 2217 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2218 break; 2219 2220 case INDEX_op_add2_i32: 2221 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2222 (int32_t)args[4], args[5], const_args[4], 2223 const_args[5], false); 2224 break; 2225 case INDEX_op_add2_i64: 2226 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2227 args[5], const_args[4], const_args[5], false); 2228 break; 2229 case INDEX_op_sub2_i32: 2230 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2231 (int32_t)args[4], args[5], const_args[4], 2232 const_args[5], true); 2233 break; 2234 case INDEX_op_sub2_i64: 2235 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2236 args[5], const_args[4], const_args[5], true); 2237 break; 2238 2239 case INDEX_op_muluh_i64: 2240 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2241 break; 2242 case INDEX_op_mulsh_i64: 2243 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2244 break; 2245 2246 case INDEX_op_mb: 2247 tcg_out_mb(s, a0); 2248 break; 2249 2250 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2251 case INDEX_op_mov_i64: 2252 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2253 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2254 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 2255 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
*/ 2256 case INDEX_op_ext8s_i64: 2257 case INDEX_op_ext8u_i32: 2258 case INDEX_op_ext8u_i64: 2259 case INDEX_op_ext16s_i64: 2260 case INDEX_op_ext16s_i32: 2261 case INDEX_op_ext16u_i64: 2262 case INDEX_op_ext16u_i32: 2263 case INDEX_op_ext32s_i64: 2264 case INDEX_op_ext32u_i64: 2265 case INDEX_op_ext_i32_i64: 2266 case INDEX_op_extu_i32_i64: 2267 case INDEX_op_extrl_i64_i32: 2268 default: 2269 g_assert_not_reached(); 2270 } 2271 2272#undef REG0 2273} 2274 2275static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2276 unsigned vecl, unsigned vece, 2277 const TCGArg args[TCG_MAX_OP_ARGS], 2278 const int const_args[TCG_MAX_OP_ARGS]) 2279{ 2280 static const AArch64Insn cmp_vec_insn[16] = { 2281 [TCG_COND_EQ] = I3616_CMEQ, 2282 [TCG_COND_GT] = I3616_CMGT, 2283 [TCG_COND_GE] = I3616_CMGE, 2284 [TCG_COND_GTU] = I3616_CMHI, 2285 [TCG_COND_GEU] = I3616_CMHS, 2286 }; 2287 static const AArch64Insn cmp_scalar_insn[16] = { 2288 [TCG_COND_EQ] = I3611_CMEQ, 2289 [TCG_COND_GT] = I3611_CMGT, 2290 [TCG_COND_GE] = I3611_CMGE, 2291 [TCG_COND_GTU] = I3611_CMHI, 2292 [TCG_COND_GEU] = I3611_CMHS, 2293 }; 2294 static const AArch64Insn cmp0_vec_insn[16] = { 2295 [TCG_COND_EQ] = I3617_CMEQ0, 2296 [TCG_COND_GT] = I3617_CMGT0, 2297 [TCG_COND_GE] = I3617_CMGE0, 2298 [TCG_COND_LT] = I3617_CMLT0, 2299 [TCG_COND_LE] = I3617_CMLE0, 2300 }; 2301 static const AArch64Insn cmp0_scalar_insn[16] = { 2302 [TCG_COND_EQ] = I3612_CMEQ0, 2303 [TCG_COND_GT] = I3612_CMGT0, 2304 [TCG_COND_GE] = I3612_CMGE0, 2305 [TCG_COND_LT] = I3612_CMLT0, 2306 [TCG_COND_LE] = I3612_CMLE0, 2307 }; 2308 2309 TCGType type = vecl + TCG_TYPE_V64; 2310 unsigned is_q = vecl; 2311 bool is_scalar = !is_q && vece == MO_64; 2312 TCGArg a0, a1, a2, a3; 2313 int cmode, imm8; 2314 2315 a0 = args[0]; 2316 a1 = args[1]; 2317 a2 = args[2]; 2318 2319 switch (opc) { 2320 case INDEX_op_ld_vec: 2321 tcg_out_ld(s, type, a0, a1, a2); 2322 break; 2323 case INDEX_op_st_vec: 2324 tcg_out_st(s, type, a0, a1, a2); 2325 break; 2326 case INDEX_op_dupm_vec: 2327 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2328 break; 2329 case INDEX_op_add_vec: 2330 if (is_scalar) { 2331 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2332 } else { 2333 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2334 } 2335 break; 2336 case INDEX_op_sub_vec: 2337 if (is_scalar) { 2338 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2339 } else { 2340 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2341 } 2342 break; 2343 case INDEX_op_mul_vec: 2344 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2345 break; 2346 case INDEX_op_neg_vec: 2347 if (is_scalar) { 2348 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2349 } else { 2350 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2351 } 2352 break; 2353 case INDEX_op_abs_vec: 2354 if (is_scalar) { 2355 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2356 } else { 2357 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2358 } 2359 break; 2360 case INDEX_op_and_vec: 2361 if (const_args[2]) { 2362 is_shimm1632(~a2, &cmode, &imm8); 2363 if (a0 == a1) { 2364 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2365 return; 2366 } 2367 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2368 a2 = a0; 2369 } 2370 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2371 break; 2372 case INDEX_op_or_vec: 2373 if (const_args[2]) { 2374 is_shimm1632(a2, &cmode, &imm8); 2375 if (a0 == a1) { 2376 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2377 return; 2378 } 2379 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2380 a2 = a0; 2381 } 2382 tcg_out_insn(s, 3616, ORR, 
is_q, 0, a0, a1, a2); 2383 break; 2384 case INDEX_op_andc_vec: 2385 if (const_args[2]) { 2386 is_shimm1632(a2, &cmode, &imm8); 2387 if (a0 == a1) { 2388 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2389 return; 2390 } 2391 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2392 a2 = a0; 2393 } 2394 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2395 break; 2396 case INDEX_op_orc_vec: 2397 if (const_args[2]) { 2398 is_shimm1632(~a2, &cmode, &imm8); 2399 if (a0 == a1) { 2400 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2401 return; 2402 } 2403 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2404 a2 = a0; 2405 } 2406 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2407 break; 2408 case INDEX_op_xor_vec: 2409 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2410 break; 2411 case INDEX_op_ssadd_vec: 2412 if (is_scalar) { 2413 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2414 } else { 2415 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2416 } 2417 break; 2418 case INDEX_op_sssub_vec: 2419 if (is_scalar) { 2420 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2421 } else { 2422 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2423 } 2424 break; 2425 case INDEX_op_usadd_vec: 2426 if (is_scalar) { 2427 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2428 } else { 2429 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2430 } 2431 break; 2432 case INDEX_op_ussub_vec: 2433 if (is_scalar) { 2434 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2435 } else { 2436 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2437 } 2438 break; 2439 case INDEX_op_smax_vec: 2440 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2441 break; 2442 case INDEX_op_smin_vec: 2443 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2444 break; 2445 case INDEX_op_umax_vec: 2446 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2447 break; 2448 case INDEX_op_umin_vec: 2449 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2450 break; 2451 case INDEX_op_not_vec: 2452 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2453 break; 2454 case INDEX_op_shli_vec: 2455 if (is_scalar) { 2456 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2457 } else { 2458 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2459 } 2460 break; 2461 case INDEX_op_shri_vec: 2462 if (is_scalar) { 2463 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2464 } else { 2465 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2466 } 2467 break; 2468 case INDEX_op_sari_vec: 2469 if (is_scalar) { 2470 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2471 } else { 2472 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2473 } 2474 break; 2475 case INDEX_op_aa64_sli_vec: 2476 if (is_scalar) { 2477 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2478 } else { 2479 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2480 } 2481 break; 2482 case INDEX_op_shlv_vec: 2483 if (is_scalar) { 2484 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2485 } else { 2486 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2487 } 2488 break; 2489 case INDEX_op_aa64_sshl_vec: 2490 if (is_scalar) { 2491 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2492 } else { 2493 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2494 } 2495 break; 2496 case INDEX_op_cmp_vec: 2497 { 2498 TCGCond cond = args[3]; 2499 AArch64Insn insn; 2500 2501 if (cond == TCG_COND_NE) { 2502 if (const_args[2]) { 2503 if (is_scalar) { 2504 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 
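/* a1 != 0 is equivalent to (a1 & a1) != 0, hence CMTST of a1 with itself. */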
2505 } else { 2506 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2507 } 2508 } else { 2509 if (is_scalar) { 2510 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2511 } else { 2512 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2513 } 2514 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2515 } 2516 } else { 2517 if (const_args[2]) { 2518 if (is_scalar) { 2519 insn = cmp0_scalar_insn[cond]; 2520 if (insn) { 2521 tcg_out_insn_3612(s, insn, vece, a0, a1); 2522 break; 2523 } 2524 } else { 2525 insn = cmp0_vec_insn[cond]; 2526 if (insn) { 2527 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2528 break; 2529 } 2530 } 2531 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2532 a2 = TCG_VEC_TMP; 2533 } 2534 if (is_scalar) { 2535 insn = cmp_scalar_insn[cond]; 2536 if (insn == 0) { 2537 TCGArg t; 2538 t = a1, a1 = a2, a2 = t; 2539 cond = tcg_swap_cond(cond); 2540 insn = cmp_scalar_insn[cond]; 2541 tcg_debug_assert(insn != 0); 2542 } 2543 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2544 } else { 2545 insn = cmp_vec_insn[cond]; 2546 if (insn == 0) { 2547 TCGArg t; 2548 t = a1, a1 = a2, a2 = t; 2549 cond = tcg_swap_cond(cond); 2550 insn = cmp_vec_insn[cond]; 2551 tcg_debug_assert(insn != 0); 2552 } 2553 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2554 } 2555 } 2556 } 2557 break; 2558 2559 case INDEX_op_bitsel_vec: 2560 a3 = args[3]; 2561 if (a0 == a3) { 2562 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2563 } else if (a0 == a2) { 2564 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2565 } else { 2566 if (a0 != a1) { 2567 tcg_out_mov(s, type, a0, a1); 2568 } 2569 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2570 } 2571 break; 2572 2573 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2574 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2575 default: 2576 g_assert_not_reached(); 2577 } 2578} 2579 2580int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2581{ 2582 switch (opc) { 2583 case INDEX_op_add_vec: 2584 case INDEX_op_sub_vec: 2585 case INDEX_op_and_vec: 2586 case INDEX_op_or_vec: 2587 case INDEX_op_xor_vec: 2588 case INDEX_op_andc_vec: 2589 case INDEX_op_orc_vec: 2590 case INDEX_op_neg_vec: 2591 case INDEX_op_abs_vec: 2592 case INDEX_op_not_vec: 2593 case INDEX_op_cmp_vec: 2594 case INDEX_op_shli_vec: 2595 case INDEX_op_shri_vec: 2596 case INDEX_op_sari_vec: 2597 case INDEX_op_ssadd_vec: 2598 case INDEX_op_sssub_vec: 2599 case INDEX_op_usadd_vec: 2600 case INDEX_op_ussub_vec: 2601 case INDEX_op_shlv_vec: 2602 case INDEX_op_bitsel_vec: 2603 return 1; 2604 case INDEX_op_rotli_vec: 2605 case INDEX_op_shrv_vec: 2606 case INDEX_op_sarv_vec: 2607 case INDEX_op_rotlv_vec: 2608 case INDEX_op_rotrv_vec: 2609 return -1; 2610 case INDEX_op_mul_vec: 2611 case INDEX_op_smax_vec: 2612 case INDEX_op_smin_vec: 2613 case INDEX_op_umax_vec: 2614 case INDEX_op_umin_vec: 2615 return vece < MO_64; 2616 2617 default: 2618 return 0; 2619 } 2620} 2621 2622void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2623 TCGArg a0, ...) 
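/* Expand the ops for which tcg_can_emit_vec_op returns -1 (rotli, shrv, sarv, rotlv, rotrv). */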
2624{ 2625 va_list va; 2626 TCGv_vec v0, v1, v2, t1, t2, c1; 2627 TCGArg a2; 2628 2629 va_start(va, a0); 2630 v0 = temp_tcgv_vec(arg_temp(a0)); 2631 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2632 a2 = va_arg(va, TCGArg); 2633 va_end(va); 2634 2635 switch (opc) { 2636 case INDEX_op_rotli_vec: 2637 t1 = tcg_temp_new_vec(type); 2638 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2639 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2640 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2641 tcg_temp_free_vec(t1); 2642 break; 2643 2644 case INDEX_op_shrv_vec: 2645 case INDEX_op_sarv_vec: 2646 /* Right shifts are negative left shifts for AArch64. */ 2647 v2 = temp_tcgv_vec(arg_temp(a2)); 2648 t1 = tcg_temp_new_vec(type); 2649 tcg_gen_neg_vec(vece, t1, v2); 2650 opc = (opc == INDEX_op_shrv_vec 2651 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2652 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2653 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2654 tcg_temp_free_vec(t1); 2655 break; 2656 2657 case INDEX_op_rotlv_vec: 2658 v2 = temp_tcgv_vec(arg_temp(a2)); 2659 t1 = tcg_temp_new_vec(type); 2660 c1 = tcg_constant_vec(type, vece, 8 << vece); 2661 tcg_gen_sub_vec(vece, t1, v2, c1); 2662 /* Right shifts are negative left shifts for AArch64. */ 2663 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2664 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2665 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2666 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2667 tcg_gen_or_vec(vece, v0, v0, t1); 2668 tcg_temp_free_vec(t1); 2669 break; 2670 2671 case INDEX_op_rotrv_vec: 2672 v2 = temp_tcgv_vec(arg_temp(a2)); 2673 t1 = tcg_temp_new_vec(type); 2674 t2 = tcg_temp_new_vec(type); 2675 c1 = tcg_constant_vec(type, vece, 8 << vece); 2676 tcg_gen_neg_vec(vece, t1, v2); 2677 tcg_gen_sub_vec(vece, t2, c1, v2); 2678 /* Right shifts are negative left shifts for AArch64. 
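Both halves of the rotate are formed with shlv: t1 = -v2 yields the right shift and t2 = (8 << vece) - v2 the left shift; the two results are then ORed into v0.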
*/ 2679 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2680 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2681 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2682 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2683 tcg_gen_or_vec(vece, v0, t1, t2); 2684 tcg_temp_free_vec(t1); 2685 tcg_temp_free_vec(t2); 2686 break; 2687 2688 default: 2689 g_assert_not_reached(); 2690 } 2691} 2692 2693static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2694{ 2695 switch (op) { 2696 case INDEX_op_goto_ptr: 2697 return C_O0_I1(r); 2698 2699 case INDEX_op_ld8u_i32: 2700 case INDEX_op_ld8s_i32: 2701 case INDEX_op_ld16u_i32: 2702 case INDEX_op_ld16s_i32: 2703 case INDEX_op_ld_i32: 2704 case INDEX_op_ld8u_i64: 2705 case INDEX_op_ld8s_i64: 2706 case INDEX_op_ld16u_i64: 2707 case INDEX_op_ld16s_i64: 2708 case INDEX_op_ld32u_i64: 2709 case INDEX_op_ld32s_i64: 2710 case INDEX_op_ld_i64: 2711 case INDEX_op_neg_i32: 2712 case INDEX_op_neg_i64: 2713 case INDEX_op_not_i32: 2714 case INDEX_op_not_i64: 2715 case INDEX_op_bswap16_i32: 2716 case INDEX_op_bswap32_i32: 2717 case INDEX_op_bswap16_i64: 2718 case INDEX_op_bswap32_i64: 2719 case INDEX_op_bswap64_i64: 2720 case INDEX_op_ext8s_i32: 2721 case INDEX_op_ext16s_i32: 2722 case INDEX_op_ext8u_i32: 2723 case INDEX_op_ext16u_i32: 2724 case INDEX_op_ext8s_i64: 2725 case INDEX_op_ext16s_i64: 2726 case INDEX_op_ext32s_i64: 2727 case INDEX_op_ext8u_i64: 2728 case INDEX_op_ext16u_i64: 2729 case INDEX_op_ext32u_i64: 2730 case INDEX_op_ext_i32_i64: 2731 case INDEX_op_extu_i32_i64: 2732 case INDEX_op_extract_i32: 2733 case INDEX_op_extract_i64: 2734 case INDEX_op_sextract_i32: 2735 case INDEX_op_sextract_i64: 2736 return C_O1_I1(r, r); 2737 2738 case INDEX_op_st8_i32: 2739 case INDEX_op_st16_i32: 2740 case INDEX_op_st_i32: 2741 case INDEX_op_st8_i64: 2742 case INDEX_op_st16_i64: 2743 case INDEX_op_st32_i64: 2744 case INDEX_op_st_i64: 2745 return C_O0_I2(rZ, r); 2746 2747 case INDEX_op_add_i32: 2748 case INDEX_op_add_i64: 2749 case INDEX_op_sub_i32: 2750 case INDEX_op_sub_i64: 2751 case INDEX_op_setcond_i32: 2752 case INDEX_op_setcond_i64: 2753 return C_O1_I2(r, r, rA); 2754 2755 case INDEX_op_mul_i32: 2756 case INDEX_op_mul_i64: 2757 case INDEX_op_div_i32: 2758 case INDEX_op_div_i64: 2759 case INDEX_op_divu_i32: 2760 case INDEX_op_divu_i64: 2761 case INDEX_op_rem_i32: 2762 case INDEX_op_rem_i64: 2763 case INDEX_op_remu_i32: 2764 case INDEX_op_remu_i64: 2765 case INDEX_op_muluh_i64: 2766 case INDEX_op_mulsh_i64: 2767 return C_O1_I2(r, r, r); 2768 2769 case INDEX_op_and_i32: 2770 case INDEX_op_and_i64: 2771 case INDEX_op_or_i32: 2772 case INDEX_op_or_i64: 2773 case INDEX_op_xor_i32: 2774 case INDEX_op_xor_i64: 2775 case INDEX_op_andc_i32: 2776 case INDEX_op_andc_i64: 2777 case INDEX_op_orc_i32: 2778 case INDEX_op_orc_i64: 2779 case INDEX_op_eqv_i32: 2780 case INDEX_op_eqv_i64: 2781 return C_O1_I2(r, r, rL); 2782 2783 case INDEX_op_shl_i32: 2784 case INDEX_op_shr_i32: 2785 case INDEX_op_sar_i32: 2786 case INDEX_op_rotl_i32: 2787 case INDEX_op_rotr_i32: 2788 case INDEX_op_shl_i64: 2789 case INDEX_op_shr_i64: 2790 case INDEX_op_sar_i64: 2791 case INDEX_op_rotl_i64: 2792 case INDEX_op_rotr_i64: 2793 return C_O1_I2(r, r, ri); 2794 2795 case INDEX_op_clz_i32: 2796 case INDEX_op_ctz_i32: 2797 case INDEX_op_clz_i64: 2798 case INDEX_op_ctz_i64: 2799 return C_O1_I2(r, r, rAL); 2800 2801 case INDEX_op_brcond_i32: 2802 case INDEX_op_brcond_i64: 2803 return C_O0_I2(r, rA); 2804 2805 case INDEX_op_movcond_i32: 2806 case INDEX_op_movcond_i64: 2807 return C_O1_I4(r, r, rA, rZ, rZ); 
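/* Guest load/store operands use the 'l'/'lZ' constraints so that, with softmmu, they avoid the registers used to set up the slow-path helper call. */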
2808 2809 case INDEX_op_qemu_ld_i32: 2810 case INDEX_op_qemu_ld_i64: 2811 return C_O1_I1(r, l); 2812 case INDEX_op_qemu_st_i32: 2813 case INDEX_op_qemu_st_i64: 2814 return C_O0_I2(lZ, l); 2815 2816 case INDEX_op_deposit_i32: 2817 case INDEX_op_deposit_i64: 2818 return C_O1_I2(r, 0, rZ); 2819 2820 case INDEX_op_extract2_i32: 2821 case INDEX_op_extract2_i64: 2822 return C_O1_I2(r, rZ, rZ); 2823 2824 case INDEX_op_add2_i32: 2825 case INDEX_op_add2_i64: 2826 case INDEX_op_sub2_i32: 2827 case INDEX_op_sub2_i64: 2828 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 2829 2830 case INDEX_op_add_vec: 2831 case INDEX_op_sub_vec: 2832 case INDEX_op_mul_vec: 2833 case INDEX_op_xor_vec: 2834 case INDEX_op_ssadd_vec: 2835 case INDEX_op_sssub_vec: 2836 case INDEX_op_usadd_vec: 2837 case INDEX_op_ussub_vec: 2838 case INDEX_op_smax_vec: 2839 case INDEX_op_smin_vec: 2840 case INDEX_op_umax_vec: 2841 case INDEX_op_umin_vec: 2842 case INDEX_op_shlv_vec: 2843 case INDEX_op_shrv_vec: 2844 case INDEX_op_sarv_vec: 2845 case INDEX_op_aa64_sshl_vec: 2846 return C_O1_I2(w, w, w); 2847 case INDEX_op_not_vec: 2848 case INDEX_op_neg_vec: 2849 case INDEX_op_abs_vec: 2850 case INDEX_op_shli_vec: 2851 case INDEX_op_shri_vec: 2852 case INDEX_op_sari_vec: 2853 return C_O1_I1(w, w); 2854 case INDEX_op_ld_vec: 2855 case INDEX_op_dupm_vec: 2856 return C_O1_I1(w, r); 2857 case INDEX_op_st_vec: 2858 return C_O0_I2(w, r); 2859 case INDEX_op_dup_vec: 2860 return C_O1_I1(w, wr); 2861 case INDEX_op_or_vec: 2862 case INDEX_op_andc_vec: 2863 return C_O1_I2(w, w, wO); 2864 case INDEX_op_and_vec: 2865 case INDEX_op_orc_vec: 2866 return C_O1_I2(w, w, wN); 2867 case INDEX_op_cmp_vec: 2868 return C_O1_I2(w, w, wZ); 2869 case INDEX_op_bitsel_vec: 2870 return C_O1_I3(w, w, w, w); 2871 case INDEX_op_aa64_sli_vec: 2872 return C_O1_I2(w, 0, w); 2873 2874 default: 2875 g_assert_not_reached(); 2876 } 2877} 2878 2879#ifdef CONFIG_DARWIN 2880static bool sysctl_for_bool(const char *name) 2881{ 2882 int val = 0; 2883 size_t len = sizeof(val); 2884 2885 if (sysctlbyname(name, &val, &len, NULL, 0) == 0) { 2886 return val != 0; 2887 } 2888 2889 /* 2890 * We might in the future ask for properties not present in older kernels, 2891 * but we're only asking about static properties, all of which should be 2892 'int'. So we shouldn't see ENOMEM (val too small), or any of the other 2893 more exotic errors.
2894 */ 2895 assert(errno == ENOENT); 2896 return false; 2897} 2898#endif 2899 2900static void tcg_target_init(TCGContext *s) 2901{ 2902#ifdef __linux__ 2903 unsigned long hwcap = qemu_getauxval(AT_HWCAP); 2904 have_lse = hwcap & HWCAP_ATOMICS; 2905 have_lse2 = hwcap & HWCAP_USCAT; 2906#endif 2907#ifdef CONFIG_DARWIN 2908 have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE"); 2909 have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2"); 2910#endif 2911 2912 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2913 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2914 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2915 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2916 2917 tcg_target_call_clobber_regs = -1ull; 2918 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2919 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2920 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 2921 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2922 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2923 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2924 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2925 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2926 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2927 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2928 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2929 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2930 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2931 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2932 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2933 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2934 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 2935 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 2936 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 2937 2938 s->reserved_regs = 0; 2939 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2940 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 2941 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 2942 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 2943 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 2944} 2945 2946/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 2947#define PUSH_SIZE ((30 - 19 + 1) * 8) 2948 2949#define FRAME_SIZE \ 2950 ((PUSH_SIZE \ 2951 + TCG_STATIC_CALL_ARGS_SIZE \ 2952 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 2953 + TCG_TARGET_STACK_ALIGN - 1) \ 2954 & ~(TCG_TARGET_STACK_ALIGN - 1)) 2955 2956/* We're expecting a 2 byte uleb128 encoded value. */ 2957QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 2958 2959/* We're expecting to use a single ADDI insn. */ 2960QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 2961 2962static void tcg_target_qemu_prologue(TCGContext *s) 2963{ 2964 TCGReg r; 2965 2966 /* Push (FP, LR) and allocate space for all saved registers. */ 2967 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 2968 TCG_REG_SP, -PUSH_SIZE, 1, 1); 2969 2970 /* Set up frame pointer for canonical unwinding. */ 2971 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 2972 2973 /* Store callee-preserved regs x19..x28. 
*/ 2974 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2975 int ofs = (r - TCG_REG_X19 + 2) * 8; 2976 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2977 } 2978 2979 /* Make stack space for TCG locals. */ 2980 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2981 FRAME_SIZE - PUSH_SIZE); 2982 2983 /* Inform TCG about how to find TCG locals with register, offset, size. */ 2984 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 2985 CPU_TEMP_BUF_NLONGS * sizeof(long)); 2986 2987#if !defined(CONFIG_SOFTMMU) 2988 if (USE_GUEST_BASE) { 2989 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 2990 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 2991 } 2992#endif 2993 2994 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2995 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 2996 2997 /* 2998 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 2999 * and fall through to the rest of the epilogue. 3000 */ 3001 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 3002 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 3003 3004 /* TB epilogue */ 3005 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 3006 3007 /* Remove TCG locals stack space. */ 3008 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3009 FRAME_SIZE - PUSH_SIZE); 3010 3011 /* Restore registers x19..x28. */ 3012 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3013 int ofs = (r - TCG_REG_X19 + 2) * 8; 3014 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3015 } 3016 3017 /* Pop (FP, LR), restore SP to previous frame. */ 3018 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 3019 TCG_REG_SP, PUSH_SIZE, 0, 1); 3020 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 3021} 3022 3023static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 3024{ 3025 int i; 3026 for (i = 0; i < count; ++i) { 3027 p[i] = NOP; 3028 } 3029} 3030 3031typedef struct { 3032 DebugFrameHeader h; 3033 uint8_t fde_def_cfa[4]; 3034 uint8_t fde_reg_ofs[24]; 3035} DebugFrame; 3036 3037#define ELF_HOST_MACHINE EM_AARCH64 3038 3039static const DebugFrame debug_frame = { 3040 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 3041 .h.cie.id = -1, 3042 .h.cie.version = 1, 3043 .h.cie.code_align = 1, 3044 .h.cie.data_align = 0x78, /* sleb128 -8 */ 3045 .h.cie.return_column = TCG_REG_LR, 3046 3047 /* Total FDE size does not include the "len" member. */ 3048 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 3049 3050 .fde_def_cfa = { 3051 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 3052 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 3053 (FRAME_SIZE >> 7) 3054 }, 3055 .fde_reg_ofs = { 3056 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 3057 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 3058 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 3059 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 3060 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 3061 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 3062 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 3063 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 3064 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 3065 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */ 3066 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 3067 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 3068 } 3069}; 3070 3071void tcg_register_jit(const void *buf, size_t buf_size) 3072{ 3073 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 3074} 3075