/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.
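           For example, a B insn (0x14000000) whose target is two insns
           ahead has offset 2; deposit32(insn, 0, 26, 2) yields 0x14000002,
           i.e. "b #+8".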
*/ 94 *src_rw = deposit32(*src_rw, 0, 26, offset); 95 return true; 96 } 97 return false; 98} 99 100static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 101{ 102 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 103 ptrdiff_t offset = target - src_rx; 104 105 if (offset == sextract64(offset, 0, 19)) { 106 *src_rw = deposit32(*src_rw, 5, 19, offset); 107 return true; 108 } 109 return false; 110} 111 112static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 113 intptr_t value, intptr_t addend) 114{ 115 tcg_debug_assert(addend == 0); 116 switch (type) { 117 case R_AARCH64_JUMP26: 118 case R_AARCH64_CALL26: 119 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 120 case R_AARCH64_CONDBR19: 121 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 122 default: 123 g_assert_not_reached(); 124 } 125} 126 127#define TCG_CT_CONST_AIMM 0x100 128#define TCG_CT_CONST_LIMM 0x200 129#define TCG_CT_CONST_ZERO 0x400 130#define TCG_CT_CONST_MONE 0x800 131#define TCG_CT_CONST_ORRI 0x1000 132#define TCG_CT_CONST_ANDI 0x2000 133 134#define ALL_GENERAL_REGS 0xffffffffu 135#define ALL_VECTOR_REGS 0xffffffff00000000ull 136 137#ifdef CONFIG_SOFTMMU 138#define ALL_QLDST_REGS \ 139 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ 140 (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) 141#else 142#define ALL_QLDST_REGS ALL_GENERAL_REGS 143#endif 144 145/* Match a constant valid for addition (12-bit, optionally shifted). */ 146static inline bool is_aimm(uint64_t val) 147{ 148 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 149} 150 151/* Match a constant valid for logical operations. */ 152static inline bool is_limm(uint64_t val) 153{ 154 /* Taking a simplified view of the logical immediates for now, ignoring 155 the replication that can happen across the field. Match bit patterns 156 of the forms 157 0....01....1 158 0..01..10..0 159 and their inverses. */ 160 161 /* Make things easier below, by testing the form with msb clear. */ 162 if ((int64_t)val < 0) { 163 val = ~val; 164 } 165 if (val == 0) { 166 return false; 167 } 168 val += val & -val; 169 return (val & (val - 1)) == 0; 170} 171 172/* Return true if v16 is a valid 16-bit shifted immediate. */ 173static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 174{ 175 if (v16 == (v16 & 0xff)) { 176 *cmode = 0x8; 177 *imm8 = v16 & 0xff; 178 return true; 179 } else if (v16 == (v16 & 0xff00)) { 180 *cmode = 0xa; 181 *imm8 = v16 >> 8; 182 return true; 183 } 184 return false; 185} 186 187/* Return true if v32 is a valid 32-bit shifted immediate. */ 188static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 189{ 190 if (v32 == (v32 & 0xff)) { 191 *cmode = 0x0; 192 *imm8 = v32 & 0xff; 193 return true; 194 } else if (v32 == (v32 & 0xff00)) { 195 *cmode = 0x2; 196 *imm8 = (v32 >> 8) & 0xff; 197 return true; 198 } else if (v32 == (v32 & 0xff0000)) { 199 *cmode = 0x4; 200 *imm8 = (v32 >> 16) & 0xff; 201 return true; 202 } else if (v32 == (v32 & 0xff000000)) { 203 *cmode = 0x6; 204 *imm8 = v32 >> 24; 205 return true; 206 } 207 return false; 208} 209 210/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 211static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 212{ 213 if ((v32 & 0xffff00ff) == 0xff) { 214 *cmode = 0xc; 215 *imm8 = (v32 >> 8) & 0xff; 216 return true; 217 } else if ((v32 & 0xff00ffff) == 0xffff) { 218 *cmode = 0xd; 219 *imm8 = (v32 >> 16) & 0xff; 220 return true; 221 } 222 return false; 223} 224 225/* Return true if v32 is a valid float32 immediate. 
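   These are the values representable as an FMOV (vector, immediate)
   imm8, i.e. +/- n/16 * 2^r with n in 16..31 and r in -3..4; e.g.
   1.0f (0x3f800000) yields imm8 0x70.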
*/ 226static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 227{ 228 if (extract32(v32, 0, 19) == 0 229 && (extract32(v32, 25, 6) == 0x20 230 || extract32(v32, 25, 6) == 0x1f)) { 231 *cmode = 0xf; 232 *imm8 = (extract32(v32, 31, 1) << 7) 233 | (extract32(v32, 25, 1) << 6) 234 | extract32(v32, 19, 6); 235 return true; 236 } 237 return false; 238} 239 240/* Return true if v64 is a valid float64 immediate. */ 241static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 242{ 243 if (extract64(v64, 0, 48) == 0 244 && (extract64(v64, 54, 9) == 0x100 245 || extract64(v64, 54, 9) == 0x0ff)) { 246 *cmode = 0xf; 247 *imm8 = (extract64(v64, 63, 1) << 7) 248 | (extract64(v64, 54, 1) << 6) 249 | extract64(v64, 48, 6); 250 return true; 251 } 252 return false; 253} 254 255/* 256 * Return non-zero if v32 can be formed by MOVI+ORR. 257 * Place the parameters for MOVI in (cmode, imm8). 258 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 259 */ 260static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 261{ 262 int i; 263 264 for (i = 6; i > 0; i -= 2) { 265 /* Mask out one byte we can add with ORR. */ 266 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 267 if (is_shimm32(tmp, cmode, imm8) || 268 is_soimm32(tmp, cmode, imm8)) { 269 break; 270 } 271 } 272 return i; 273} 274 275/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 276static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 277{ 278 if (v32 == deposit32(v32, 16, 16, v32)) { 279 return is_shimm16(v32, cmode, imm8); 280 } else { 281 return is_shimm32(v32, cmode, imm8); 282 } 283} 284 285static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 286{ 287 if (ct & TCG_CT_CONST) { 288 return 1; 289 } 290 if (type == TCG_TYPE_I32) { 291 val = (int32_t)val; 292 } 293 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 294 return 1; 295 } 296 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 297 return 1; 298 } 299 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 300 return 1; 301 } 302 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 303 return 1; 304 } 305 306 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 307 case 0: 308 break; 309 case TCG_CT_CONST_ANDI: 310 val = ~val; 311 /* fallthru */ 312 case TCG_CT_CONST_ORRI: 313 if (val == deposit64(val, 32, 32, val)) { 314 int cmode, imm8; 315 return is_shimm1632(val, &cmode, &imm8); 316 } 317 break; 318 default: 319 /* Both bits should not be set for the same insn. 
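           TCG_CT_CONST_ORRI matches constants usable directly as a vector
           ORR immediate; TCG_CT_CONST_ANDI inverts the value first, so
           that an AND with constant can be emitted as BIC of the
           inverted immediate.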
*/ 320 g_assert_not_reached(); 321 } 322 323 return 0; 324} 325 326enum aarch64_cond_code { 327 COND_EQ = 0x0, 328 COND_NE = 0x1, 329 COND_CS = 0x2, /* Unsigned greater or equal */ 330 COND_HS = COND_CS, /* ALIAS greater or equal */ 331 COND_CC = 0x3, /* Unsigned less than */ 332 COND_LO = COND_CC, /* ALIAS Lower */ 333 COND_MI = 0x4, /* Negative */ 334 COND_PL = 0x5, /* Zero or greater */ 335 COND_VS = 0x6, /* Overflow */ 336 COND_VC = 0x7, /* No overflow */ 337 COND_HI = 0x8, /* Unsigned greater than */ 338 COND_LS = 0x9, /* Unsigned less or equal */ 339 COND_GE = 0xa, 340 COND_LT = 0xb, 341 COND_GT = 0xc, 342 COND_LE = 0xd, 343 COND_AL = 0xe, 344 COND_NV = 0xf, /* behaves like COND_AL here */ 345}; 346 347static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 348 [TCG_COND_EQ] = COND_EQ, 349 [TCG_COND_NE] = COND_NE, 350 [TCG_COND_LT] = COND_LT, 351 [TCG_COND_GE] = COND_GE, 352 [TCG_COND_LE] = COND_LE, 353 [TCG_COND_GT] = COND_GT, 354 /* unsigned */ 355 [TCG_COND_LTU] = COND_LO, 356 [TCG_COND_GTU] = COND_HI, 357 [TCG_COND_GEU] = COND_HS, 358 [TCG_COND_LEU] = COND_LS, 359}; 360 361typedef enum { 362 LDST_ST = 0, /* store */ 363 LDST_LD = 1, /* load */ 364 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 365 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 366} AArch64LdstType; 367 368/* We encode the format of the insn into the beginning of the name, so that 369 we can have the preprocessor help "typecheck" the insn vs the output 370 function. Arm didn't provide us with nice names for the formats, so we 371 use the section number of the architecture reference manual in which the 372 instruction group is described. */ 373typedef enum { 374 /* Compare and branch (immediate). */ 375 I3201_CBZ = 0x34000000, 376 I3201_CBNZ = 0x35000000, 377 378 /* Conditional branch (immediate). */ 379 I3202_B_C = 0x54000000, 380 381 /* Unconditional branch (immediate). */ 382 I3206_B = 0x14000000, 383 I3206_BL = 0x94000000, 384 385 /* Unconditional branch (register). */ 386 I3207_BR = 0xd61f0000, 387 I3207_BLR = 0xd63f0000, 388 I3207_RET = 0xd65f0000, 389 390 /* AdvSIMD load/store single structure. */ 391 I3303_LD1R = 0x0d40c000, 392 393 /* Load literal for loading the address at pc-relative offset */ 394 I3305_LDR = 0x58000000, 395 I3305_LDR_v64 = 0x5c000000, 396 I3305_LDR_v128 = 0x9c000000, 397 398 /* Load/store register. Described here as 3.3.12, but the helper 399 that emits them can transform to 3.3.10 or 3.3.13. 
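       ORing in I3312_TO_I3310 selects the register-offset form (3.3.10)
       and I3312_TO_I3313 the scaled unsigned-offset form (3.3.13); see
       tcg_out_insn_3310 and tcg_out_insn_3313 below.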
*/ 400 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 401 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 402 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 403 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 404 405 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 406 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 407 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 408 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 409 410 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 411 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 412 413 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 414 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 415 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 416 417 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 418 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 419 420 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 421 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 422 423 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 424 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 425 426 I3312_TO_I3310 = 0x00200800, 427 I3312_TO_I3313 = 0x01000000, 428 429 /* Load/store register pair instructions. */ 430 I3314_LDP = 0x28400000, 431 I3314_STP = 0x28000000, 432 433 /* Add/subtract immediate instructions. */ 434 I3401_ADDI = 0x11000000, 435 I3401_ADDSI = 0x31000000, 436 I3401_SUBI = 0x51000000, 437 I3401_SUBSI = 0x71000000, 438 439 /* Bitfield instructions. */ 440 I3402_BFM = 0x33000000, 441 I3402_SBFM = 0x13000000, 442 I3402_UBFM = 0x53000000, 443 444 /* Extract instruction. */ 445 I3403_EXTR = 0x13800000, 446 447 /* Logical immediate instructions. */ 448 I3404_ANDI = 0x12000000, 449 I3404_ORRI = 0x32000000, 450 I3404_EORI = 0x52000000, 451 I3404_ANDSI = 0x72000000, 452 453 /* Move wide immediate instructions. */ 454 I3405_MOVN = 0x12800000, 455 I3405_MOVZ = 0x52800000, 456 I3405_MOVK = 0x72800000, 457 458 /* PC relative addressing instructions. */ 459 I3406_ADR = 0x10000000, 460 I3406_ADRP = 0x90000000, 461 462 /* Add/subtract shifted register instructions (without a shift). */ 463 I3502_ADD = 0x0b000000, 464 I3502_ADDS = 0x2b000000, 465 I3502_SUB = 0x4b000000, 466 I3502_SUBS = 0x6b000000, 467 468 /* Add/subtract shifted register instructions (with a shift). */ 469 I3502S_ADD_LSL = I3502_ADD, 470 471 /* Add/subtract with carry instructions. */ 472 I3503_ADC = 0x1a000000, 473 I3503_SBC = 0x5a000000, 474 475 /* Conditional select instructions. */ 476 I3506_CSEL = 0x1a800000, 477 I3506_CSINC = 0x1a800400, 478 I3506_CSINV = 0x5a800000, 479 I3506_CSNEG = 0x5a800400, 480 481 /* Data-processing (1 source) instructions. */ 482 I3507_CLZ = 0x5ac01000, 483 I3507_RBIT = 0x5ac00000, 484 I3507_REV = 0x5ac00000, /* + size << 10 */ 485 486 /* Data-processing (2 source) instructions. */ 487 I3508_LSLV = 0x1ac02000, 488 I3508_LSRV = 0x1ac02400, 489 I3508_ASRV = 0x1ac02800, 490 I3508_RORV = 0x1ac02c00, 491 I3508_SMULH = 0x9b407c00, 492 I3508_UMULH = 0x9bc07c00, 493 I3508_UDIV = 0x1ac00800, 494 I3508_SDIV = 0x1ac00c00, 495 496 /* Data-processing (3 source) instructions. */ 497 I3509_MADD = 0x1b000000, 498 I3509_MSUB = 0x1b008000, 499 500 /* Logical shifted register instructions (without a shift). 
*/ 501 I3510_AND = 0x0a000000, 502 I3510_BIC = 0x0a200000, 503 I3510_ORR = 0x2a000000, 504 I3510_ORN = 0x2a200000, 505 I3510_EOR = 0x4a000000, 506 I3510_EON = 0x4a200000, 507 I3510_ANDS = 0x6a000000, 508 509 /* Logical shifted register instructions (with a shift). */ 510 I3502S_AND_LSR = I3510_AND | (1 << 22), 511 512 /* AdvSIMD copy */ 513 I3605_DUP = 0x0e000400, 514 I3605_INS = 0x4e001c00, 515 I3605_UMOV = 0x0e003c00, 516 517 /* AdvSIMD modified immediate */ 518 I3606_MOVI = 0x0f000400, 519 I3606_MVNI = 0x2f000400, 520 I3606_BIC = 0x2f001400, 521 I3606_ORR = 0x0f001400, 522 523 /* AdvSIMD scalar shift by immediate */ 524 I3609_SSHR = 0x5f000400, 525 I3609_SSRA = 0x5f001400, 526 I3609_SHL = 0x5f005400, 527 I3609_USHR = 0x7f000400, 528 I3609_USRA = 0x7f001400, 529 I3609_SLI = 0x7f005400, 530 531 /* AdvSIMD scalar three same */ 532 I3611_SQADD = 0x5e200c00, 533 I3611_SQSUB = 0x5e202c00, 534 I3611_CMGT = 0x5e203400, 535 I3611_CMGE = 0x5e203c00, 536 I3611_SSHL = 0x5e204400, 537 I3611_ADD = 0x5e208400, 538 I3611_CMTST = 0x5e208c00, 539 I3611_UQADD = 0x7e200c00, 540 I3611_UQSUB = 0x7e202c00, 541 I3611_CMHI = 0x7e203400, 542 I3611_CMHS = 0x7e203c00, 543 I3611_USHL = 0x7e204400, 544 I3611_SUB = 0x7e208400, 545 I3611_CMEQ = 0x7e208c00, 546 547 /* AdvSIMD scalar two-reg misc */ 548 I3612_CMGT0 = 0x5e208800, 549 I3612_CMEQ0 = 0x5e209800, 550 I3612_CMLT0 = 0x5e20a800, 551 I3612_ABS = 0x5e20b800, 552 I3612_CMGE0 = 0x7e208800, 553 I3612_CMLE0 = 0x7e209800, 554 I3612_NEG = 0x7e20b800, 555 556 /* AdvSIMD shift by immediate */ 557 I3614_SSHR = 0x0f000400, 558 I3614_SSRA = 0x0f001400, 559 I3614_SHL = 0x0f005400, 560 I3614_SLI = 0x2f005400, 561 I3614_USHR = 0x2f000400, 562 I3614_USRA = 0x2f001400, 563 564 /* AdvSIMD three same. */ 565 I3616_ADD = 0x0e208400, 566 I3616_AND = 0x0e201c00, 567 I3616_BIC = 0x0e601c00, 568 I3616_BIF = 0x2ee01c00, 569 I3616_BIT = 0x2ea01c00, 570 I3616_BSL = 0x2e601c00, 571 I3616_EOR = 0x2e201c00, 572 I3616_MUL = 0x0e209c00, 573 I3616_ORR = 0x0ea01c00, 574 I3616_ORN = 0x0ee01c00, 575 I3616_SUB = 0x2e208400, 576 I3616_CMGT = 0x0e203400, 577 I3616_CMGE = 0x0e203c00, 578 I3616_CMTST = 0x0e208c00, 579 I3616_CMHI = 0x2e203400, 580 I3616_CMHS = 0x2e203c00, 581 I3616_CMEQ = 0x2e208c00, 582 I3616_SMAX = 0x0e206400, 583 I3616_SMIN = 0x0e206c00, 584 I3616_SSHL = 0x0e204400, 585 I3616_SQADD = 0x0e200c00, 586 I3616_SQSUB = 0x0e202c00, 587 I3616_UMAX = 0x2e206400, 588 I3616_UMIN = 0x2e206c00, 589 I3616_UQADD = 0x2e200c00, 590 I3616_UQSUB = 0x2e202c00, 591 I3616_USHL = 0x2e204400, 592 593 /* AdvSIMD two-reg misc. */ 594 I3617_CMGT0 = 0x0e208800, 595 I3617_CMEQ0 = 0x0e209800, 596 I3617_CMLT0 = 0x0e20a800, 597 I3617_CMGE0 = 0x2e208800, 598 I3617_CMLE0 = 0x2e209800, 599 I3617_NOT = 0x2e205800, 600 I3617_ABS = 0x0e20b800, 601 I3617_NEG = 0x2e20b800, 602 603 /* System instructions. */ 604 NOP = 0xd503201f, 605 DMB_ISH = 0xd50338bf, 606 DMB_LD = 0x00000100, 607 DMB_ST = 0x00000200, 608} AArch64Insn; 609 610static inline uint32_t tcg_in32(TCGContext *s) 611{ 612 uint32_t v = *(uint32_t *)s->code_ptr; 613 return v; 614} 615 616/* Emit an opcode with "type-checking" of the format. */ 617#define tcg_out_insn(S, FMT, OP, ...) 
\ 618 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 619 620static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 621 TCGReg rt, TCGReg rn, unsigned size) 622{ 623 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 624} 625 626static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 627 int imm19, TCGReg rt) 628{ 629 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 630} 631 632static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 633 TCGReg rt, int imm19) 634{ 635 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 636} 637 638static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 639 TCGCond c, int imm19) 640{ 641 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 642} 643 644static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 645{ 646 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 647} 648 649static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 650{ 651 tcg_out32(s, insn | rn << 5); 652} 653 654static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 655 TCGReg r1, TCGReg r2, TCGReg rn, 656 tcg_target_long ofs, bool pre, bool w) 657{ 658 insn |= 1u << 31; /* ext */ 659 insn |= pre << 24; 660 insn |= w << 23; 661 662 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 663 insn |= (ofs & (0x7f << 3)) << (15 - 3); 664 665 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 666} 667 668static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 669 TCGReg rd, TCGReg rn, uint64_t aimm) 670{ 671 if (aimm > 0xfff) { 672 tcg_debug_assert((aimm & 0xfff) == 0); 673 aimm >>= 12; 674 tcg_debug_assert(aimm <= 0xfff); 675 aimm |= 1 << 12; /* apply LSL 12 */ 676 } 677 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 678} 679 680/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 681 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 682 that feed the DecodeBitMasks pseudo function. */ 683static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 684 TCGReg rd, TCGReg rn, int n, int immr, int imms) 685{ 686 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 687 | rn << 5 | rd); 688} 689 690#define tcg_out_insn_3404 tcg_out_insn_3402 691 692static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 693 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 694{ 695 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 696 | rn << 5 | rd); 697} 698 699/* This function is used for the Move (wide immediate) instruction group. 700 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 701static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 702 TCGReg rd, uint16_t half, unsigned shift) 703{ 704 tcg_debug_assert((shift & ~0x30) == 0); 705 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 706} 707 708static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 709 TCGReg rd, int64_t disp) 710{ 711 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 712} 713 714/* This function is for both 3.5.2 (Add/Subtract shifted register), for 715 the rare occasion when we actually want to supply a shift amount. 
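   For example, the softmmu TLB lookup below uses I3502S_AND_LSR with
   shift TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS to mask and extract the
   TLB index in one insn.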
 */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
816 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 817 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 818 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 819} 820 821static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 822 TCGReg rd, TCGReg rn, intptr_t offset) 823{ 824 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 825} 826 827static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 828 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 829{ 830 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 831 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 832 | rn << 5 | (rd & 0x1f)); 833} 834 835/* Register to register move using ORR (shifted register with no shift). */ 836static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 837{ 838 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 839} 840 841/* Register to register move using ADDI (move to/from SP). */ 842static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 843{ 844 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 845} 846 847/* This function is used for the Logical (immediate) instruction group. 848 The value of LIMM must satisfy IS_LIMM. See the comment above about 849 only supporting simplified logical immediates. */ 850static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 851 TCGReg rd, TCGReg rn, uint64_t limm) 852{ 853 unsigned h, l, r, c; 854 855 tcg_debug_assert(is_limm(limm)); 856 857 h = clz64(limm); 858 l = ctz64(limm); 859 if (l == 0) { 860 r = 0; /* form 0....01....1 */ 861 c = ctz64(~limm) - 1; 862 if (h == 0) { 863 r = clz64(~limm); /* form 1..10..01..1 */ 864 c += r; 865 } 866 } else { 867 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 868 c = r - h - 1; 869 } 870 if (ext == TCG_TYPE_I32) { 871 r &= 31; 872 c &= 31; 873 } 874 875 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 876} 877 878static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 879 TCGReg rd, int64_t v64) 880{ 881 bool q = type == TCG_TYPE_V128; 882 int cmode, imm8, i; 883 884 /* Test all bytes equal first. */ 885 if (vece == MO_8) { 886 imm8 = (uint8_t)v64; 887 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 888 return; 889 } 890 891 /* 892 * Test all bytes 0x00 or 0xff second. This can match cases that 893 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 894 */ 895 for (i = imm8 = 0; i < 8; i++) { 896 uint8_t byte = v64 >> (i * 8); 897 if (byte == 0xff) { 898 imm8 |= 1 << i; 899 } else if (byte != 0) { 900 goto fail_bytes; 901 } 902 } 903 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 904 return; 905 fail_bytes: 906 907 /* 908 * Tests for various replications. For each element width, if we 909 * cannot find an expansion there's no point checking a larger 910 * width because we already know by replication it cannot match. 911 */ 912 if (vece == MO_16) { 913 uint16_t v16 = v64; 914 915 if (is_shimm16(v16, &cmode, &imm8)) { 916 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 917 return; 918 } 919 if (is_shimm16(~v16, &cmode, &imm8)) { 920 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 921 return; 922 } 923 924 /* 925 * Otherwise, all remaining constants can be loaded in two insns: 926 * rd = v16 & 0xff, rd |= v16 & 0xff00. 
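         * E.g. 0x1234 becomes MOVI (cmode 0x8, imm8 0x34) followed by
         * ORR (cmode 0xa, imm8 0x12).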
927 */ 928 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 929 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 930 return; 931 } else if (vece == MO_32) { 932 uint32_t v32 = v64; 933 uint32_t n32 = ~v32; 934 935 if (is_shimm32(v32, &cmode, &imm8) || 936 is_soimm32(v32, &cmode, &imm8) || 937 is_fimm32(v32, &cmode, &imm8)) { 938 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 939 return; 940 } 941 if (is_shimm32(n32, &cmode, &imm8) || 942 is_soimm32(n32, &cmode, &imm8)) { 943 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 944 return; 945 } 946 947 /* 948 * Restrict the set of constants to those we can load with 949 * two instructions. Others we load from the pool. 950 */ 951 i = is_shimm32_pair(v32, &cmode, &imm8); 952 if (i) { 953 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 954 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 955 return; 956 } 957 i = is_shimm32_pair(n32, &cmode, &imm8); 958 if (i) { 959 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 960 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 961 return; 962 } 963 } else if (is_fimm64(v64, &cmode, &imm8)) { 964 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 965 return; 966 } 967 968 /* 969 * As a last resort, load from the constant pool. Sadly there 970 * is no LD1R (literal), so store the full 16-byte vector. 971 */ 972 if (type == TCG_TYPE_V128) { 973 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 974 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 975 } else { 976 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 977 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 978 } 979} 980 981static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 982 TCGReg rd, TCGReg rs) 983{ 984 int is_q = type - TCG_TYPE_V64; 985 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 986 return true; 987} 988 989static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 990 TCGReg r, TCGReg base, intptr_t offset) 991{ 992 TCGReg temp = TCG_REG_TMP; 993 994 if (offset < -0xffffff || offset > 0xffffff) { 995 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 996 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 997 base = temp; 998 } else { 999 AArch64Insn add_insn = I3401_ADDI; 1000 1001 if (offset < 0) { 1002 add_insn = I3401_SUBI; 1003 offset = -offset; 1004 } 1005 if (offset & 0xfff000) { 1006 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1007 base = temp; 1008 } 1009 if (offset & 0xfff) { 1010 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1011 base = temp; 1012 } 1013 } 1014 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1015 return true; 1016} 1017 1018static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1019 tcg_target_long value) 1020{ 1021 tcg_target_long svalue = value; 1022 tcg_target_long ivalue = ~value; 1023 tcg_target_long t0, t1, t2; 1024 int s0, s1; 1025 AArch64Insn opc; 1026 1027 switch (type) { 1028 case TCG_TYPE_I32: 1029 case TCG_TYPE_I64: 1030 tcg_debug_assert(rd < 32); 1031 break; 1032 default: 1033 g_assert_not_reached(); 1034 } 1035 1036 /* For 32-bit values, discard potential garbage in value. For 64-bit 1037 values within [2**31, 2**32-1], we can create smaller sequences by 1038 interpreting this as a negative 32-bit number, while ensuring that 1039 the high 32 bits are cleared by setting SF=0. 
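       E.g. 0x00000000fffff000 is then loaded with a single
       "movn wd, #0xfff" instead of a movz/movk pair.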
*/ 1040 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1041 svalue = (int32_t)value; 1042 value = (uint32_t)value; 1043 ivalue = (uint32_t)ivalue; 1044 type = TCG_TYPE_I32; 1045 } 1046 1047 /* Speed things up by handling the common case of small positive 1048 and negative values specially. */ 1049 if ((value & ~0xffffull) == 0) { 1050 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1051 return; 1052 } else if ((ivalue & ~0xffffull) == 0) { 1053 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1054 return; 1055 } 1056 1057 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1058 use the sign-extended value. That lets us match rotated values such 1059 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1060 if (is_limm(svalue)) { 1061 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1062 return; 1063 } 1064 1065 /* Look for host pointer values within 4G of the PC. This happens 1066 often when loading pointers to QEMU's own data structures. */ 1067 if (type == TCG_TYPE_I64) { 1068 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1069 tcg_target_long disp = value - src_rx; 1070 if (disp == sextract64(disp, 0, 21)) { 1071 tcg_out_insn(s, 3406, ADR, rd, disp); 1072 return; 1073 } 1074 disp = (value >> 12) - (src_rx >> 12); 1075 if (disp == sextract64(disp, 0, 21)) { 1076 tcg_out_insn(s, 3406, ADRP, rd, disp); 1077 if (value & 0xfff) { 1078 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1079 } 1080 return; 1081 } 1082 } 1083 1084 /* Would it take fewer insns to begin with MOVN? */ 1085 if (ctpop64(value) >= 32) { 1086 t0 = ivalue; 1087 opc = I3405_MOVN; 1088 } else { 1089 t0 = value; 1090 opc = I3405_MOVZ; 1091 } 1092 s0 = ctz64(t0) & (63 & -16); 1093 t1 = t0 & ~(0xffffull << s0); 1094 s1 = ctz64(t1) & (63 & -16); 1095 t2 = t1 & ~(0xffffull << s1); 1096 if (t2 == 0) { 1097 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1098 if (t1 != 0) { 1099 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1100 } 1101 return; 1102 } 1103 1104 /* For more than 2 insns, dump it into the constant pool. */ 1105 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1106 tcg_out_insn(s, 3305, LDR, 0, rd); 1107} 1108 1109static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1110{ 1111 return false; 1112} 1113 1114static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1115 tcg_target_long imm) 1116{ 1117 /* This function is only used for passing structs by reference. */ 1118 g_assert_not_reached(); 1119} 1120 1121/* Define something more legible for general use. */ 1122#define tcg_out_ldst_r tcg_out_insn_3310 1123 1124static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1125 TCGReg rn, intptr_t offset, int lgsize) 1126{ 1127 /* If the offset is naturally aligned and in range, then we can 1128 use the scaled uimm12 encoding */ 1129 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1130 uintptr_t scaled_uimm = offset >> lgsize; 1131 if (scaled_uimm <= 0xfff) { 1132 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1133 return; 1134 } 1135 } 1136 1137 /* Small signed offsets can use the unscaled encoding. */ 1138 if (offset >= -256 && offset < 256) { 1139 tcg_out_insn_3312(s, insn, rd, rn, offset); 1140 return; 1141 } 1142 1143 /* Worst-case scenario, move offset to temp register, use reg offset. 
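       E.g. an 8-byte load at offset 32768 fits neither the scaled uimm12
       form (max 4095 * 8 = 32760) nor the signed 9-bit unscaled form.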
*/ 1144 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1145 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1146} 1147 1148static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1149{ 1150 if (ret == arg) { 1151 return true; 1152 } 1153 switch (type) { 1154 case TCG_TYPE_I32: 1155 case TCG_TYPE_I64: 1156 if (ret < 32 && arg < 32) { 1157 tcg_out_movr(s, type, ret, arg); 1158 break; 1159 } else if (ret < 32) { 1160 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1161 break; 1162 } else if (arg < 32) { 1163 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1164 break; 1165 } 1166 /* FALLTHRU */ 1167 1168 case TCG_TYPE_V64: 1169 tcg_debug_assert(ret >= 32 && arg >= 32); 1170 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1171 break; 1172 case TCG_TYPE_V128: 1173 tcg_debug_assert(ret >= 32 && arg >= 32); 1174 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1175 break; 1176 1177 default: 1178 g_assert_not_reached(); 1179 } 1180 return true; 1181} 1182 1183static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1184 TCGReg base, intptr_t ofs) 1185{ 1186 AArch64Insn insn; 1187 int lgsz; 1188 1189 switch (type) { 1190 case TCG_TYPE_I32: 1191 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1192 lgsz = 2; 1193 break; 1194 case TCG_TYPE_I64: 1195 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1196 lgsz = 3; 1197 break; 1198 case TCG_TYPE_V64: 1199 insn = I3312_LDRVD; 1200 lgsz = 3; 1201 break; 1202 case TCG_TYPE_V128: 1203 insn = I3312_LDRVQ; 1204 lgsz = 4; 1205 break; 1206 default: 1207 g_assert_not_reached(); 1208 } 1209 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1210} 1211 1212static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1213 TCGReg base, intptr_t ofs) 1214{ 1215 AArch64Insn insn; 1216 int lgsz; 1217 1218 switch (type) { 1219 case TCG_TYPE_I32: 1220 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1221 lgsz = 2; 1222 break; 1223 case TCG_TYPE_I64: 1224 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1225 lgsz = 3; 1226 break; 1227 case TCG_TYPE_V64: 1228 insn = I3312_STRVD; 1229 lgsz = 3; 1230 break; 1231 case TCG_TYPE_V128: 1232 insn = I3312_STRVQ; 1233 lgsz = 4; 1234 break; 1235 default: 1236 g_assert_not_reached(); 1237 } 1238 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1239} 1240 1241static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1242 TCGReg base, intptr_t ofs) 1243{ 1244 if (type <= TCG_TYPE_I64 && val == 0) { 1245 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1246 return true; 1247 } 1248 return false; 1249} 1250 1251static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1252 TCGReg rn, unsigned int a, unsigned int b) 1253{ 1254 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1255} 1256 1257static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1258 TCGReg rn, unsigned int a, unsigned int b) 1259{ 1260 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1261} 1262 1263static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1264 TCGReg rn, unsigned int a, unsigned int b) 1265{ 1266 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1267} 1268 1269static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1270 TCGReg rn, TCGReg rm, unsigned int a) 1271{ 1272 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1273} 1274 1275static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1276 TCGReg rd, TCGReg rn, unsigned int m) 1277{ 1278 int bits = ext ? 
64 : 32; 1279 int max = bits - 1; 1280 tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max); 1281} 1282 1283static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1284 TCGReg rd, TCGReg rn, unsigned int m) 1285{ 1286 int max = ext ? 63 : 31; 1287 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1288} 1289 1290static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1291 TCGReg rd, TCGReg rn, unsigned int m) 1292{ 1293 int max = ext ? 63 : 31; 1294 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1295} 1296 1297static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1298 TCGReg rd, TCGReg rn, unsigned int m) 1299{ 1300 int max = ext ? 63 : 31; 1301 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1302} 1303 1304static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1305 TCGReg rd, TCGReg rn, unsigned int m) 1306{ 1307 int max = ext ? 63 : 31; 1308 tcg_out_extr(s, ext, rd, rn, rn, -m & max); 1309} 1310 1311static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1312 TCGReg rn, unsigned lsb, unsigned width) 1313{ 1314 unsigned size = ext ? 64 : 32; 1315 unsigned a = (size - lsb) & (size - 1); 1316 unsigned b = width - 1; 1317 tcg_out_bfm(s, ext, rd, rn, a, b); 1318} 1319 1320static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1321 tcg_target_long b, bool const_b) 1322{ 1323 if (const_b) { 1324 /* Using CMP or CMN aliases. */ 1325 if (b >= 0) { 1326 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1327 } else { 1328 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1329 } 1330 } else { 1331 /* Using CMP alias SUBS wzr, Wn, Wm */ 1332 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1333 } 1334} 1335 1336static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1337{ 1338 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1339 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1340 tcg_out_insn(s, 3206, B, offset); 1341} 1342 1343static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1344{ 1345 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1346 if (offset == sextract64(offset, 0, 26)) { 1347 tcg_out_insn(s, 3206, B, offset); 1348 } else { 1349 /* Choose X9 as a call-clobbered non-LR temporary. 
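           TCG_REG_TMP is X30 (the link register), which must be preserved
           here: tcg_out_fail_alignment points LR at the return address and
           then tail-calls through this path.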
*/ 1350 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target); 1351 tcg_out_insn(s, 3207, BR, TCG_REG_X9); 1352 } 1353} 1354 1355static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) 1356{ 1357 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1358 if (offset == sextract64(offset, 0, 26)) { 1359 tcg_out_insn(s, 3206, BL, offset); 1360 } else { 1361 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1362 tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); 1363 } 1364} 1365 1366static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 1367 const TCGHelperInfo *info) 1368{ 1369 tcg_out_call_int(s, target); 1370} 1371 1372static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1373{ 1374 if (!l->has_value) { 1375 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1376 tcg_out_insn(s, 3206, B, 0); 1377 } else { 1378 tcg_out_goto(s, l->u.value_ptr); 1379 } 1380} 1381 1382static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1383 TCGArg b, bool b_const, TCGLabel *l) 1384{ 1385 intptr_t offset; 1386 bool need_cmp; 1387 1388 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1389 need_cmp = false; 1390 } else { 1391 need_cmp = true; 1392 tcg_out_cmp(s, ext, a, b, b_const); 1393 } 1394 1395 if (!l->has_value) { 1396 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1397 offset = tcg_in32(s) >> 5; 1398 } else { 1399 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1400 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1401 } 1402 1403 if (need_cmp) { 1404 tcg_out_insn(s, 3202, B_C, c, offset); 1405 } else if (c == TCG_COND_EQ) { 1406 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1407 } else { 1408 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1409 } 1410} 1411 1412static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, 1413 TCGReg rd, TCGReg rn) 1414{ 1415 /* REV, REV16, REV32 */ 1416 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); 1417} 1418 1419static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1420 TCGReg rd, TCGReg rn) 1421{ 1422 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1423 int bits = (8 << s_bits) - 1; 1424 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1425} 1426 1427static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn) 1428{ 1429 tcg_out_sxt(s, type, MO_8, rd, rn); 1430} 1431 1432static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn) 1433{ 1434 tcg_out_sxt(s, type, MO_16, rd, rn); 1435} 1436 1437static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn) 1438{ 1439 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn); 1440} 1441 1442static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn) 1443{ 1444 tcg_out_ext32s(s, rd, rn); 1445} 1446 1447static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1448 TCGReg rd, TCGReg rn) 1449{ 1450 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1451 int bits = (8 << s_bits) - 1; 1452 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1453} 1454 1455static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn) 1456{ 1457 tcg_out_uxt(s, MO_8, rd, rn); 1458} 1459 1460static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn) 1461{ 1462 tcg_out_uxt(s, MO_16, rd, rn); 1463} 1464 1465static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn) 1466{ 1467 tcg_out_movr(s, TCG_TYPE_I32, rd, rn); 1468} 1469 1470static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn) 1471{ 1472 tcg_out_ext32u(s, rd, rn); 1473} 1474 1475static void 
tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) 1476{ 1477 tcg_out_mov(s, TCG_TYPE_I32, rd, rn); 1478} 1479 1480static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1481 TCGReg rn, int64_t aimm) 1482{ 1483 if (aimm >= 0) { 1484 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1485 } else { 1486 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1487 } 1488} 1489 1490static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1491 TCGReg rh, TCGReg al, TCGReg ah, 1492 tcg_target_long bl, tcg_target_long bh, 1493 bool const_bl, bool const_bh, bool sub) 1494{ 1495 TCGReg orig_rl = rl; 1496 AArch64Insn insn; 1497 1498 if (rl == ah || (!const_bh && rl == bh)) { 1499 rl = TCG_REG_TMP; 1500 } 1501 1502 if (const_bl) { 1503 if (bl < 0) { 1504 bl = -bl; 1505 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1506 } else { 1507 insn = sub ? I3401_SUBSI : I3401_ADDSI; 1508 } 1509 1510 if (unlikely(al == TCG_REG_XZR)) { 1511 /* ??? We want to allow al to be zero for the benefit of 1512 negation via subtraction. However, that leaves open the 1513 possibility of adding 0+const in the low part, and the 1514 immediate add instructions encode XSP not XZR. Don't try 1515 anything more elaborate here than loading another zero. */ 1516 al = TCG_REG_TMP; 1517 tcg_out_movi(s, ext, al, 0); 1518 } 1519 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1520 } else { 1521 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1522 } 1523 1524 insn = I3503_ADC; 1525 if (const_bh) { 1526 /* Note that the only two constants we support are 0 and -1, and 1527 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1528 if ((bh != 0) ^ sub) { 1529 insn = I3503_SBC; 1530 } 1531 bh = TCG_REG_XZR; 1532 } else if (sub) { 1533 insn = I3503_SBC; 1534 } 1535 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1536 1537 tcg_out_mov(s, ext, orig_rl, rl); 1538} 1539 1540static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1541{ 1542 static const uint32_t sync[] = { 1543 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1544 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1545 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1546 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1547 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1548 }; 1549 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1550} 1551 1552static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1553 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1554{ 1555 TCGReg a1 = a0; 1556 if (is_ctz) { 1557 a1 = TCG_REG_TMP; 1558 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1559 } 1560 if (const_b && b == (ext ? 
64 : 32)) { 1561 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1562 } else { 1563 AArch64Insn sel = I3506_CSEL; 1564 1565 tcg_out_cmp(s, ext, a0, 0, 1); 1566 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1567 1568 if (const_b) { 1569 if (b == -1) { 1570 b = TCG_REG_XZR; 1571 sel = I3506_CSINV; 1572 } else if (b == 0) { 1573 b = TCG_REG_XZR; 1574 } else { 1575 tcg_out_movi(s, ext, d, b); 1576 b = d; 1577 } 1578 } 1579 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1580 } 1581} 1582 1583static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) 1584{ 1585 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1586 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1587 tcg_out_insn(s, 3406, ADR, rd, offset); 1588} 1589 1590typedef struct { 1591 TCGReg base; 1592 TCGReg index; 1593 TCGType index_ext; 1594} HostAddress; 1595 1596#ifdef CONFIG_SOFTMMU 1597/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1598 * MemOpIdx oi, uintptr_t ra) 1599 */ 1600static void * const qemu_ld_helpers[MO_SIZE + 1] = { 1601 [MO_8] = helper_ret_ldub_mmu, 1602#if HOST_BIG_ENDIAN 1603 [MO_16] = helper_be_lduw_mmu, 1604 [MO_32] = helper_be_ldul_mmu, 1605 [MO_64] = helper_be_ldq_mmu, 1606#else 1607 [MO_16] = helper_le_lduw_mmu, 1608 [MO_32] = helper_le_ldul_mmu, 1609 [MO_64] = helper_le_ldq_mmu, 1610#endif 1611}; 1612 1613/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1614 * uintxx_t val, MemOpIdx oi, 1615 * uintptr_t ra) 1616 */ 1617static void * const qemu_st_helpers[MO_SIZE + 1] = { 1618 [MO_8] = helper_ret_stb_mmu, 1619#if HOST_BIG_ENDIAN 1620 [MO_16] = helper_be_stw_mmu, 1621 [MO_32] = helper_be_stl_mmu, 1622 [MO_64] = helper_be_stq_mmu, 1623#else 1624 [MO_16] = helper_le_stw_mmu, 1625 [MO_32] = helper_le_stl_mmu, 1626 [MO_64] = helper_le_stq_mmu, 1627#endif 1628}; 1629 1630static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1631{ 1632 MemOpIdx oi = lb->oi; 1633 MemOp opc = get_memop(oi); 1634 1635 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1636 return false; 1637 } 1638 1639 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1640 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1641 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1642 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1643 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); 1644 1645 tcg_out_movext(s, lb->type, lb->datalo_reg, 1646 TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0); 1647 tcg_out_goto(s, lb->raddr); 1648 return true; 1649} 1650 1651static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1652{ 1653 MemOpIdx oi = lb->oi; 1654 MemOp opc = get_memop(oi); 1655 MemOp size = opc & MO_SIZE; 1656 1657 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1658 return false; 1659 } 1660 1661 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1662 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1663 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1664 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1665 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1666 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); 1667 tcg_out_goto(s, lb->raddr); 1668 return true; 1669} 1670 1671static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, 1672 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1673 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1674{ 1675 TCGLabelQemuLdst *label = new_ldst_label(s); 1676 1677 label->is_ld = is_ld; 1678 label->oi = oi; 1679 
label->type = ext; 1680 label->datalo_reg = data_reg; 1681 label->addrlo_reg = addr_reg; 1682 label->raddr = tcg_splitwx_to_rx(raddr); 1683 label->label_ptr[0] = label_ptr; 1684} 1685 1686/* We expect to use a 7-bit scaled negative offset from ENV. */ 1687QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1688QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1689 1690/* These offsets are built into the LDP below. */ 1691QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1692QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1693 1694/* Load and compare a TLB entry, emitting the conditional jump to the 1695 slow path for the failure case, which will be patched later when finalizing 1696 the slow path. Generated code returns the host addend in X1, 1697 clobbers X0,X2,X3,TMP. */ 1698static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, 1699 tcg_insn_unit **label_ptr, int mem_index, 1700 bool is_read) 1701{ 1702 unsigned a_bits = get_alignment_bits(opc); 1703 unsigned s_bits = opc & MO_SIZE; 1704 unsigned a_mask = (1u << a_bits) - 1; 1705 unsigned s_mask = (1u << s_bits) - 1; 1706 TCGReg x3; 1707 TCGType mask_type; 1708 uint64_t compare_mask; 1709 1710 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1711 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1712 1713 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1714 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1715 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1716 1717 /* Extract the TLB index from the address into X0. */ 1718 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1719 TCG_REG_X0, TCG_REG_X0, addr_reg, 1720 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1721 1722 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ 1723 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1724 1725 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1726 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read 1727 ? offsetof(CPUTLBEntry, addr_read) 1728 : offsetof(CPUTLBEntry, addr_write)); 1729 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1730 offsetof(CPUTLBEntry, addend)); 1731 1732 /* For aligned accesses, we check the first byte and include the alignment 1733 bits within the address. For unaligned access, we check that we don't 1734 cross pages using the address of the last byte of the access. */ 1735 if (a_bits >= s_bits) { 1736 x3 = addr_reg; 1737 } else { 1738 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1739 TCG_REG_X3, addr_reg, s_mask - a_mask); 1740 x3 = TCG_REG_X3; 1741 } 1742 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1743 1744 /* Store the page mask part of the address into X3. */ 1745 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1746 TCG_REG_X3, x3, compare_mask); 1747 1748 /* Perform the address comparison. */ 1749 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1750 1751 /* If not equal, we jump to the slow path. 
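       E.g. for an unaligned 4-byte access to the last byte of a page,
       addr + 3 above lands on the next page, so the masked value cannot
       match the TLB comparator and the B.NE below is taken.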
*/ 1752 *label_ptr = s->code_ptr; 1753 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1754} 1755 1756#else 1757static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, 1758 unsigned a_bits) 1759{ 1760 unsigned a_mask = (1 << a_bits) - 1; 1761 TCGLabelQemuLdst *label = new_ldst_label(s); 1762 1763 label->is_ld = is_ld; 1764 label->addrlo_reg = addr_reg; 1765 1766 /* tst addr, #mask */ 1767 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); 1768 1769 label->label_ptr[0] = s->code_ptr; 1770 1771 /* b.ne slow_path */ 1772 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1773 1774 label->raddr = tcg_splitwx_to_rx(s->code_ptr); 1775} 1776 1777static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) 1778{ 1779 if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1780 return false; 1781 } 1782 1783 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg); 1784 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1785 1786 /* "Tail call" to the helper, with the return address back inline. */ 1787 tcg_out_adr(s, TCG_REG_LR, l->raddr); 1788 tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld 1789 : helper_unaligned_st)); 1790 return true; 1791} 1792 1793static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1794{ 1795 return tcg_out_fail_alignment(s, l); 1796} 1797 1798static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1799{ 1800 return tcg_out_fail_alignment(s, l); 1801} 1802#endif /* CONFIG_SOFTMMU */ 1803 1804static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1805 TCGReg data_r, HostAddress h) 1806{ 1807 switch (memop & MO_SSIZE) { 1808 case MO_UB: 1809 tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index); 1810 break; 1811 case MO_SB: 1812 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1813 data_r, h.base, h.index_ext, h.index); 1814 break; 1815 case MO_UW: 1816 tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index); 1817 break; 1818 case MO_SW: 1819 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), 1820 data_r, h.base, h.index_ext, h.index); 1821 break; 1822 case MO_UL: 1823 tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index); 1824 break; 1825 case MO_SL: 1826 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index); 1827 break; 1828 case MO_UQ: 1829 tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index); 1830 break; 1831 default: 1832 g_assert_not_reached(); 1833 } 1834} 1835 1836static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1837 TCGReg data_r, HostAddress h) 1838{ 1839 switch (memop & MO_SIZE) { 1840 case MO_8: 1841 tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index); 1842 break; 1843 case MO_16: 1844 tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index); 1845 break; 1846 case MO_32: 1847 tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index); 1848 break; 1849 case MO_64: 1850 tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index); 1851 break; 1852 default: 1853 g_assert_not_reached(); 1854 } 1855} 1856 1857static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1858 MemOpIdx oi, TCGType data_type) 1859{ 1860 MemOp memop = get_memop(oi); 1861 TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1862 HostAddress h; 1863 1864 /* Byte swapping is left to middle-end expansion. 
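       (This backend does not advertise TCG_TARGET_HAS_MEMORY_BSWAP, so
       tcg-op is expected to have split any MO_BSWAP access into a
       host-order access plus an explicit bswap before we get here.)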
*/ 1865 tcg_debug_assert((memop & MO_BSWAP) == 0); 1866 1867#ifdef CONFIG_SOFTMMU 1868 tcg_insn_unit *label_ptr; 1869 1870 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1); 1871 1872 h = (HostAddress){ 1873 .base = TCG_REG_X1, 1874 .index = addr_reg, 1875 .index_ext = addr_type 1876 }; 1877 tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h); 1878 1879 add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, 1880 s->code_ptr, label_ptr); 1881#else /* !CONFIG_SOFTMMU */ 1882 unsigned a_bits = get_alignment_bits(memop); 1883 if (a_bits) { 1884 tcg_out_test_alignment(s, true, addr_reg, a_bits); 1885 } 1886 if (USE_GUEST_BASE) { 1887 h = (HostAddress){ 1888 .base = TCG_REG_GUEST_BASE, 1889 .index = addr_reg, 1890 .index_ext = addr_type 1891 }; 1892 } else { 1893 h = (HostAddress){ 1894 .base = addr_reg, 1895 .index = TCG_REG_XZR, 1896 .index_ext = TCG_TYPE_I64 1897 }; 1898 } 1899 tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h); 1900#endif /* CONFIG_SOFTMMU */ 1901} 1902 1903static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1904 MemOpIdx oi, TCGType data_type) 1905{ 1906 MemOp memop = get_memop(oi); 1907 TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1908 HostAddress h; 1909 1910 /* Byte swapping is left to middle-end expansion. */ 1911 tcg_debug_assert((memop & MO_BSWAP) == 0); 1912 1913#ifdef CONFIG_SOFTMMU 1914 tcg_insn_unit *label_ptr; 1915 1916 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0); 1917 1918 h = (HostAddress){ 1919 .base = TCG_REG_X1, 1920 .index = addr_reg, 1921 .index_ext = addr_type 1922 }; 1923 tcg_out_qemu_st_direct(s, memop, data_reg, h); 1924 1925 add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg, 1926 s->code_ptr, label_ptr); 1927#else /* !CONFIG_SOFTMMU */ 1928 unsigned a_bits = get_alignment_bits(memop); 1929 if (a_bits) { 1930 tcg_out_test_alignment(s, false, addr_reg, a_bits); 1931 } 1932 if (USE_GUEST_BASE) { 1933 h = (HostAddress){ 1934 .base = TCG_REG_GUEST_BASE, 1935 .index = addr_reg, 1936 .index_ext = addr_type 1937 }; 1938 } else { 1939 h = (HostAddress){ 1940 .base = addr_reg, 1941 .index = TCG_REG_XZR, 1942 .index_ext = TCG_TYPE_I64 1943 }; 1944 } 1945 tcg_out_qemu_st_direct(s, memop, data_reg, h); 1946#endif /* CONFIG_SOFTMMU */ 1947} 1948 1949static const tcg_insn_unit *tb_ret_addr; 1950 1951static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 1952{ 1953 /* Reuse the zeroing that exists for goto_ptr. */ 1954 if (a0 == 0) { 1955 tcg_out_goto_long(s, tcg_code_gen_epilogue); 1956 } else { 1957 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 1958 tcg_out_goto_long(s, tb_ret_addr); 1959 } 1960} 1961 1962static void tcg_out_goto_tb(TCGContext *s, int which) 1963{ 1964 /* 1965 * Direct branch, or indirect address load, will be patched 1966 * by tb_target_set_jmp_target. Assert indirect load offset 1967 * in range early, regardless of direct branch distance. 
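     * The direct form is a single B insn (26-bit immediate scaled by 4,
     * i.e. +/-128MB); the indirect form is an LDR (literal) of the
     * destination from tb->jmp_target_addr followed by BR TMP, and the
     * LDR literal only reaches +/-1MB, hence the 21-bit check on the
     * byte offset i_off below.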
1968 */ 1969 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); 1970 tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); 1971 1972 set_jmp_insn_offset(s, which); 1973 tcg_out32(s, I3206_B); 1974 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1975 set_jmp_reset_offset(s, which); 1976} 1977 1978void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 1979 uintptr_t jmp_rx, uintptr_t jmp_rw) 1980{ 1981 uintptr_t d_addr = tb->jmp_target_addr[n]; 1982 ptrdiff_t d_offset = d_addr - jmp_rx; 1983 tcg_insn_unit insn; 1984 1985 /* Either directly branch, or indirect branch load. */ 1986 if (d_offset == sextract64(d_offset, 0, 28)) { 1987 insn = deposit32(I3206_B, 0, 26, d_offset >> 2); 1988 } else { 1989 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 1990 ptrdiff_t i_offset = i_addr - jmp_rx; 1991 1992 /* Note that we asserted this in range in tcg_out_goto_tb. */ 1993 insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2); 1994 } 1995 qatomic_set((uint32_t *)jmp_rw, insn); 1996 flush_idcache_range(jmp_rx, jmp_rw, 4); 1997} 1998 1999static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2000 const TCGArg args[TCG_MAX_OP_ARGS], 2001 const int const_args[TCG_MAX_OP_ARGS]) 2002{ 2003 /* 99% of the time, we can signal the use of extension registers 2004 by looking to see if the opcode handles 64-bit data. */ 2005 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 2006 2007 /* Hoist the loads of the most common arguments. */ 2008 TCGArg a0 = args[0]; 2009 TCGArg a1 = args[1]; 2010 TCGArg a2 = args[2]; 2011 int c2 = const_args[2]; 2012 2013 /* Some operands are defined with "rZ" constraint, a register or 2014 the zero register. These need not actually test args[I] == 0. */ 2015#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 2016 2017 switch (opc) { 2018 case INDEX_op_goto_ptr: 2019 tcg_out_insn(s, 3207, BR, a0); 2020 break; 2021 2022 case INDEX_op_br: 2023 tcg_out_goto_label(s, arg_label(a0)); 2024 break; 2025 2026 case INDEX_op_ld8u_i32: 2027 case INDEX_op_ld8u_i64: 2028 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 2029 break; 2030 case INDEX_op_ld8s_i32: 2031 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 2032 break; 2033 case INDEX_op_ld8s_i64: 2034 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 2035 break; 2036 case INDEX_op_ld16u_i32: 2037 case INDEX_op_ld16u_i64: 2038 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 2039 break; 2040 case INDEX_op_ld16s_i32: 2041 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 2042 break; 2043 case INDEX_op_ld16s_i64: 2044 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 2045 break; 2046 case INDEX_op_ld_i32: 2047 case INDEX_op_ld32u_i64: 2048 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 2049 break; 2050 case INDEX_op_ld32s_i64: 2051 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 2052 break; 2053 case INDEX_op_ld_i64: 2054 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 2055 break; 2056 2057 case INDEX_op_st8_i32: 2058 case INDEX_op_st8_i64: 2059 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 2060 break; 2061 case INDEX_op_st16_i32: 2062 case INDEX_op_st16_i64: 2063 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 2064 break; 2065 case INDEX_op_st_i32: 2066 case INDEX_op_st32_i64: 2067 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 2068 break; 2069 case INDEX_op_st_i64: 2070 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 2071 break; 2072 2073 case INDEX_op_add_i32: 2074 a2 = (int32_t)a2; 2075 /* FALLTHRU */ 2076 case INDEX_op_add_i64: 2077 if (c2) { 2078 tcg_out_addsubi(s, ext, a0, a1, a2); 2079 } else { 2080 
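            /*
             * Register-register ADD.  The constant path above goes through
             * tcg_out_addsubi, which emits ADDI or SUBI using the A64
             * arithmetic immediate matched by is_aimm(): a 12-bit value,
             * optionally shifted left by 12 (e.g. 0xfff or 0xfff000).
             */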
tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 2081 } 2082 break; 2083 2084 case INDEX_op_sub_i32: 2085 a2 = (int32_t)a2; 2086 /* FALLTHRU */ 2087 case INDEX_op_sub_i64: 2088 if (c2) { 2089 tcg_out_addsubi(s, ext, a0, a1, -a2); 2090 } else { 2091 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 2092 } 2093 break; 2094 2095 case INDEX_op_neg_i64: 2096 case INDEX_op_neg_i32: 2097 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 2098 break; 2099 2100 case INDEX_op_and_i32: 2101 a2 = (int32_t)a2; 2102 /* FALLTHRU */ 2103 case INDEX_op_and_i64: 2104 if (c2) { 2105 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); 2106 } else { 2107 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 2108 } 2109 break; 2110 2111 case INDEX_op_andc_i32: 2112 a2 = (int32_t)a2; 2113 /* FALLTHRU */ 2114 case INDEX_op_andc_i64: 2115 if (c2) { 2116 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2117 } else { 2118 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2119 } 2120 break; 2121 2122 case INDEX_op_or_i32: 2123 a2 = (int32_t)a2; 2124 /* FALLTHRU */ 2125 case INDEX_op_or_i64: 2126 if (c2) { 2127 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2128 } else { 2129 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2130 } 2131 break; 2132 2133 case INDEX_op_orc_i32: 2134 a2 = (int32_t)a2; 2135 /* FALLTHRU */ 2136 case INDEX_op_orc_i64: 2137 if (c2) { 2138 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); 2139 } else { 2140 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2141 } 2142 break; 2143 2144 case INDEX_op_xor_i32: 2145 a2 = (int32_t)a2; 2146 /* FALLTHRU */ 2147 case INDEX_op_xor_i64: 2148 if (c2) { 2149 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2150 } else { 2151 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2152 } 2153 break; 2154 2155 case INDEX_op_eqv_i32: 2156 a2 = (int32_t)a2; 2157 /* FALLTHRU */ 2158 case INDEX_op_eqv_i64: 2159 if (c2) { 2160 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2161 } else { 2162 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2163 } 2164 break; 2165 2166 case INDEX_op_not_i64: 2167 case INDEX_op_not_i32: 2168 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2169 break; 2170 2171 case INDEX_op_mul_i64: 2172 case INDEX_op_mul_i32: 2173 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2174 break; 2175 2176 case INDEX_op_div_i64: 2177 case INDEX_op_div_i32: 2178 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2179 break; 2180 case INDEX_op_divu_i64: 2181 case INDEX_op_divu_i32: 2182 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2183 break; 2184 2185 case INDEX_op_rem_i64: 2186 case INDEX_op_rem_i32: 2187 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); 2188 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2189 break; 2190 case INDEX_op_remu_i64: 2191 case INDEX_op_remu_i32: 2192 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); 2193 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2194 break; 2195 2196 case INDEX_op_shl_i64: 2197 case INDEX_op_shl_i32: 2198 if (c2) { 2199 tcg_out_shl(s, ext, a0, a1, a2); 2200 } else { 2201 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2202 } 2203 break; 2204 2205 case INDEX_op_shr_i64: 2206 case INDEX_op_shr_i32: 2207 if (c2) { 2208 tcg_out_shr(s, ext, a0, a1, a2); 2209 } else { 2210 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2211 } 2212 break; 2213 2214 case INDEX_op_sar_i64: 2215 case INDEX_op_sar_i32: 2216 if (c2) { 2217 tcg_out_sar(s, ext, a0, a1, a2); 2218 } else { 2219 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2220 } 2221 break; 2222 2223 case INDEX_op_rotr_i64: 2224 case INDEX_op_rotr_i32: 2225 
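        /*
         * ROR (immediate) is an alias of EXTR with both sources equal,
         * which is what tcg_out_rotr uses; variable rotates use RORV.
         * AArch64 has no rotate-left, so rotl below is implemented as a
         * rotate-right by the negated count.
         */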
if (c2) { 2226 tcg_out_rotr(s, ext, a0, a1, a2); 2227 } else { 2228 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2229 } 2230 break; 2231 2232 case INDEX_op_rotl_i64: 2233 case INDEX_op_rotl_i32: 2234 if (c2) { 2235 tcg_out_rotl(s, ext, a0, a1, a2); 2236 } else { 2237 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); 2238 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); 2239 } 2240 break; 2241 2242 case INDEX_op_clz_i64: 2243 case INDEX_op_clz_i32: 2244 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2245 break; 2246 case INDEX_op_ctz_i64: 2247 case INDEX_op_ctz_i32: 2248 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2249 break; 2250 2251 case INDEX_op_brcond_i32: 2252 a1 = (int32_t)a1; 2253 /* FALLTHRU */ 2254 case INDEX_op_brcond_i64: 2255 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2256 break; 2257 2258 case INDEX_op_setcond_i32: 2259 a2 = (int32_t)a2; 2260 /* FALLTHRU */ 2261 case INDEX_op_setcond_i64: 2262 tcg_out_cmp(s, ext, a1, a2, c2); 2263 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2264 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2265 TCG_REG_XZR, tcg_invert_cond(args[3])); 2266 break; 2267 2268 case INDEX_op_movcond_i32: 2269 a2 = (int32_t)a2; 2270 /* FALLTHRU */ 2271 case INDEX_op_movcond_i64: 2272 tcg_out_cmp(s, ext, a1, a2, c2); 2273 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2274 break; 2275 2276 case INDEX_op_qemu_ld_i32: 2277 case INDEX_op_qemu_ld_i64: 2278 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2279 break; 2280 case INDEX_op_qemu_st_i32: 2281 case INDEX_op_qemu_st_i64: 2282 tcg_out_qemu_st(s, REG0(0), a1, a2, ext); 2283 break; 2284 2285 case INDEX_op_bswap64_i64: 2286 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2287 break; 2288 case INDEX_op_bswap32_i64: 2289 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2290 if (a2 & TCG_BSWAP_OS) { 2291 tcg_out_ext32s(s, a0, a0); 2292 } 2293 break; 2294 case INDEX_op_bswap32_i32: 2295 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2296 break; 2297 case INDEX_op_bswap16_i64: 2298 case INDEX_op_bswap16_i32: 2299 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2300 if (a2 & TCG_BSWAP_OS) { 2301 /* Output must be sign-extended. */ 2302 tcg_out_ext16s(s, ext, a0, a0); 2303 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2304 /* Output must be zero-extended, but input isn't. 
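               REV16 swaps bytes within each 16-bit lane, so whatever was in
               bits 16..31 of the input is still sitting above the result;
               only when the input is known zero-extended (TCG_BSWAP_IZ) can
               the zero-extension below be skipped.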
*/ 2305 tcg_out_ext16u(s, a0, a0); 2306 } 2307 break; 2308 2309 case INDEX_op_deposit_i64: 2310 case INDEX_op_deposit_i32: 2311 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2312 break; 2313 2314 case INDEX_op_extract_i64: 2315 case INDEX_op_extract_i32: 2316 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2317 break; 2318 2319 case INDEX_op_sextract_i64: 2320 case INDEX_op_sextract_i32: 2321 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2322 break; 2323 2324 case INDEX_op_extract2_i64: 2325 case INDEX_op_extract2_i32: 2326 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2327 break; 2328 2329 case INDEX_op_add2_i32: 2330 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2331 (int32_t)args[4], args[5], const_args[4], 2332 const_args[5], false); 2333 break; 2334 case INDEX_op_add2_i64: 2335 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2336 args[5], const_args[4], const_args[5], false); 2337 break; 2338 case INDEX_op_sub2_i32: 2339 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2340 (int32_t)args[4], args[5], const_args[4], 2341 const_args[5], true); 2342 break; 2343 case INDEX_op_sub2_i64: 2344 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2345 args[5], const_args[4], const_args[5], true); 2346 break; 2347 2348 case INDEX_op_muluh_i64: 2349 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2350 break; 2351 case INDEX_op_mulsh_i64: 2352 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2353 break; 2354 2355 case INDEX_op_mb: 2356 tcg_out_mb(s, a0); 2357 break; 2358 2359 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2360 case INDEX_op_mov_i64: 2361 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2362 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2363 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 2364 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
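            The generic code dispatches these to the backend's tcg_out_ext*
            hooks directly, so none of them should ever reach tcg_out_op.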
*/ 2365 case INDEX_op_ext8s_i64: 2366 case INDEX_op_ext8u_i32: 2367 case INDEX_op_ext8u_i64: 2368 case INDEX_op_ext16s_i64: 2369 case INDEX_op_ext16s_i32: 2370 case INDEX_op_ext16u_i64: 2371 case INDEX_op_ext16u_i32: 2372 case INDEX_op_ext32s_i64: 2373 case INDEX_op_ext32u_i64: 2374 case INDEX_op_ext_i32_i64: 2375 case INDEX_op_extu_i32_i64: 2376 case INDEX_op_extrl_i64_i32: 2377 default: 2378 g_assert_not_reached(); 2379 } 2380 2381#undef REG0 2382} 2383 2384static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2385 unsigned vecl, unsigned vece, 2386 const TCGArg args[TCG_MAX_OP_ARGS], 2387 const int const_args[TCG_MAX_OP_ARGS]) 2388{ 2389 static const AArch64Insn cmp_vec_insn[16] = { 2390 [TCG_COND_EQ] = I3616_CMEQ, 2391 [TCG_COND_GT] = I3616_CMGT, 2392 [TCG_COND_GE] = I3616_CMGE, 2393 [TCG_COND_GTU] = I3616_CMHI, 2394 [TCG_COND_GEU] = I3616_CMHS, 2395 }; 2396 static const AArch64Insn cmp_scalar_insn[16] = { 2397 [TCG_COND_EQ] = I3611_CMEQ, 2398 [TCG_COND_GT] = I3611_CMGT, 2399 [TCG_COND_GE] = I3611_CMGE, 2400 [TCG_COND_GTU] = I3611_CMHI, 2401 [TCG_COND_GEU] = I3611_CMHS, 2402 }; 2403 static const AArch64Insn cmp0_vec_insn[16] = { 2404 [TCG_COND_EQ] = I3617_CMEQ0, 2405 [TCG_COND_GT] = I3617_CMGT0, 2406 [TCG_COND_GE] = I3617_CMGE0, 2407 [TCG_COND_LT] = I3617_CMLT0, 2408 [TCG_COND_LE] = I3617_CMLE0, 2409 }; 2410 static const AArch64Insn cmp0_scalar_insn[16] = { 2411 [TCG_COND_EQ] = I3612_CMEQ0, 2412 [TCG_COND_GT] = I3612_CMGT0, 2413 [TCG_COND_GE] = I3612_CMGE0, 2414 [TCG_COND_LT] = I3612_CMLT0, 2415 [TCG_COND_LE] = I3612_CMLE0, 2416 }; 2417 2418 TCGType type = vecl + TCG_TYPE_V64; 2419 unsigned is_q = vecl; 2420 bool is_scalar = !is_q && vece == MO_64; 2421 TCGArg a0, a1, a2, a3; 2422 int cmode, imm8; 2423 2424 a0 = args[0]; 2425 a1 = args[1]; 2426 a2 = args[2]; 2427 2428 switch (opc) { 2429 case INDEX_op_ld_vec: 2430 tcg_out_ld(s, type, a0, a1, a2); 2431 break; 2432 case INDEX_op_st_vec: 2433 tcg_out_st(s, type, a0, a1, a2); 2434 break; 2435 case INDEX_op_dupm_vec: 2436 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2437 break; 2438 case INDEX_op_add_vec: 2439 if (is_scalar) { 2440 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2441 } else { 2442 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2443 } 2444 break; 2445 case INDEX_op_sub_vec: 2446 if (is_scalar) { 2447 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2448 } else { 2449 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2450 } 2451 break; 2452 case INDEX_op_mul_vec: 2453 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2454 break; 2455 case INDEX_op_neg_vec: 2456 if (is_scalar) { 2457 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2458 } else { 2459 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2460 } 2461 break; 2462 case INDEX_op_abs_vec: 2463 if (is_scalar) { 2464 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2465 } else { 2466 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2467 } 2468 break; 2469 case INDEX_op_and_vec: 2470 if (const_args[2]) { 2471 is_shimm1632(~a2, &cmode, &imm8); 2472 if (a0 == a1) { 2473 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2474 return; 2475 } 2476 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2477 a2 = a0; 2478 } 2479 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2480 break; 2481 case INDEX_op_or_vec: 2482 if (const_args[2]) { 2483 is_shimm1632(a2, &cmode, &imm8); 2484 if (a0 == a1) { 2485 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2486 return; 2487 } 2488 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2489 a2 = a0; 2490 } 2491 tcg_out_insn(s, 3616, ORR, 
is_q, 0, a0, a1, a2); 2492 break; 2493 case INDEX_op_andc_vec: 2494 if (const_args[2]) { 2495 is_shimm1632(a2, &cmode, &imm8); 2496 if (a0 == a1) { 2497 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2498 return; 2499 } 2500 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2501 a2 = a0; 2502 } 2503 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2504 break; 2505 case INDEX_op_orc_vec: 2506 if (const_args[2]) { 2507 is_shimm1632(~a2, &cmode, &imm8); 2508 if (a0 == a1) { 2509 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2510 return; 2511 } 2512 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2513 a2 = a0; 2514 } 2515 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2516 break; 2517 case INDEX_op_xor_vec: 2518 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2519 break; 2520 case INDEX_op_ssadd_vec: 2521 if (is_scalar) { 2522 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2523 } else { 2524 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2525 } 2526 break; 2527 case INDEX_op_sssub_vec: 2528 if (is_scalar) { 2529 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2530 } else { 2531 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2532 } 2533 break; 2534 case INDEX_op_usadd_vec: 2535 if (is_scalar) { 2536 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2537 } else { 2538 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2539 } 2540 break; 2541 case INDEX_op_ussub_vec: 2542 if (is_scalar) { 2543 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2544 } else { 2545 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2546 } 2547 break; 2548 case INDEX_op_smax_vec: 2549 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2550 break; 2551 case INDEX_op_smin_vec: 2552 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2553 break; 2554 case INDEX_op_umax_vec: 2555 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2556 break; 2557 case INDEX_op_umin_vec: 2558 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2559 break; 2560 case INDEX_op_not_vec: 2561 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2562 break; 2563 case INDEX_op_shli_vec: 2564 if (is_scalar) { 2565 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2566 } else { 2567 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2568 } 2569 break; 2570 case INDEX_op_shri_vec: 2571 if (is_scalar) { 2572 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2573 } else { 2574 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2575 } 2576 break; 2577 case INDEX_op_sari_vec: 2578 if (is_scalar) { 2579 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2580 } else { 2581 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2582 } 2583 break; 2584 case INDEX_op_aa64_sli_vec: 2585 if (is_scalar) { 2586 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2587 } else { 2588 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2589 } 2590 break; 2591 case INDEX_op_shlv_vec: 2592 if (is_scalar) { 2593 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2594 } else { 2595 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2596 } 2597 break; 2598 case INDEX_op_aa64_sshl_vec: 2599 if (is_scalar) { 2600 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2601 } else { 2602 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2603 } 2604 break; 2605 case INDEX_op_cmp_vec: 2606 { 2607 TCGCond cond = args[3]; 2608 AArch64Insn insn; 2609 2610 if (cond == TCG_COND_NE) { 2611 if (const_args[2]) { 2612 if (is_scalar) { 2613 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 
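                        /*
                         * NE against constant zero: CMTST with both source
                         * operands equal sets a lane to all-ones iff
                         * (lane & lane) != 0, i.e. iff the lane is non-zero,
                         * which is exactly x != 0.
                         */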
2614 } else { 2615 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2616 } 2617 } else { 2618 if (is_scalar) { 2619 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2620 } else { 2621 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2622 } 2623 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2624 } 2625 } else { 2626 if (const_args[2]) { 2627 if (is_scalar) { 2628 insn = cmp0_scalar_insn[cond]; 2629 if (insn) { 2630 tcg_out_insn_3612(s, insn, vece, a0, a1); 2631 break; 2632 } 2633 } else { 2634 insn = cmp0_vec_insn[cond]; 2635 if (insn) { 2636 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2637 break; 2638 } 2639 } 2640 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2641 a2 = TCG_VEC_TMP; 2642 } 2643 if (is_scalar) { 2644 insn = cmp_scalar_insn[cond]; 2645 if (insn == 0) { 2646 TCGArg t; 2647 t = a1, a1 = a2, a2 = t; 2648 cond = tcg_swap_cond(cond); 2649 insn = cmp_scalar_insn[cond]; 2650 tcg_debug_assert(insn != 0); 2651 } 2652 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2653 } else { 2654 insn = cmp_vec_insn[cond]; 2655 if (insn == 0) { 2656 TCGArg t; 2657 t = a1, a1 = a2, a2 = t; 2658 cond = tcg_swap_cond(cond); 2659 insn = cmp_vec_insn[cond]; 2660 tcg_debug_assert(insn != 0); 2661 } 2662 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2663 } 2664 } 2665 } 2666 break; 2667 2668 case INDEX_op_bitsel_vec: 2669 a3 = args[3]; 2670 if (a0 == a3) { 2671 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2672 } else if (a0 == a2) { 2673 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2674 } else { 2675 if (a0 != a1) { 2676 tcg_out_mov(s, type, a0, a1); 2677 } 2678 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2679 } 2680 break; 2681 2682 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2683 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2684 default: 2685 g_assert_not_reached(); 2686 } 2687} 2688 2689int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2690{ 2691 switch (opc) { 2692 case INDEX_op_add_vec: 2693 case INDEX_op_sub_vec: 2694 case INDEX_op_and_vec: 2695 case INDEX_op_or_vec: 2696 case INDEX_op_xor_vec: 2697 case INDEX_op_andc_vec: 2698 case INDEX_op_orc_vec: 2699 case INDEX_op_neg_vec: 2700 case INDEX_op_abs_vec: 2701 case INDEX_op_not_vec: 2702 case INDEX_op_cmp_vec: 2703 case INDEX_op_shli_vec: 2704 case INDEX_op_shri_vec: 2705 case INDEX_op_sari_vec: 2706 case INDEX_op_ssadd_vec: 2707 case INDEX_op_sssub_vec: 2708 case INDEX_op_usadd_vec: 2709 case INDEX_op_ussub_vec: 2710 case INDEX_op_shlv_vec: 2711 case INDEX_op_bitsel_vec: 2712 return 1; 2713 case INDEX_op_rotli_vec: 2714 case INDEX_op_shrv_vec: 2715 case INDEX_op_sarv_vec: 2716 case INDEX_op_rotlv_vec: 2717 case INDEX_op_rotrv_vec: 2718 return -1; 2719 case INDEX_op_mul_vec: 2720 case INDEX_op_smax_vec: 2721 case INDEX_op_smin_vec: 2722 case INDEX_op_umax_vec: 2723 case INDEX_op_umin_vec: 2724 return vece < MO_64; 2725 2726 default: 2727 return 0; 2728 } 2729} 2730 2731void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2732 TCGArg a0, ...) 
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64.
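           rotr(x, n) is built as (x USHL -n) | (x USHL (width - n)):
           t1 holds the negated count for the logical right shift, t2 holds
           width - n for the left shift, and the two halves are ORed
           together below.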
*/ 2788 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2789 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2790 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2791 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2792 tcg_gen_or_vec(vece, v0, t1, t2); 2793 tcg_temp_free_vec(t1); 2794 tcg_temp_free_vec(t2); 2795 break; 2796 2797 default: 2798 g_assert_not_reached(); 2799 } 2800} 2801 2802static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2803{ 2804 switch (op) { 2805 case INDEX_op_goto_ptr: 2806 return C_O0_I1(r); 2807 2808 case INDEX_op_ld8u_i32: 2809 case INDEX_op_ld8s_i32: 2810 case INDEX_op_ld16u_i32: 2811 case INDEX_op_ld16s_i32: 2812 case INDEX_op_ld_i32: 2813 case INDEX_op_ld8u_i64: 2814 case INDEX_op_ld8s_i64: 2815 case INDEX_op_ld16u_i64: 2816 case INDEX_op_ld16s_i64: 2817 case INDEX_op_ld32u_i64: 2818 case INDEX_op_ld32s_i64: 2819 case INDEX_op_ld_i64: 2820 case INDEX_op_neg_i32: 2821 case INDEX_op_neg_i64: 2822 case INDEX_op_not_i32: 2823 case INDEX_op_not_i64: 2824 case INDEX_op_bswap16_i32: 2825 case INDEX_op_bswap32_i32: 2826 case INDEX_op_bswap16_i64: 2827 case INDEX_op_bswap32_i64: 2828 case INDEX_op_bswap64_i64: 2829 case INDEX_op_ext8s_i32: 2830 case INDEX_op_ext16s_i32: 2831 case INDEX_op_ext8u_i32: 2832 case INDEX_op_ext16u_i32: 2833 case INDEX_op_ext8s_i64: 2834 case INDEX_op_ext16s_i64: 2835 case INDEX_op_ext32s_i64: 2836 case INDEX_op_ext8u_i64: 2837 case INDEX_op_ext16u_i64: 2838 case INDEX_op_ext32u_i64: 2839 case INDEX_op_ext_i32_i64: 2840 case INDEX_op_extu_i32_i64: 2841 case INDEX_op_extract_i32: 2842 case INDEX_op_extract_i64: 2843 case INDEX_op_sextract_i32: 2844 case INDEX_op_sextract_i64: 2845 return C_O1_I1(r, r); 2846 2847 case INDEX_op_st8_i32: 2848 case INDEX_op_st16_i32: 2849 case INDEX_op_st_i32: 2850 case INDEX_op_st8_i64: 2851 case INDEX_op_st16_i64: 2852 case INDEX_op_st32_i64: 2853 case INDEX_op_st_i64: 2854 return C_O0_I2(rZ, r); 2855 2856 case INDEX_op_add_i32: 2857 case INDEX_op_add_i64: 2858 case INDEX_op_sub_i32: 2859 case INDEX_op_sub_i64: 2860 case INDEX_op_setcond_i32: 2861 case INDEX_op_setcond_i64: 2862 return C_O1_I2(r, r, rA); 2863 2864 case INDEX_op_mul_i32: 2865 case INDEX_op_mul_i64: 2866 case INDEX_op_div_i32: 2867 case INDEX_op_div_i64: 2868 case INDEX_op_divu_i32: 2869 case INDEX_op_divu_i64: 2870 case INDEX_op_rem_i32: 2871 case INDEX_op_rem_i64: 2872 case INDEX_op_remu_i32: 2873 case INDEX_op_remu_i64: 2874 case INDEX_op_muluh_i64: 2875 case INDEX_op_mulsh_i64: 2876 return C_O1_I2(r, r, r); 2877 2878 case INDEX_op_and_i32: 2879 case INDEX_op_and_i64: 2880 case INDEX_op_or_i32: 2881 case INDEX_op_or_i64: 2882 case INDEX_op_xor_i32: 2883 case INDEX_op_xor_i64: 2884 case INDEX_op_andc_i32: 2885 case INDEX_op_andc_i64: 2886 case INDEX_op_orc_i32: 2887 case INDEX_op_orc_i64: 2888 case INDEX_op_eqv_i32: 2889 case INDEX_op_eqv_i64: 2890 return C_O1_I2(r, r, rL); 2891 2892 case INDEX_op_shl_i32: 2893 case INDEX_op_shr_i32: 2894 case INDEX_op_sar_i32: 2895 case INDEX_op_rotl_i32: 2896 case INDEX_op_rotr_i32: 2897 case INDEX_op_shl_i64: 2898 case INDEX_op_shr_i64: 2899 case INDEX_op_sar_i64: 2900 case INDEX_op_rotl_i64: 2901 case INDEX_op_rotr_i64: 2902 return C_O1_I2(r, r, ri); 2903 2904 case INDEX_op_clz_i32: 2905 case INDEX_op_ctz_i32: 2906 case INDEX_op_clz_i64: 2907 case INDEX_op_ctz_i64: 2908 return C_O1_I2(r, r, rAL); 2909 2910 case INDEX_op_brcond_i32: 2911 case INDEX_op_brcond_i64: 2912 return C_O0_I2(r, rA); 2913 2914 case INDEX_op_movcond_i32: 2915 case INDEX_op_movcond_i64: 2916 return C_O1_I4(r, r, rA, rZ, rZ); 
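    /*
     * Constraint letters used below (see tcg-target-con-str.h): 'r' is any
     * general register, 'w' any vector register, 'l' the qemu_ld/st subset
     * (ALL_QLDST_REGS), 'Z' constant zero (XZR), and the capitals select the
     * immediate tests above: 'A' arithmetic (is_aimm), 'L' logical (is_limm),
     * 'M' minus one, 'O'/'N' the vector bitwise immediates
     * (TCG_CT_CONST_ORRI/TCG_CT_CONST_ANDI).
     */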
2917 2918 case INDEX_op_qemu_ld_i32: 2919 case INDEX_op_qemu_ld_i64: 2920 return C_O1_I1(r, l); 2921 case INDEX_op_qemu_st_i32: 2922 case INDEX_op_qemu_st_i64: 2923 return C_O0_I2(lZ, l); 2924 2925 case INDEX_op_deposit_i32: 2926 case INDEX_op_deposit_i64: 2927 return C_O1_I2(r, 0, rZ); 2928 2929 case INDEX_op_extract2_i32: 2930 case INDEX_op_extract2_i64: 2931 return C_O1_I2(r, rZ, rZ); 2932 2933 case INDEX_op_add2_i32: 2934 case INDEX_op_add2_i64: 2935 case INDEX_op_sub2_i32: 2936 case INDEX_op_sub2_i64: 2937 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 2938 2939 case INDEX_op_add_vec: 2940 case INDEX_op_sub_vec: 2941 case INDEX_op_mul_vec: 2942 case INDEX_op_xor_vec: 2943 case INDEX_op_ssadd_vec: 2944 case INDEX_op_sssub_vec: 2945 case INDEX_op_usadd_vec: 2946 case INDEX_op_ussub_vec: 2947 case INDEX_op_smax_vec: 2948 case INDEX_op_smin_vec: 2949 case INDEX_op_umax_vec: 2950 case INDEX_op_umin_vec: 2951 case INDEX_op_shlv_vec: 2952 case INDEX_op_shrv_vec: 2953 case INDEX_op_sarv_vec: 2954 case INDEX_op_aa64_sshl_vec: 2955 return C_O1_I2(w, w, w); 2956 case INDEX_op_not_vec: 2957 case INDEX_op_neg_vec: 2958 case INDEX_op_abs_vec: 2959 case INDEX_op_shli_vec: 2960 case INDEX_op_shri_vec: 2961 case INDEX_op_sari_vec: 2962 return C_O1_I1(w, w); 2963 case INDEX_op_ld_vec: 2964 case INDEX_op_dupm_vec: 2965 return C_O1_I1(w, r); 2966 case INDEX_op_st_vec: 2967 return C_O0_I2(w, r); 2968 case INDEX_op_dup_vec: 2969 return C_O1_I1(w, wr); 2970 case INDEX_op_or_vec: 2971 case INDEX_op_andc_vec: 2972 return C_O1_I2(w, w, wO); 2973 case INDEX_op_and_vec: 2974 case INDEX_op_orc_vec: 2975 return C_O1_I2(w, w, wN); 2976 case INDEX_op_cmp_vec: 2977 return C_O1_I2(w, w, wZ); 2978 case INDEX_op_bitsel_vec: 2979 return C_O1_I3(w, w, w, w); 2980 case INDEX_op_aa64_sli_vec: 2981 return C_O1_I2(w, 0, w); 2982 2983 default: 2984 g_assert_not_reached(); 2985 } 2986} 2987 2988static void tcg_target_init(TCGContext *s) 2989{ 2990 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2991 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2992 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2993 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2994 2995 tcg_target_call_clobber_regs = -1ull; 2996 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2997 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2998 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 2999 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 3000 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 3001 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 3002 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 3003 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 3004 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 3005 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 3006 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 3007 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 3008 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 3009 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 3010 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 3011 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 3012 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 3013 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 3014 tcg_regset_reset_reg(tcg_target_call_clobber_regs, 
TCG_REG_V15); 3015 3016 s->reserved_regs = 0; 3017 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 3018 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 3019 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 3020 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 3021 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 3022} 3023 3024/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 3025#define PUSH_SIZE ((30 - 19 + 1) * 8) 3026 3027#define FRAME_SIZE \ 3028 ((PUSH_SIZE \ 3029 + TCG_STATIC_CALL_ARGS_SIZE \ 3030 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 3031 + TCG_TARGET_STACK_ALIGN - 1) \ 3032 & ~(TCG_TARGET_STACK_ALIGN - 1)) 3033 3034/* We're expecting a 2 byte uleb128 encoded value. */ 3035QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 3036 3037/* We're expecting to use a single ADDI insn. */ 3038QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 3039 3040static void tcg_target_qemu_prologue(TCGContext *s) 3041{ 3042 TCGReg r; 3043 3044 /* Push (FP, LR) and allocate space for all saved registers. */ 3045 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 3046 TCG_REG_SP, -PUSH_SIZE, 1, 1); 3047 3048 /* Set up frame pointer for canonical unwinding. */ 3049 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 3050 3051 /* Store callee-preserved regs x19..x28. */ 3052 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3053 int ofs = (r - TCG_REG_X19 + 2) * 8; 3054 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3055 } 3056 3057 /* Make stack space for TCG locals. */ 3058 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3059 FRAME_SIZE - PUSH_SIZE); 3060 3061 /* Inform TCG about how to find TCG locals with register, offset, size. */ 3062 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 3063 CPU_TEMP_BUF_NLONGS * sizeof(long)); 3064 3065#if !defined(CONFIG_SOFTMMU) 3066 if (USE_GUEST_BASE) { 3067 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 3068 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 3069 } 3070#endif 3071 3072 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 3073 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 3074 3075 /* 3076 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 3077 * and fall through to the rest of the epilogue. 3078 */ 3079 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 3080 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 3081 3082 /* TB epilogue */ 3083 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 3084 3085 /* Remove TCG locals stack space. */ 3086 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3087 FRAME_SIZE - PUSH_SIZE); 3088 3089 /* Restore registers x19..x28. */ 3090 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3091 int ofs = (r - TCG_REG_X19 + 2) * 8; 3092 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3093 } 3094 3095 /* Pop (FP, LR), restore SP to previous frame. 
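       The LDP uses a post-index writeback of +PUSH_SIZE, undoing the
       pre-indexed STP of -PUSH_SIZE in the prologue, so SP is back at the
       caller's value before the RET.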
     */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr,  -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp,  -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
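/*
 * Worked example for the FRAME_SIZE uleb128 in .fde_def_cfa above, assuming
 * the usual TCG configuration of TCG_STATIC_CALL_ARGS_SIZE == 128 and
 * CPU_TEMP_BUF_NLONGS == 128: FRAME_SIZE = 96 + 128 + 1024 = 1248, already
 * 16-byte aligned and within the 2-byte encoding the build-time assert
 * requires.  The encoding is
 *   low 7 bits:  1248 & 0x7f = 0x60, emitted as 0x60 | 0x80 = 0xe0
 *   next 7 bits: 1248 >> 7   = 0x09, emitted as-is (continuation bit clear)
 * so .fde_def_cfa ends with the bytes { 0xe0, 0x09 }.
 */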