/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.
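           (Both pointers are tcg_insn_unit *, so the offset is already
           counted in 32-bit instruction words rather than bytes; the low
           26 bits deposited below are the imm26 field of the B/BL
           encoding.)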
*/ 94 *src_rw = deposit32(*src_rw, 0, 26, offset); 95 return true; 96 } 97 return false; 98} 99 100static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 101{ 102 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 103 ptrdiff_t offset = target - src_rx; 104 105 if (offset == sextract64(offset, 0, 19)) { 106 *src_rw = deposit32(*src_rw, 5, 19, offset); 107 return true; 108 } 109 return false; 110} 111 112static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 113 intptr_t value, intptr_t addend) 114{ 115 tcg_debug_assert(addend == 0); 116 switch (type) { 117 case R_AARCH64_JUMP26: 118 case R_AARCH64_CALL26: 119 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 120 case R_AARCH64_CONDBR19: 121 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 122 default: 123 g_assert_not_reached(); 124 } 125} 126 127#define TCG_CT_CONST_AIMM 0x100 128#define TCG_CT_CONST_LIMM 0x200 129#define TCG_CT_CONST_ZERO 0x400 130#define TCG_CT_CONST_MONE 0x800 131#define TCG_CT_CONST_ORRI 0x1000 132#define TCG_CT_CONST_ANDI 0x2000 133 134#define ALL_GENERAL_REGS 0xffffffffu 135#define ALL_VECTOR_REGS 0xffffffff00000000ull 136 137#ifdef CONFIG_SOFTMMU 138#define ALL_QLDST_REGS \ 139 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ 140 (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) 141#else 142#define ALL_QLDST_REGS ALL_GENERAL_REGS 143#endif 144 145/* Match a constant valid for addition (12-bit, optionally shifted). */ 146static inline bool is_aimm(uint64_t val) 147{ 148 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 149} 150 151/* Match a constant valid for logical operations. */ 152static inline bool is_limm(uint64_t val) 153{ 154 /* Taking a simplified view of the logical immediates for now, ignoring 155 the replication that can happen across the field. Match bit patterns 156 of the forms 157 0....01....1 158 0..01..10..0 159 and their inverses. */ 160 161 /* Make things easier below, by testing the form with msb clear. */ 162 if ((int64_t)val < 0) { 163 val = ~val; 164 } 165 if (val == 0) { 166 return false; 167 } 168 val += val & -val; 169 return (val & (val - 1)) == 0; 170} 171 172/* Return true if v16 is a valid 16-bit shifted immediate. */ 173static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 174{ 175 if (v16 == (v16 & 0xff)) { 176 *cmode = 0x8; 177 *imm8 = v16 & 0xff; 178 return true; 179 } else if (v16 == (v16 & 0xff00)) { 180 *cmode = 0xa; 181 *imm8 = v16 >> 8; 182 return true; 183 } 184 return false; 185} 186 187/* Return true if v32 is a valid 32-bit shifted immediate. */ 188static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 189{ 190 if (v32 == (v32 & 0xff)) { 191 *cmode = 0x0; 192 *imm8 = v32 & 0xff; 193 return true; 194 } else if (v32 == (v32 & 0xff00)) { 195 *cmode = 0x2; 196 *imm8 = (v32 >> 8) & 0xff; 197 return true; 198 } else if (v32 == (v32 & 0xff0000)) { 199 *cmode = 0x4; 200 *imm8 = (v32 >> 16) & 0xff; 201 return true; 202 } else if (v32 == (v32 & 0xff000000)) { 203 *cmode = 0x6; 204 *imm8 = v32 >> 24; 205 return true; 206 } 207 return false; 208} 209 210/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 211static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 212{ 213 if ((v32 & 0xffff00ff) == 0xff) { 214 *cmode = 0xc; 215 *imm8 = (v32 >> 8) & 0xff; 216 return true; 217 } else if ((v32 & 0xff00ffff) == 0xffff) { 218 *cmode = 0xd; 219 *imm8 = (v32 >> 16) & 0xff; 220 return true; 221 } 222 return false; 223} 224 225/* Return true if v32 is a valid float32 immediate. 
*/ 226static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 227{ 228 if (extract32(v32, 0, 19) == 0 229 && (extract32(v32, 25, 6) == 0x20 230 || extract32(v32, 25, 6) == 0x1f)) { 231 *cmode = 0xf; 232 *imm8 = (extract32(v32, 31, 1) << 7) 233 | (extract32(v32, 25, 1) << 6) 234 | extract32(v32, 19, 6); 235 return true; 236 } 237 return false; 238} 239 240/* Return true if v64 is a valid float64 immediate. */ 241static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 242{ 243 if (extract64(v64, 0, 48) == 0 244 && (extract64(v64, 54, 9) == 0x100 245 || extract64(v64, 54, 9) == 0x0ff)) { 246 *cmode = 0xf; 247 *imm8 = (extract64(v64, 63, 1) << 7) 248 | (extract64(v64, 54, 1) << 6) 249 | extract64(v64, 48, 6); 250 return true; 251 } 252 return false; 253} 254 255/* 256 * Return non-zero if v32 can be formed by MOVI+ORR. 257 * Place the parameters for MOVI in (cmode, imm8). 258 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 259 */ 260static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 261{ 262 int i; 263 264 for (i = 6; i > 0; i -= 2) { 265 /* Mask out one byte we can add with ORR. */ 266 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 267 if (is_shimm32(tmp, cmode, imm8) || 268 is_soimm32(tmp, cmode, imm8)) { 269 break; 270 } 271 } 272 return i; 273} 274 275/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 276static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 277{ 278 if (v32 == deposit32(v32, 16, 16, v32)) { 279 return is_shimm16(v32, cmode, imm8); 280 } else { 281 return is_shimm32(v32, cmode, imm8); 282 } 283} 284 285static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 286{ 287 if (ct & TCG_CT_CONST) { 288 return 1; 289 } 290 if (type == TCG_TYPE_I32) { 291 val = (int32_t)val; 292 } 293 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 294 return 1; 295 } 296 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 297 return 1; 298 } 299 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 300 return 1; 301 } 302 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 303 return 1; 304 } 305 306 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 307 case 0: 308 break; 309 case TCG_CT_CONST_ANDI: 310 val = ~val; 311 /* fallthru */ 312 case TCG_CT_CONST_ORRI: 313 if (val == deposit64(val, 32, 32, val)) { 314 int cmode, imm8; 315 return is_shimm1632(val, &cmode, &imm8); 316 } 317 break; 318 default: 319 /* Both bits should not be set for the same insn. 
*/ 320 g_assert_not_reached(); 321 } 322 323 return 0; 324} 325 326enum aarch64_cond_code { 327 COND_EQ = 0x0, 328 COND_NE = 0x1, 329 COND_CS = 0x2, /* Unsigned greater or equal */ 330 COND_HS = COND_CS, /* ALIAS greater or equal */ 331 COND_CC = 0x3, /* Unsigned less than */ 332 COND_LO = COND_CC, /* ALIAS Lower */ 333 COND_MI = 0x4, /* Negative */ 334 COND_PL = 0x5, /* Zero or greater */ 335 COND_VS = 0x6, /* Overflow */ 336 COND_VC = 0x7, /* No overflow */ 337 COND_HI = 0x8, /* Unsigned greater than */ 338 COND_LS = 0x9, /* Unsigned less or equal */ 339 COND_GE = 0xa, 340 COND_LT = 0xb, 341 COND_GT = 0xc, 342 COND_LE = 0xd, 343 COND_AL = 0xe, 344 COND_NV = 0xf, /* behaves like COND_AL here */ 345}; 346 347static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 348 [TCG_COND_EQ] = COND_EQ, 349 [TCG_COND_NE] = COND_NE, 350 [TCG_COND_LT] = COND_LT, 351 [TCG_COND_GE] = COND_GE, 352 [TCG_COND_LE] = COND_LE, 353 [TCG_COND_GT] = COND_GT, 354 /* unsigned */ 355 [TCG_COND_LTU] = COND_LO, 356 [TCG_COND_GTU] = COND_HI, 357 [TCG_COND_GEU] = COND_HS, 358 [TCG_COND_LEU] = COND_LS, 359}; 360 361typedef enum { 362 LDST_ST = 0, /* store */ 363 LDST_LD = 1, /* load */ 364 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 365 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 366} AArch64LdstType; 367 368/* We encode the format of the insn into the beginning of the name, so that 369 we can have the preprocessor help "typecheck" the insn vs the output 370 function. Arm didn't provide us with nice names for the formats, so we 371 use the section number of the architecture reference manual in which the 372 instruction group is described. */ 373typedef enum { 374 /* Compare and branch (immediate). */ 375 I3201_CBZ = 0x34000000, 376 I3201_CBNZ = 0x35000000, 377 378 /* Conditional branch (immediate). */ 379 I3202_B_C = 0x54000000, 380 381 /* Unconditional branch (immediate). */ 382 I3206_B = 0x14000000, 383 I3206_BL = 0x94000000, 384 385 /* Unconditional branch (register). */ 386 I3207_BR = 0xd61f0000, 387 I3207_BLR = 0xd63f0000, 388 I3207_RET = 0xd65f0000, 389 390 /* AdvSIMD load/store single structure. */ 391 I3303_LD1R = 0x0d40c000, 392 393 /* Load literal for loading the address at pc-relative offset */ 394 I3305_LDR = 0x58000000, 395 I3305_LDR_v64 = 0x5c000000, 396 I3305_LDR_v128 = 0x9c000000, 397 398 /* Load/store register. Described here as 3.3.12, but the helper 399 that emits them can transform to 3.3.10 or 3.3.13. 
*/ 400 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 401 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 402 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 403 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 404 405 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 406 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 407 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 408 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 409 410 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 411 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 412 413 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 414 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 415 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 416 417 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 418 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 419 420 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 421 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 422 423 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 424 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 425 426 I3312_TO_I3310 = 0x00200800, 427 I3312_TO_I3313 = 0x01000000, 428 429 /* Load/store register pair instructions. */ 430 I3314_LDP = 0x28400000, 431 I3314_STP = 0x28000000, 432 433 /* Add/subtract immediate instructions. */ 434 I3401_ADDI = 0x11000000, 435 I3401_ADDSI = 0x31000000, 436 I3401_SUBI = 0x51000000, 437 I3401_SUBSI = 0x71000000, 438 439 /* Bitfield instructions. */ 440 I3402_BFM = 0x33000000, 441 I3402_SBFM = 0x13000000, 442 I3402_UBFM = 0x53000000, 443 444 /* Extract instruction. */ 445 I3403_EXTR = 0x13800000, 446 447 /* Logical immediate instructions. */ 448 I3404_ANDI = 0x12000000, 449 I3404_ORRI = 0x32000000, 450 I3404_EORI = 0x52000000, 451 I3404_ANDSI = 0x72000000, 452 453 /* Move wide immediate instructions. */ 454 I3405_MOVN = 0x12800000, 455 I3405_MOVZ = 0x52800000, 456 I3405_MOVK = 0x72800000, 457 458 /* PC relative addressing instructions. */ 459 I3406_ADR = 0x10000000, 460 I3406_ADRP = 0x90000000, 461 462 /* Add/subtract shifted register instructions (without a shift). */ 463 I3502_ADD = 0x0b000000, 464 I3502_ADDS = 0x2b000000, 465 I3502_SUB = 0x4b000000, 466 I3502_SUBS = 0x6b000000, 467 468 /* Add/subtract shifted register instructions (with a shift). */ 469 I3502S_ADD_LSL = I3502_ADD, 470 471 /* Add/subtract with carry instructions. */ 472 I3503_ADC = 0x1a000000, 473 I3503_SBC = 0x5a000000, 474 475 /* Conditional select instructions. */ 476 I3506_CSEL = 0x1a800000, 477 I3506_CSINC = 0x1a800400, 478 I3506_CSINV = 0x5a800000, 479 I3506_CSNEG = 0x5a800400, 480 481 /* Data-processing (1 source) instructions. */ 482 I3507_CLZ = 0x5ac01000, 483 I3507_RBIT = 0x5ac00000, 484 I3507_REV = 0x5ac00000, /* + size << 10 */ 485 486 /* Data-processing (2 source) instructions. */ 487 I3508_LSLV = 0x1ac02000, 488 I3508_LSRV = 0x1ac02400, 489 I3508_ASRV = 0x1ac02800, 490 I3508_RORV = 0x1ac02c00, 491 I3508_SMULH = 0x9b407c00, 492 I3508_UMULH = 0x9bc07c00, 493 I3508_UDIV = 0x1ac00800, 494 I3508_SDIV = 0x1ac00c00, 495 496 /* Data-processing (3 source) instructions. */ 497 I3509_MADD = 0x1b000000, 498 I3509_MSUB = 0x1b008000, 499 500 /* Logical shifted register instructions (without a shift). 
*/ 501 I3510_AND = 0x0a000000, 502 I3510_BIC = 0x0a200000, 503 I3510_ORR = 0x2a000000, 504 I3510_ORN = 0x2a200000, 505 I3510_EOR = 0x4a000000, 506 I3510_EON = 0x4a200000, 507 I3510_ANDS = 0x6a000000, 508 509 /* Logical shifted register instructions (with a shift). */ 510 I3502S_AND_LSR = I3510_AND | (1 << 22), 511 512 /* AdvSIMD copy */ 513 I3605_DUP = 0x0e000400, 514 I3605_INS = 0x4e001c00, 515 I3605_UMOV = 0x0e003c00, 516 517 /* AdvSIMD modified immediate */ 518 I3606_MOVI = 0x0f000400, 519 I3606_MVNI = 0x2f000400, 520 I3606_BIC = 0x2f001400, 521 I3606_ORR = 0x0f001400, 522 523 /* AdvSIMD scalar shift by immediate */ 524 I3609_SSHR = 0x5f000400, 525 I3609_SSRA = 0x5f001400, 526 I3609_SHL = 0x5f005400, 527 I3609_USHR = 0x7f000400, 528 I3609_USRA = 0x7f001400, 529 I3609_SLI = 0x7f005400, 530 531 /* AdvSIMD scalar three same */ 532 I3611_SQADD = 0x5e200c00, 533 I3611_SQSUB = 0x5e202c00, 534 I3611_CMGT = 0x5e203400, 535 I3611_CMGE = 0x5e203c00, 536 I3611_SSHL = 0x5e204400, 537 I3611_ADD = 0x5e208400, 538 I3611_CMTST = 0x5e208c00, 539 I3611_UQADD = 0x7e200c00, 540 I3611_UQSUB = 0x7e202c00, 541 I3611_CMHI = 0x7e203400, 542 I3611_CMHS = 0x7e203c00, 543 I3611_USHL = 0x7e204400, 544 I3611_SUB = 0x7e208400, 545 I3611_CMEQ = 0x7e208c00, 546 547 /* AdvSIMD scalar two-reg misc */ 548 I3612_CMGT0 = 0x5e208800, 549 I3612_CMEQ0 = 0x5e209800, 550 I3612_CMLT0 = 0x5e20a800, 551 I3612_ABS = 0x5e20b800, 552 I3612_CMGE0 = 0x7e208800, 553 I3612_CMLE0 = 0x7e209800, 554 I3612_NEG = 0x7e20b800, 555 556 /* AdvSIMD shift by immediate */ 557 I3614_SSHR = 0x0f000400, 558 I3614_SSRA = 0x0f001400, 559 I3614_SHL = 0x0f005400, 560 I3614_SLI = 0x2f005400, 561 I3614_USHR = 0x2f000400, 562 I3614_USRA = 0x2f001400, 563 564 /* AdvSIMD three same. */ 565 I3616_ADD = 0x0e208400, 566 I3616_AND = 0x0e201c00, 567 I3616_BIC = 0x0e601c00, 568 I3616_BIF = 0x2ee01c00, 569 I3616_BIT = 0x2ea01c00, 570 I3616_BSL = 0x2e601c00, 571 I3616_EOR = 0x2e201c00, 572 I3616_MUL = 0x0e209c00, 573 I3616_ORR = 0x0ea01c00, 574 I3616_ORN = 0x0ee01c00, 575 I3616_SUB = 0x2e208400, 576 I3616_CMGT = 0x0e203400, 577 I3616_CMGE = 0x0e203c00, 578 I3616_CMTST = 0x0e208c00, 579 I3616_CMHI = 0x2e203400, 580 I3616_CMHS = 0x2e203c00, 581 I3616_CMEQ = 0x2e208c00, 582 I3616_SMAX = 0x0e206400, 583 I3616_SMIN = 0x0e206c00, 584 I3616_SSHL = 0x0e204400, 585 I3616_SQADD = 0x0e200c00, 586 I3616_SQSUB = 0x0e202c00, 587 I3616_UMAX = 0x2e206400, 588 I3616_UMIN = 0x2e206c00, 589 I3616_UQADD = 0x2e200c00, 590 I3616_UQSUB = 0x2e202c00, 591 I3616_USHL = 0x2e204400, 592 593 /* AdvSIMD two-reg misc. */ 594 I3617_CMGT0 = 0x0e208800, 595 I3617_CMEQ0 = 0x0e209800, 596 I3617_CMLT0 = 0x0e20a800, 597 I3617_CMGE0 = 0x2e208800, 598 I3617_CMLE0 = 0x2e209800, 599 I3617_NOT = 0x2e205800, 600 I3617_ABS = 0x0e20b800, 601 I3617_NEG = 0x2e20b800, 602 603 /* System instructions. */ 604 NOP = 0xd503201f, 605 DMB_ISH = 0xd50338bf, 606 DMB_LD = 0x00000100, 607 DMB_ST = 0x00000200, 608} AArch64Insn; 609 610static inline uint32_t tcg_in32(TCGContext *s) 611{ 612 uint32_t v = *(uint32_t *)s->code_ptr; 613 return v; 614} 615 616/* Emit an opcode with "type-checking" of the format. */ 617#define tcg_out_insn(S, FMT, OP, ...) 
\ 618 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 619 620static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 621 TCGReg rt, TCGReg rn, unsigned size) 622{ 623 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 624} 625 626static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 627 int imm19, TCGReg rt) 628{ 629 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 630} 631 632static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 633 TCGReg rt, int imm19) 634{ 635 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 636} 637 638static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 639 TCGCond c, int imm19) 640{ 641 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 642} 643 644static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 645{ 646 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 647} 648 649static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 650{ 651 tcg_out32(s, insn | rn << 5); 652} 653 654static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 655 TCGReg r1, TCGReg r2, TCGReg rn, 656 tcg_target_long ofs, bool pre, bool w) 657{ 658 insn |= 1u << 31; /* ext */ 659 insn |= pre << 24; 660 insn |= w << 23; 661 662 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 663 insn |= (ofs & (0x7f << 3)) << (15 - 3); 664 665 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 666} 667 668static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 669 TCGReg rd, TCGReg rn, uint64_t aimm) 670{ 671 if (aimm > 0xfff) { 672 tcg_debug_assert((aimm & 0xfff) == 0); 673 aimm >>= 12; 674 tcg_debug_assert(aimm <= 0xfff); 675 aimm |= 1 << 12; /* apply LSL 12 */ 676 } 677 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 678} 679 680/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 681 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 682 that feed the DecodeBitMasks pseudo function. */ 683static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 684 TCGReg rd, TCGReg rn, int n, int immr, int imms) 685{ 686 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 687 | rn << 5 | rd); 688} 689 690#define tcg_out_insn_3404 tcg_out_insn_3402 691 692static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 693 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 694{ 695 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 696 | rn << 5 | rd); 697} 698 699/* This function is used for the Move (wide immediate) instruction group. 700 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 701static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 702 TCGReg rd, uint16_t half, unsigned shift) 703{ 704 tcg_debug_assert((shift & ~0x30) == 0); 705 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 706} 707 708static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 709 TCGReg rd, int64_t disp) 710{ 711 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 712} 713 714/* This function is for both 3.5.2 (Add/Subtract shifted register), for 715 the rare occasion when we actually want to supply a shift amount. 
 */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
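    /*
     * Illustrative note: OR-ing in I3312_TO_I3310 below selects the
     * register-offset addressing form, and "0x4000 | ext << 13" sets the
     * option field to UXTW for TCG_TYPE_I32 (zero-extend the W offset
     * register) or LSL for TCG_TYPE_I64 (use the X offset register
     * as-is), with a zero shift amount.  E.g. tcg_out_ldst_r(s,
     * I3312_LDRX, rd, base, TCG_TYPE_I64, roff) produces
     * "ldr x<rd>, [x<base>, x<roff>]".
     */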
816 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 817 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 818 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 819} 820 821static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 822 TCGReg rd, TCGReg rn, intptr_t offset) 823{ 824 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 825} 826 827static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 828 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 829{ 830 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 831 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 832 | rn << 5 | (rd & 0x1f)); 833} 834 835/* Register to register move using ORR (shifted register with no shift). */ 836static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 837{ 838 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 839} 840 841/* Register to register move using ADDI (move to/from SP). */ 842static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 843{ 844 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 845} 846 847/* This function is used for the Logical (immediate) instruction group. 848 The value of LIMM must satisfy IS_LIMM. See the comment above about 849 only supporting simplified logical immediates. */ 850static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 851 TCGReg rd, TCGReg rn, uint64_t limm) 852{ 853 unsigned h, l, r, c; 854 855 tcg_debug_assert(is_limm(limm)); 856 857 h = clz64(limm); 858 l = ctz64(limm); 859 if (l == 0) { 860 r = 0; /* form 0....01....1 */ 861 c = ctz64(~limm) - 1; 862 if (h == 0) { 863 r = clz64(~limm); /* form 1..10..01..1 */ 864 c += r; 865 } 866 } else { 867 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 868 c = r - h - 1; 869 } 870 if (ext == TCG_TYPE_I32) { 871 r &= 31; 872 c &= 31; 873 } 874 875 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 876} 877 878static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 879 TCGReg rd, int64_t v64) 880{ 881 bool q = type == TCG_TYPE_V128; 882 int cmode, imm8, i; 883 884 /* Test all bytes equal first. */ 885 if (vece == MO_8) { 886 imm8 = (uint8_t)v64; 887 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 888 return; 889 } 890 891 /* 892 * Test all bytes 0x00 or 0xff second. This can match cases that 893 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 894 */ 895 for (i = imm8 = 0; i < 8; i++) { 896 uint8_t byte = v64 >> (i * 8); 897 if (byte == 0xff) { 898 imm8 |= 1 << i; 899 } else if (byte != 0) { 900 goto fail_bytes; 901 } 902 } 903 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 904 return; 905 fail_bytes: 906 907 /* 908 * Tests for various replications. For each element width, if we 909 * cannot find an expansion there's no point checking a larger 910 * width because we already know by replication it cannot match. 911 */ 912 if (vece == MO_16) { 913 uint16_t v16 = v64; 914 915 if (is_shimm16(v16, &cmode, &imm8)) { 916 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 917 return; 918 } 919 if (is_shimm16(~v16, &cmode, &imm8)) { 920 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 921 return; 922 } 923 924 /* 925 * Otherwise, all remaining constants can be loaded in two insns: 926 * rd = v16 & 0xff, rd |= v16 & 0xff00. 
927 */ 928 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 929 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 930 return; 931 } else if (vece == MO_32) { 932 uint32_t v32 = v64; 933 uint32_t n32 = ~v32; 934 935 if (is_shimm32(v32, &cmode, &imm8) || 936 is_soimm32(v32, &cmode, &imm8) || 937 is_fimm32(v32, &cmode, &imm8)) { 938 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 939 return; 940 } 941 if (is_shimm32(n32, &cmode, &imm8) || 942 is_soimm32(n32, &cmode, &imm8)) { 943 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 944 return; 945 } 946 947 /* 948 * Restrict the set of constants to those we can load with 949 * two instructions. Others we load from the pool. 950 */ 951 i = is_shimm32_pair(v32, &cmode, &imm8); 952 if (i) { 953 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 954 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 955 return; 956 } 957 i = is_shimm32_pair(n32, &cmode, &imm8); 958 if (i) { 959 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 960 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 961 return; 962 } 963 } else if (is_fimm64(v64, &cmode, &imm8)) { 964 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 965 return; 966 } 967 968 /* 969 * As a last resort, load from the constant pool. Sadly there 970 * is no LD1R (literal), so store the full 16-byte vector. 971 */ 972 if (type == TCG_TYPE_V128) { 973 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 974 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 975 } else { 976 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 977 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 978 } 979} 980 981static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 982 TCGReg rd, TCGReg rs) 983{ 984 int is_q = type - TCG_TYPE_V64; 985 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 986 return true; 987} 988 989static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 990 TCGReg r, TCGReg base, intptr_t offset) 991{ 992 TCGReg temp = TCG_REG_TMP; 993 994 if (offset < -0xffffff || offset > 0xffffff) { 995 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 996 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 997 base = temp; 998 } else { 999 AArch64Insn add_insn = I3401_ADDI; 1000 1001 if (offset < 0) { 1002 add_insn = I3401_SUBI; 1003 offset = -offset; 1004 } 1005 if (offset & 0xfff000) { 1006 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1007 base = temp; 1008 } 1009 if (offset & 0xfff) { 1010 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1011 base = temp; 1012 } 1013 } 1014 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1015 return true; 1016} 1017 1018static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1019 tcg_target_long value) 1020{ 1021 tcg_target_long svalue = value; 1022 tcg_target_long ivalue = ~value; 1023 tcg_target_long t0, t1, t2; 1024 int s0, s1; 1025 AArch64Insn opc; 1026 1027 switch (type) { 1028 case TCG_TYPE_I32: 1029 case TCG_TYPE_I64: 1030 tcg_debug_assert(rd < 32); 1031 break; 1032 default: 1033 g_assert_not_reached(); 1034 } 1035 1036 /* For 32-bit values, discard potential garbage in value. For 64-bit 1037 values within [2**31, 2**32-1], we can create smaller sequences by 1038 interpreting this as a negative 32-bit number, while ensuring that 1039 the high 32 bits are cleared by setting SF=0. 
*/ 1040 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1041 svalue = (int32_t)value; 1042 value = (uint32_t)value; 1043 ivalue = (uint32_t)ivalue; 1044 type = TCG_TYPE_I32; 1045 } 1046 1047 /* Speed things up by handling the common case of small positive 1048 and negative values specially. */ 1049 if ((value & ~0xffffull) == 0) { 1050 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1051 return; 1052 } else if ((ivalue & ~0xffffull) == 0) { 1053 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1054 return; 1055 } 1056 1057 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1058 use the sign-extended value. That lets us match rotated values such 1059 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1060 if (is_limm(svalue)) { 1061 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1062 return; 1063 } 1064 1065 /* Look for host pointer values within 4G of the PC. This happens 1066 often when loading pointers to QEMU's own data structures. */ 1067 if (type == TCG_TYPE_I64) { 1068 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1069 tcg_target_long disp = value - src_rx; 1070 if (disp == sextract64(disp, 0, 21)) { 1071 tcg_out_insn(s, 3406, ADR, rd, disp); 1072 return; 1073 } 1074 disp = (value >> 12) - (src_rx >> 12); 1075 if (disp == sextract64(disp, 0, 21)) { 1076 tcg_out_insn(s, 3406, ADRP, rd, disp); 1077 if (value & 0xfff) { 1078 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1079 } 1080 return; 1081 } 1082 } 1083 1084 /* Would it take fewer insns to begin with MOVN? */ 1085 if (ctpop64(value) >= 32) { 1086 t0 = ivalue; 1087 opc = I3405_MOVN; 1088 } else { 1089 t0 = value; 1090 opc = I3405_MOVZ; 1091 } 1092 s0 = ctz64(t0) & (63 & -16); 1093 t1 = t0 & ~(0xffffull << s0); 1094 s1 = ctz64(t1) & (63 & -16); 1095 t2 = t1 & ~(0xffffull << s1); 1096 if (t2 == 0) { 1097 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1098 if (t1 != 0) { 1099 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1100 } 1101 return; 1102 } 1103 1104 /* For more than 2 insns, dump it into the constant pool. */ 1105 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1106 tcg_out_insn(s, 3305, LDR, 0, rd); 1107} 1108 1109static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1110 tcg_target_long imm) 1111{ 1112 /* This function is only used for passing structs by reference. */ 1113 g_assert_not_reached(); 1114} 1115 1116/* Define something more legible for general use. */ 1117#define tcg_out_ldst_r tcg_out_insn_3310 1118 1119static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1120 TCGReg rn, intptr_t offset, int lgsize) 1121{ 1122 /* If the offset is naturally aligned and in range, then we can 1123 use the scaled uimm12 encoding */ 1124 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1125 uintptr_t scaled_uimm = offset >> lgsize; 1126 if (scaled_uimm <= 0xfff) { 1127 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1128 return; 1129 } 1130 } 1131 1132 /* Small signed offsets can use the unscaled encoding. */ 1133 if (offset >= -256 && offset < 256) { 1134 tcg_out_insn_3312(s, insn, rd, rn, offset); 1135 return; 1136 } 1137 1138 /* Worst-case scenario, move offset to temp register, use reg offset. 
*/ 1139 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1140 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1141} 1142 1143static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1144{ 1145 if (ret == arg) { 1146 return true; 1147 } 1148 switch (type) { 1149 case TCG_TYPE_I32: 1150 case TCG_TYPE_I64: 1151 if (ret < 32 && arg < 32) { 1152 tcg_out_movr(s, type, ret, arg); 1153 break; 1154 } else if (ret < 32) { 1155 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1156 break; 1157 } else if (arg < 32) { 1158 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1159 break; 1160 } 1161 /* FALLTHRU */ 1162 1163 case TCG_TYPE_V64: 1164 tcg_debug_assert(ret >= 32 && arg >= 32); 1165 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1166 break; 1167 case TCG_TYPE_V128: 1168 tcg_debug_assert(ret >= 32 && arg >= 32); 1169 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1170 break; 1171 1172 default: 1173 g_assert_not_reached(); 1174 } 1175 return true; 1176} 1177 1178static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1179 TCGReg base, intptr_t ofs) 1180{ 1181 AArch64Insn insn; 1182 int lgsz; 1183 1184 switch (type) { 1185 case TCG_TYPE_I32: 1186 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1187 lgsz = 2; 1188 break; 1189 case TCG_TYPE_I64: 1190 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1191 lgsz = 3; 1192 break; 1193 case TCG_TYPE_V64: 1194 insn = I3312_LDRVD; 1195 lgsz = 3; 1196 break; 1197 case TCG_TYPE_V128: 1198 insn = I3312_LDRVQ; 1199 lgsz = 4; 1200 break; 1201 default: 1202 g_assert_not_reached(); 1203 } 1204 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1205} 1206 1207static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1208 TCGReg base, intptr_t ofs) 1209{ 1210 AArch64Insn insn; 1211 int lgsz; 1212 1213 switch (type) { 1214 case TCG_TYPE_I32: 1215 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1216 lgsz = 2; 1217 break; 1218 case TCG_TYPE_I64: 1219 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1220 lgsz = 3; 1221 break; 1222 case TCG_TYPE_V64: 1223 insn = I3312_STRVD; 1224 lgsz = 3; 1225 break; 1226 case TCG_TYPE_V128: 1227 insn = I3312_STRVQ; 1228 lgsz = 4; 1229 break; 1230 default: 1231 g_assert_not_reached(); 1232 } 1233 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1234} 1235 1236static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1237 TCGReg base, intptr_t ofs) 1238{ 1239 if (type <= TCG_TYPE_I64 && val == 0) { 1240 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1241 return true; 1242 } 1243 return false; 1244} 1245 1246static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1247 TCGReg rn, unsigned int a, unsigned int b) 1248{ 1249 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1250} 1251 1252static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1253 TCGReg rn, unsigned int a, unsigned int b) 1254{ 1255 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1256} 1257 1258static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1259 TCGReg rn, unsigned int a, unsigned int b) 1260{ 1261 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1262} 1263 1264static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1265 TCGReg rn, TCGReg rm, unsigned int a) 1266{ 1267 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1268} 1269 1270static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1271 TCGReg rd, TCGReg rn, unsigned int m) 1272{ 1273 int bits = ext ? 
64 : 32; 1274 int max = bits - 1; 1275 tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max); 1276} 1277 1278static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1279 TCGReg rd, TCGReg rn, unsigned int m) 1280{ 1281 int max = ext ? 63 : 31; 1282 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1283} 1284 1285static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1286 TCGReg rd, TCGReg rn, unsigned int m) 1287{ 1288 int max = ext ? 63 : 31; 1289 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1290} 1291 1292static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1293 TCGReg rd, TCGReg rn, unsigned int m) 1294{ 1295 int max = ext ? 63 : 31; 1296 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1297} 1298 1299static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1300 TCGReg rd, TCGReg rn, unsigned int m) 1301{ 1302 int max = ext ? 63 : 31; 1303 tcg_out_extr(s, ext, rd, rn, rn, -m & max); 1304} 1305 1306static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1307 TCGReg rn, unsigned lsb, unsigned width) 1308{ 1309 unsigned size = ext ? 64 : 32; 1310 unsigned a = (size - lsb) & (size - 1); 1311 unsigned b = width - 1; 1312 tcg_out_bfm(s, ext, rd, rn, a, b); 1313} 1314 1315static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1316 tcg_target_long b, bool const_b) 1317{ 1318 if (const_b) { 1319 /* Using CMP or CMN aliases. */ 1320 if (b >= 0) { 1321 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1322 } else { 1323 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1324 } 1325 } else { 1326 /* Using CMP alias SUBS wzr, Wn, Wm */ 1327 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1328 } 1329} 1330 1331static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1332{ 1333 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1334 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1335 tcg_out_insn(s, 3206, B, offset); 1336} 1337 1338static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1339{ 1340 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1341 if (offset == sextract64(offset, 0, 26)) { 1342 tcg_out_insn(s, 3206, B, offset); 1343 } else { 1344 /* Choose X9 as a call-clobbered non-LR temporary. 
*/ 1345 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target); 1346 tcg_out_insn(s, 3207, BR, TCG_REG_X9); 1347 } 1348} 1349 1350static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) 1351{ 1352 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1353 if (offset == sextract64(offset, 0, 26)) { 1354 tcg_out_insn(s, 3206, BL, offset); 1355 } else { 1356 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1357 tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); 1358 } 1359} 1360 1361static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 1362 const TCGHelperInfo *info) 1363{ 1364 tcg_out_call_int(s, target); 1365} 1366 1367static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1368{ 1369 if (!l->has_value) { 1370 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1371 tcg_out_insn(s, 3206, B, 0); 1372 } else { 1373 tcg_out_goto(s, l->u.value_ptr); 1374 } 1375} 1376 1377static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1378 TCGArg b, bool b_const, TCGLabel *l) 1379{ 1380 intptr_t offset; 1381 bool need_cmp; 1382 1383 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1384 need_cmp = false; 1385 } else { 1386 need_cmp = true; 1387 tcg_out_cmp(s, ext, a, b, b_const); 1388 } 1389 1390 if (!l->has_value) { 1391 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1392 offset = tcg_in32(s) >> 5; 1393 } else { 1394 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1395 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1396 } 1397 1398 if (need_cmp) { 1399 tcg_out_insn(s, 3202, B_C, c, offset); 1400 } else if (c == TCG_COND_EQ) { 1401 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1402 } else { 1403 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1404 } 1405} 1406 1407static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, 1408 TCGReg rd, TCGReg rn) 1409{ 1410 /* REV, REV16, REV32 */ 1411 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); 1412} 1413 1414static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1415 TCGReg rd, TCGReg rn) 1416{ 1417 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1418 int bits = (8 << s_bits) - 1; 1419 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1420} 1421 1422static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1423 TCGReg rd, TCGReg rn) 1424{ 1425 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1426 int bits = (8 << s_bits) - 1; 1427 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1428} 1429 1430static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1431 TCGReg rn, int64_t aimm) 1432{ 1433 if (aimm >= 0) { 1434 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1435 } else { 1436 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1437 } 1438} 1439 1440static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1441 TCGReg rh, TCGReg al, TCGReg ah, 1442 tcg_target_long bl, tcg_target_long bh, 1443 bool const_bl, bool const_bh, bool sub) 1444{ 1445 TCGReg orig_rl = rl; 1446 AArch64Insn insn; 1447 1448 if (rl == ah || (!const_bh && rl == bh)) { 1449 rl = TCG_REG_TMP; 1450 } 1451 1452 if (const_bl) { 1453 if (bl < 0) { 1454 bl = -bl; 1455 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1456 } else { 1457 insn = sub ? I3401_SUBSI : I3401_ADDSI; 1458 } 1459 1460 if (unlikely(al == TCG_REG_XZR)) { 1461 /* ??? We want to allow al to be zero for the benefit of 1462 negation via subtraction. 
However, that leaves open the 1463 possibility of adding 0+const in the low part, and the 1464 immediate add instructions encode XSP not XZR. Don't try 1465 anything more elaborate here than loading another zero. */ 1466 al = TCG_REG_TMP; 1467 tcg_out_movi(s, ext, al, 0); 1468 } 1469 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1470 } else { 1471 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1472 } 1473 1474 insn = I3503_ADC; 1475 if (const_bh) { 1476 /* Note that the only two constants we support are 0 and -1, and 1477 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1478 if ((bh != 0) ^ sub) { 1479 insn = I3503_SBC; 1480 } 1481 bh = TCG_REG_XZR; 1482 } else if (sub) { 1483 insn = I3503_SBC; 1484 } 1485 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1486 1487 tcg_out_mov(s, ext, orig_rl, rl); 1488} 1489 1490static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1491{ 1492 static const uint32_t sync[] = { 1493 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1494 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1495 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1496 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1497 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1498 }; 1499 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1500} 1501 1502static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1503 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1504{ 1505 TCGReg a1 = a0; 1506 if (is_ctz) { 1507 a1 = TCG_REG_TMP; 1508 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1509 } 1510 if (const_b && b == (ext ? 64 : 32)) { 1511 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1512 } else { 1513 AArch64Insn sel = I3506_CSEL; 1514 1515 tcg_out_cmp(s, ext, a0, 0, 1); 1516 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1517 1518 if (const_b) { 1519 if (b == -1) { 1520 b = TCG_REG_XZR; 1521 sel = I3506_CSINV; 1522 } else if (b == 0) { 1523 b = TCG_REG_XZR; 1524 } else { 1525 tcg_out_movi(s, ext, d, b); 1526 b = d; 1527 } 1528 } 1529 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1530 } 1531} 1532 1533static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) 1534{ 1535 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1536 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1537 tcg_out_insn(s, 3406, ADR, rd, offset); 1538} 1539 1540#ifdef CONFIG_SOFTMMU 1541/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1542 * MemOpIdx oi, uintptr_t ra) 1543 */ 1544static void * const qemu_ld_helpers[MO_SIZE + 1] = { 1545 [MO_8] = helper_ret_ldub_mmu, 1546#if HOST_BIG_ENDIAN 1547 [MO_16] = helper_be_lduw_mmu, 1548 [MO_32] = helper_be_ldul_mmu, 1549 [MO_64] = helper_be_ldq_mmu, 1550#else 1551 [MO_16] = helper_le_lduw_mmu, 1552 [MO_32] = helper_le_ldul_mmu, 1553 [MO_64] = helper_le_ldq_mmu, 1554#endif 1555}; 1556 1557/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1558 * uintxx_t val, MemOpIdx oi, 1559 * uintptr_t ra) 1560 */ 1561static void * const qemu_st_helpers[MO_SIZE + 1] = { 1562 [MO_8] = helper_ret_stb_mmu, 1563#if HOST_BIG_ENDIAN 1564 [MO_16] = helper_be_stw_mmu, 1565 [MO_32] = helper_be_stl_mmu, 1566 [MO_64] = helper_be_stq_mmu, 1567#else 1568 [MO_16] = helper_le_stw_mmu, 1569 [MO_32] = helper_le_stl_mmu, 1570 [MO_64] = helper_le_stq_mmu, 1571#endif 1572}; 1573 1574static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1575{ 1576 MemOpIdx oi = lb->oi; 1577 MemOp opc = get_memop(oi); 1578 MemOp size = opc & MO_SIZE; 1579 1580 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1581 return false; 1582 } 1583 1584 
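    /*
     * Marshal arguments per the AArch64 calling convention, matching the
     * helper_ret_ld_mmu() signature quoted above: X0 = env, X1 = guest
     * address, X2 = MemOpIdx, X3 = return address back into the TB.
     */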
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1585 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1586 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1587 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1588 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); 1589 if (opc & MO_SIGN) { 1590 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1591 } else { 1592 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1593 } 1594 1595 tcg_out_goto(s, lb->raddr); 1596 return true; 1597} 1598 1599static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1600{ 1601 MemOpIdx oi = lb->oi; 1602 MemOp opc = get_memop(oi); 1603 MemOp size = opc & MO_SIZE; 1604 1605 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1606 return false; 1607 } 1608 1609 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1610 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1611 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1612 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1613 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1614 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); 1615 tcg_out_goto(s, lb->raddr); 1616 return true; 1617} 1618 1619static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, 1620 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1621 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1622{ 1623 TCGLabelQemuLdst *label = new_ldst_label(s); 1624 1625 label->is_ld = is_ld; 1626 label->oi = oi; 1627 label->type = ext; 1628 label->datalo_reg = data_reg; 1629 label->addrlo_reg = addr_reg; 1630 label->raddr = tcg_splitwx_to_rx(raddr); 1631 label->label_ptr[0] = label_ptr; 1632} 1633 1634/* We expect to use a 7-bit scaled negative offset from ENV. */ 1635QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1636QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1637 1638/* These offsets are built into the LDP below. */ 1639QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1640QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1641 1642/* Load and compare a TLB entry, emitting the conditional jump to the 1643 slow path for the failure case, which will be patched later when finalizing 1644 the slow path. Generated code returns the host addend in X1, 1645 clobbers X0,X2,X3,TMP. */ 1646static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, 1647 tcg_insn_unit **label_ptr, int mem_index, 1648 bool is_read) 1649{ 1650 unsigned a_bits = get_alignment_bits(opc); 1651 unsigned s_bits = opc & MO_SIZE; 1652 unsigned a_mask = (1u << a_bits) - 1; 1653 unsigned s_mask = (1u << s_bits) - 1; 1654 TCGReg x3; 1655 TCGType mask_type; 1656 uint64_t compare_mask; 1657 1658 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1659 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1660 1661 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1662 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1663 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1664 1665 /* Extract the TLB index from the address into X0. */ 1666 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1667 TCG_REG_X0, TCG_REG_X0, addr_reg, 1668 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1669 1670 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ 1671 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1672 1673 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1674 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read 1675 ? 
offsetof(CPUTLBEntry, addr_read) 1676 : offsetof(CPUTLBEntry, addr_write)); 1677 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1678 offsetof(CPUTLBEntry, addend)); 1679 1680 /* For aligned accesses, we check the first byte and include the alignment 1681 bits within the address. For unaligned access, we check that we don't 1682 cross pages using the address of the last byte of the access. */ 1683 if (a_bits >= s_bits) { 1684 x3 = addr_reg; 1685 } else { 1686 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1687 TCG_REG_X3, addr_reg, s_mask - a_mask); 1688 x3 = TCG_REG_X3; 1689 } 1690 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1691 1692 /* Store the page mask part of the address into X3. */ 1693 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1694 TCG_REG_X3, x3, compare_mask); 1695 1696 /* Perform the address comparison. */ 1697 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1698 1699 /* If not equal, we jump to the slow path. */ 1700 *label_ptr = s->code_ptr; 1701 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1702} 1703 1704#else 1705static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, 1706 unsigned a_bits) 1707{ 1708 unsigned a_mask = (1 << a_bits) - 1; 1709 TCGLabelQemuLdst *label = new_ldst_label(s); 1710 1711 label->is_ld = is_ld; 1712 label->addrlo_reg = addr_reg; 1713 1714 /* tst addr, #mask */ 1715 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); 1716 1717 label->label_ptr[0] = s->code_ptr; 1718 1719 /* b.ne slow_path */ 1720 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1721 1722 label->raddr = tcg_splitwx_to_rx(s->code_ptr); 1723} 1724 1725static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) 1726{ 1727 if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1728 return false; 1729 } 1730 1731 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg); 1732 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1733 1734 /* "Tail call" to the helper, with the return address back inline. */ 1735 tcg_out_adr(s, TCG_REG_LR, l->raddr); 1736 tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld 1737 : helper_unaligned_st)); 1738 return true; 1739} 1740 1741static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1742{ 1743 return tcg_out_fail_alignment(s, l); 1744} 1745 1746static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1747{ 1748 return tcg_out_fail_alignment(s, l); 1749} 1750#endif /* CONFIG_SOFTMMU */ 1751 1752static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1753 TCGReg data_r, TCGReg addr_r, 1754 TCGType otype, TCGReg off_r) 1755{ 1756 switch (memop & MO_SSIZE) { 1757 case MO_UB: 1758 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); 1759 break; 1760 case MO_SB: 1761 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1762 data_r, addr_r, otype, off_r); 1763 break; 1764 case MO_UW: 1765 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1766 break; 1767 case MO_SW: 1768 tcg_out_ldst_r(s, (ext ? 
I3312_LDRSHX : I3312_LDRSHW), 1769 data_r, addr_r, otype, off_r); 1770 break; 1771 case MO_UL: 1772 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1773 break; 1774 case MO_SL: 1775 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); 1776 break; 1777 case MO_UQ: 1778 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); 1779 break; 1780 default: 1781 tcg_abort(); 1782 } 1783} 1784 1785static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1786 TCGReg data_r, TCGReg addr_r, 1787 TCGType otype, TCGReg off_r) 1788{ 1789 switch (memop & MO_SIZE) { 1790 case MO_8: 1791 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); 1792 break; 1793 case MO_16: 1794 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); 1795 break; 1796 case MO_32: 1797 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); 1798 break; 1799 case MO_64: 1800 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); 1801 break; 1802 default: 1803 tcg_abort(); 1804 } 1805} 1806 1807static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1808 MemOpIdx oi, TCGType ext) 1809{ 1810 MemOp memop = get_memop(oi); 1811 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1812 1813 /* Byte swapping is left to middle-end expansion. */ 1814 tcg_debug_assert((memop & MO_BSWAP) == 0); 1815 1816#ifdef CONFIG_SOFTMMU 1817 unsigned mem_index = get_mmuidx(oi); 1818 tcg_insn_unit *label_ptr; 1819 1820 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); 1821 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1822 TCG_REG_X1, otype, addr_reg); 1823 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, 1824 s->code_ptr, label_ptr); 1825#else /* !CONFIG_SOFTMMU */ 1826 unsigned a_bits = get_alignment_bits(memop); 1827 if (a_bits) { 1828 tcg_out_test_alignment(s, true, addr_reg, a_bits); 1829 } 1830 if (USE_GUEST_BASE) { 1831 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1832 TCG_REG_GUEST_BASE, otype, addr_reg); 1833 } else { 1834 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1835 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1836 } 1837#endif /* CONFIG_SOFTMMU */ 1838} 1839 1840static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1841 MemOpIdx oi) 1842{ 1843 MemOp memop = get_memop(oi); 1844 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1845 1846 /* Byte swapping is left to middle-end expansion. */ 1847 tcg_debug_assert((memop & MO_BSWAP) == 0); 1848 1849#ifdef CONFIG_SOFTMMU 1850 unsigned mem_index = get_mmuidx(oi); 1851 tcg_insn_unit *label_ptr; 1852 1853 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); 1854 tcg_out_qemu_st_direct(s, memop, data_reg, 1855 TCG_REG_X1, otype, addr_reg); 1856 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, 1857 data_reg, addr_reg, s->code_ptr, label_ptr); 1858#else /* !CONFIG_SOFTMMU */ 1859 unsigned a_bits = get_alignment_bits(memop); 1860 if (a_bits) { 1861 tcg_out_test_alignment(s, false, addr_reg, a_bits); 1862 } 1863 if (USE_GUEST_BASE) { 1864 tcg_out_qemu_st_direct(s, memop, data_reg, 1865 TCG_REG_GUEST_BASE, otype, addr_reg); 1866 } else { 1867 tcg_out_qemu_st_direct(s, memop, data_reg, 1868 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1869 } 1870#endif /* CONFIG_SOFTMMU */ 1871} 1872 1873static const tcg_insn_unit *tb_ret_addr; 1874 1875static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 1876{ 1877 /* Reuse the zeroing that exists for goto_ptr. 

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    /* Reuse the zeroing that exists for goto_ptr.  */
    if (a0 == 0) {
        tcg_out_goto_long(s, tcg_code_gen_epilogue);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        tcg_out_goto_long(s, tb_ret_addr);
    }
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    set_jmp_reset_offset(s, which);
}

void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t d_addr = tb->jmp_target_addr[n];
    ptrdiff_t d_offset = d_addr - jmp_rx;
    tcg_insn_unit insn;

    /* Either directly branch, or indirect branch load. */
    if (d_offset == sextract64(d_offset, 0, 28)) {
        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
    } else {
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        ptrdiff_t i_offset = i_addr - jmp_rx;

        /* Note that we asserted this in range in tcg_out_goto_tb. */
        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
    }
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments. */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0. */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
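
/*
 * Illustrative example (sketch): with REG0() above, a "st8_i32 $0, base, ofs"
 * whose value operand matched the "rZ" constraint as constant zero is
 * emitted below as a plain STRB of XZR, so no scratch register is needed to
 * materialise the zero.
 */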

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

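    /*
     * Illustrative note (sketch): setcond below is a flag-setting compare
     * followed by CSINC of XZR/XZR with the inverted condition, i.e. the
     * CSET alias; e.g. for TCG_COND_EQ the emitted sequence is roughly
     *     subs xzr, x_a1, x_a2        (cmp)
     *     csinc w_a0, wzr, wzr, ne    (cset w_a0, eq)
     */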
    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_sxt(s, ext, MO_16, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_uxt(s, MO_16, a0, a0);
        }
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;

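    /*
     * Illustrative note (sketch): for the bitwise vector ops with a constant
     * operand, is_shimm1632() recovers the 8-bit MOVI/MVNI "shifted
     * immediate" encoding.  When the destination aliases the first source,
     * the immediate form of ORR/BIC is used directly; otherwise the constant
     * is first materialised into the destination with MOVI (or MVNI for an
     * inverted constant) and the register-register form follows.
     */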
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;

    default:
        return 0;
    }
}

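/*
 * Illustrative note (sketch): in tcg_can_emit_vec_op() above, 1 means the op
 * is supported directly, 0 means unsupported, and -1 means the op is accepted
 * but must be rewritten in terms of other ops by tcg_expand_vec_op() below
 * (used here for the rotates and the variable right shifts).
 */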
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

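    /*
     * Illustrative note (sketch): the rotrv expansion below uses the usual
     * identity rotr(x, n) = (x >> n) | (x << (elem_bits - n)), with both
     * shifts expressed as variable left shifts (USHL) by a negated or
     * reduced count, since AArch64 has no variable right-shift-by-vector.
     */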
    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);
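
    /*
     * Illustrative note (sketch, see the backend constraint definitions for
     * the authoritative mapping): 'r' is any general register, 'w' any
     * vector register, 'l' the ALL_QLDST_REGS subset usable around a
     * softmmu TLB lookup, 'Z' the constant zero, and 'A', 'L', 'M', 'O', 'N'
     * the TCG_CT_CONST_AIMM/LIMM/MONE/ORRI/ANDI constant classes.
     */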

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, l);
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(lZ, l);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
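
    /*
     * Illustrative note (sketch): the clobber set starts out as "everything"
     * and the registers the AAPCS64 calling convention makes callee-saved
     * (x19..x29 and v8..v15) were just removed, since those survive calls
     * out to helper functions.
     */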

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn. */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers. */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding. */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals. */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space. */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr,  -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp,  -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}