1/* 2 * Initial TCG Implementation for aarch64 3 * 4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH 5 * Written by Claudio Fontana 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or 8 * (at your option) any later version. 9 * 10 * See the COPYING file in the top-level directory for details. 11 */ 12 13#include "../tcg-pool.c.inc" 14#include "qemu/bitops.h" 15 16/* We're going to re-use TCGType in setting of the SF bit, which controls 17 the size of the operation performed. If we know the values match, it 18 makes things much cleaner. */ 19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); 20 21#ifdef CONFIG_DEBUG_TCG 22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp", 27 28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31", 32}; 33#endif /* CONFIG_DEBUG_TCG */ 34 35static const int tcg_target_reg_alloc_order[] = { 36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, 37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, 38 TCG_REG_X28, /* we will reserve this for guest_base if configured */ 39 40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, 41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, 42 TCG_REG_X16, TCG_REG_X17, 43 44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, 46 47 /* X18 reserved by system */ 48 /* X19 reserved for AREG0 */ 49 /* X29 reserved as fp */ 50 /* X30 reserved as temporary */ 51 52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, 53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, 54 /* V8 - V15 are call-saved, and skipped. 
*/ 55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, 56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, 57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, 58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, 59}; 60 61static const int tcg_target_call_iarg_regs[8] = { 62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 64}; 65static const int tcg_target_call_oarg_regs[1] = { 66 TCG_REG_X0 67}; 68 69#define TCG_REG_TMP TCG_REG_X30 70#define TCG_VEC_TMP TCG_REG_V31 71 72#ifndef CONFIG_SOFTMMU 73/* Note that XZR cannot be encoded in the address base register slot, 74 as that actaully encodes SP. So if we need to zero-extend the guest 75 address, via the address index register slot, we need to load even 76 a zero guest base into a register. */ 77#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) 78#define TCG_REG_GUEST_BASE TCG_REG_X28 79#endif 80 81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 82{ 83 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 84 ptrdiff_t offset = target - src_rx; 85 86 if (offset == sextract64(offset, 0, 26)) { 87 /* read instruction, mask away previous PC_REL26 parameter contents, 88 set the proper offset, then write back the instruction. 
*/ 89 *src_rw = deposit32(*src_rw, 0, 26, offset); 90 return true; 91 } 92 return false; 93} 94 95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 96{ 97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 98 ptrdiff_t offset = target - src_rx; 99 100 if (offset == sextract64(offset, 0, 19)) { 101 *src_rw = deposit32(*src_rw, 5, 19, offset); 102 return true; 103 } 104 return false; 105} 106 107static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 108 intptr_t value, intptr_t addend) 109{ 110 tcg_debug_assert(addend == 0); 111 switch (type) { 112 case R_AARCH64_JUMP26: 113 case R_AARCH64_CALL26: 114 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 115 case R_AARCH64_CONDBR19: 116 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 117 default: 118 g_assert_not_reached(); 119 } 120} 121 122#define TCG_CT_CONST_AIMM 0x100 123#define TCG_CT_CONST_LIMM 0x200 124#define TCG_CT_CONST_ZERO 0x400 125#define TCG_CT_CONST_MONE 0x800 126#define TCG_CT_CONST_ORRI 0x1000 127#define TCG_CT_CONST_ANDI 0x2000 128 129#define ALL_GENERAL_REGS 0xffffffffu 130#define ALL_VECTOR_REGS 0xffffffff00000000ull 131 132#ifdef CONFIG_SOFTMMU 133#define ALL_QLDST_REGS \ 134 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ 135 (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) 136#else 137#define ALL_QLDST_REGS ALL_GENERAL_REGS 138#endif 139 140/* Match a constant valid for addition (12-bit, optionally shifted). */ 141static inline bool is_aimm(uint64_t val) 142{ 143 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 144} 145 146/* Match a constant valid for logical operations. */ 147static inline bool is_limm(uint64_t val) 148{ 149 /* Taking a simplified view of the logical immediates for now, ignoring 150 the replication that can happen across the field. Match bit patterns 151 of the forms 152 0....01....1 153 0..01..10..0 154 and their inverses. */ 155 156 /* Make things easier below, by testing the form with msb clear. 
*/ 157 if ((int64_t)val < 0) { 158 val = ~val; 159 } 160 if (val == 0) { 161 return false; 162 } 163 val += val & -val; 164 return (val & (val - 1)) == 0; 165} 166 167/* Return true if v16 is a valid 16-bit shifted immediate. */ 168static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 169{ 170 if (v16 == (v16 & 0xff)) { 171 *cmode = 0x8; 172 *imm8 = v16 & 0xff; 173 return true; 174 } else if (v16 == (v16 & 0xff00)) { 175 *cmode = 0xa; 176 *imm8 = v16 >> 8; 177 return true; 178 } 179 return false; 180} 181 182/* Return true if v32 is a valid 32-bit shifted immediate. */ 183static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 184{ 185 if (v32 == (v32 & 0xff)) { 186 *cmode = 0x0; 187 *imm8 = v32 & 0xff; 188 return true; 189 } else if (v32 == (v32 & 0xff00)) { 190 *cmode = 0x2; 191 *imm8 = (v32 >> 8) & 0xff; 192 return true; 193 } else if (v32 == (v32 & 0xff0000)) { 194 *cmode = 0x4; 195 *imm8 = (v32 >> 16) & 0xff; 196 return true; 197 } else if (v32 == (v32 & 0xff000000)) { 198 *cmode = 0x6; 199 *imm8 = v32 >> 24; 200 return true; 201 } 202 return false; 203} 204 205/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 206static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 207{ 208 if ((v32 & 0xffff00ff) == 0xff) { 209 *cmode = 0xc; 210 *imm8 = (v32 >> 8) & 0xff; 211 return true; 212 } else if ((v32 & 0xff00ffff) == 0xffff) { 213 *cmode = 0xd; 214 *imm8 = (v32 >> 16) & 0xff; 215 return true; 216 } 217 return false; 218} 219 220/* Return true if v32 is a valid float32 immediate. */ 221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 222{ 223 if (extract32(v32, 0, 19) == 0 224 && (extract32(v32, 25, 6) == 0x20 225 || extract32(v32, 25, 6) == 0x1f)) { 226 *cmode = 0xf; 227 *imm8 = (extract32(v32, 31, 1) << 7) 228 | (extract32(v32, 25, 1) << 6) 229 | extract32(v32, 19, 6); 230 return true; 231 } 232 return false; 233} 234 235/* Return true if v64 is a valid float64 immediate. 
*/ 236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 237{ 238 if (extract64(v64, 0, 48) == 0 239 && (extract64(v64, 54, 9) == 0x100 240 || extract64(v64, 54, 9) == 0x0ff)) { 241 *cmode = 0xf; 242 *imm8 = (extract64(v64, 63, 1) << 7) 243 | (extract64(v64, 54, 1) << 6) 244 | extract64(v64, 48, 6); 245 return true; 246 } 247 return false; 248} 249 250/* 251 * Return non-zero if v32 can be formed by MOVI+ORR. 252 * Place the parameters for MOVI in (cmode, imm8). 253 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 254 */ 255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 256{ 257 int i; 258 259 for (i = 6; i > 0; i -= 2) { 260 /* Mask out one byte we can add with ORR. */ 261 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 262 if (is_shimm32(tmp, cmode, imm8) || 263 is_soimm32(tmp, cmode, imm8)) { 264 break; 265 } 266 } 267 return i; 268} 269 270/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 272{ 273 if (v32 == deposit32(v32, 16, 16, v32)) { 274 return is_shimm16(v32, cmode, imm8); 275 } else { 276 return is_shimm32(v32, cmode, imm8); 277 } 278} 279 280static int tcg_target_const_match(tcg_target_long val, TCGType type, 281 const TCGArgConstraint *arg_ct) 282{ 283 int ct = arg_ct->ct; 284 285 if (ct & TCG_CT_CONST) { 286 return 1; 287 } 288 if (type == TCG_TYPE_I32) { 289 val = (int32_t)val; 290 } 291 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 292 return 1; 293 } 294 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 295 return 1; 296 } 297 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 298 return 1; 299 } 300 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 301 return 1; 302 } 303 304 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 305 case 0: 306 break; 307 case TCG_CT_CONST_ANDI: 308 val = ~val; 309 /* fallthru */ 310 case TCG_CT_CONST_ORRI: 311 if (val == deposit64(val, 32, 32, val)) { 312 int cmode, imm8; 313 
return is_shimm1632(val, &cmode, &imm8); 314 } 315 break; 316 default: 317 /* Both bits should not be set for the same insn. */ 318 g_assert_not_reached(); 319 } 320 321 return 0; 322} 323 324enum aarch64_cond_code { 325 COND_EQ = 0x0, 326 COND_NE = 0x1, 327 COND_CS = 0x2, /* Unsigned greater or equal */ 328 COND_HS = COND_CS, /* ALIAS greater or equal */ 329 COND_CC = 0x3, /* Unsigned less than */ 330 COND_LO = COND_CC, /* ALIAS Lower */ 331 COND_MI = 0x4, /* Negative */ 332 COND_PL = 0x5, /* Zero or greater */ 333 COND_VS = 0x6, /* Overflow */ 334 COND_VC = 0x7, /* No overflow */ 335 COND_HI = 0x8, /* Unsigned greater than */ 336 COND_LS = 0x9, /* Unsigned less or equal */ 337 COND_GE = 0xa, 338 COND_LT = 0xb, 339 COND_GT = 0xc, 340 COND_LE = 0xd, 341 COND_AL = 0xe, 342 COND_NV = 0xf, /* behaves like COND_AL here */ 343}; 344 345static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 346 [TCG_COND_EQ] = COND_EQ, 347 [TCG_COND_NE] = COND_NE, 348 [TCG_COND_LT] = COND_LT, 349 [TCG_COND_GE] = COND_GE, 350 [TCG_COND_LE] = COND_LE, 351 [TCG_COND_GT] = COND_GT, 352 /* unsigned */ 353 [TCG_COND_LTU] = COND_LO, 354 [TCG_COND_GTU] = COND_HI, 355 [TCG_COND_GEU] = COND_HS, 356 [TCG_COND_LEU] = COND_LS, 357}; 358 359typedef enum { 360 LDST_ST = 0, /* store */ 361 LDST_LD = 1, /* load */ 362 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 363 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 364} AArch64LdstType; 365 366/* We encode the format of the insn into the beginning of the name, so that 367 we can have the preprocessor help "typecheck" the insn vs the output 368 function. Arm didn't provide us with nice names for the formats, so we 369 use the section number of the architecture reference manual in which the 370 instruction group is described. */ 371typedef enum { 372 /* Compare and branch (immediate). */ 373 I3201_CBZ = 0x34000000, 374 I3201_CBNZ = 0x35000000, 375 376 /* Conditional branch (immediate). 
*/ 377 I3202_B_C = 0x54000000, 378 379 /* Unconditional branch (immediate). */ 380 I3206_B = 0x14000000, 381 I3206_BL = 0x94000000, 382 383 /* Unconditional branch (register). */ 384 I3207_BR = 0xd61f0000, 385 I3207_BLR = 0xd63f0000, 386 I3207_RET = 0xd65f0000, 387 388 /* AdvSIMD load/store single structure. */ 389 I3303_LD1R = 0x0d40c000, 390 391 /* Load literal for loading the address at pc-relative offset */ 392 I3305_LDR = 0x58000000, 393 I3305_LDR_v64 = 0x5c000000, 394 I3305_LDR_v128 = 0x9c000000, 395 396 /* Load/store register. Described here as 3.3.12, but the helper 397 that emits them can transform to 3.3.10 or 3.3.13. */ 398 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 399 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 400 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 401 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 402 403 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 404 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 405 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 406 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 407 408 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 409 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 410 411 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 412 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 413 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 414 415 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 416 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 417 418 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 419 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 420 421 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 422 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 423 424 I3312_TO_I3310 = 0x00200800, 425 I3312_TO_I3313 = 0x01000000, 426 427 /* Load/store register pair instructions. 
*/ 428 I3314_LDP = 0x28400000, 429 I3314_STP = 0x28000000, 430 431 /* Add/subtract immediate instructions. */ 432 I3401_ADDI = 0x11000000, 433 I3401_ADDSI = 0x31000000, 434 I3401_SUBI = 0x51000000, 435 I3401_SUBSI = 0x71000000, 436 437 /* Bitfield instructions. */ 438 I3402_BFM = 0x33000000, 439 I3402_SBFM = 0x13000000, 440 I3402_UBFM = 0x53000000, 441 442 /* Extract instruction. */ 443 I3403_EXTR = 0x13800000, 444 445 /* Logical immediate instructions. */ 446 I3404_ANDI = 0x12000000, 447 I3404_ORRI = 0x32000000, 448 I3404_EORI = 0x52000000, 449 450 /* Move wide immediate instructions. */ 451 I3405_MOVN = 0x12800000, 452 I3405_MOVZ = 0x52800000, 453 I3405_MOVK = 0x72800000, 454 455 /* PC relative addressing instructions. */ 456 I3406_ADR = 0x10000000, 457 I3406_ADRP = 0x90000000, 458 459 /* Add/subtract shifted register instructions (without a shift). */ 460 I3502_ADD = 0x0b000000, 461 I3502_ADDS = 0x2b000000, 462 I3502_SUB = 0x4b000000, 463 I3502_SUBS = 0x6b000000, 464 465 /* Add/subtract shifted register instructions (with a shift). */ 466 I3502S_ADD_LSL = I3502_ADD, 467 468 /* Add/subtract with carry instructions. */ 469 I3503_ADC = 0x1a000000, 470 I3503_SBC = 0x5a000000, 471 472 /* Conditional select instructions. */ 473 I3506_CSEL = 0x1a800000, 474 I3506_CSINC = 0x1a800400, 475 I3506_CSINV = 0x5a800000, 476 I3506_CSNEG = 0x5a800400, 477 478 /* Data-processing (1 source) instructions. */ 479 I3507_CLZ = 0x5ac01000, 480 I3507_RBIT = 0x5ac00000, 481 I3507_REV16 = 0x5ac00400, 482 I3507_REV32 = 0x5ac00800, 483 I3507_REV64 = 0x5ac00c00, 484 485 /* Data-processing (2 source) instructions. */ 486 I3508_LSLV = 0x1ac02000, 487 I3508_LSRV = 0x1ac02400, 488 I3508_ASRV = 0x1ac02800, 489 I3508_RORV = 0x1ac02c00, 490 I3508_SMULH = 0x9b407c00, 491 I3508_UMULH = 0x9bc07c00, 492 I3508_UDIV = 0x1ac00800, 493 I3508_SDIV = 0x1ac00c00, 494 495 /* Data-processing (3 source) instructions. 
*/ 496 I3509_MADD = 0x1b000000, 497 I3509_MSUB = 0x1b008000, 498 499 /* Logical shifted register instructions (without a shift). */ 500 I3510_AND = 0x0a000000, 501 I3510_BIC = 0x0a200000, 502 I3510_ORR = 0x2a000000, 503 I3510_ORN = 0x2a200000, 504 I3510_EOR = 0x4a000000, 505 I3510_EON = 0x4a200000, 506 I3510_ANDS = 0x6a000000, 507 508 /* Logical shifted register instructions (with a shift). */ 509 I3502S_AND_LSR = I3510_AND | (1 << 22), 510 511 /* AdvSIMD copy */ 512 I3605_DUP = 0x0e000400, 513 I3605_INS = 0x4e001c00, 514 I3605_UMOV = 0x0e003c00, 515 516 /* AdvSIMD modified immediate */ 517 I3606_MOVI = 0x0f000400, 518 I3606_MVNI = 0x2f000400, 519 I3606_BIC = 0x2f001400, 520 I3606_ORR = 0x0f001400, 521 522 /* AdvSIMD scalar shift by immediate */ 523 I3609_SSHR = 0x5f000400, 524 I3609_SSRA = 0x5f001400, 525 I3609_SHL = 0x5f005400, 526 I3609_USHR = 0x7f000400, 527 I3609_USRA = 0x7f001400, 528 I3609_SLI = 0x7f005400, 529 530 /* AdvSIMD scalar three same */ 531 I3611_SQADD = 0x5e200c00, 532 I3611_SQSUB = 0x5e202c00, 533 I3611_CMGT = 0x5e203400, 534 I3611_CMGE = 0x5e203c00, 535 I3611_SSHL = 0x5e204400, 536 I3611_ADD = 0x5e208400, 537 I3611_CMTST = 0x5e208c00, 538 I3611_UQADD = 0x7e200c00, 539 I3611_UQSUB = 0x7e202c00, 540 I3611_CMHI = 0x7e203400, 541 I3611_CMHS = 0x7e203c00, 542 I3611_USHL = 0x7e204400, 543 I3611_SUB = 0x7e208400, 544 I3611_CMEQ = 0x7e208c00, 545 546 /* AdvSIMD scalar two-reg misc */ 547 I3612_CMGT0 = 0x5e208800, 548 I3612_CMEQ0 = 0x5e209800, 549 I3612_CMLT0 = 0x5e20a800, 550 I3612_ABS = 0x5e20b800, 551 I3612_CMGE0 = 0x7e208800, 552 I3612_CMLE0 = 0x7e209800, 553 I3612_NEG = 0x7e20b800, 554 555 /* AdvSIMD shift by immediate */ 556 I3614_SSHR = 0x0f000400, 557 I3614_SSRA = 0x0f001400, 558 I3614_SHL = 0x0f005400, 559 I3614_SLI = 0x2f005400, 560 I3614_USHR = 0x2f000400, 561 I3614_USRA = 0x2f001400, 562 563 /* AdvSIMD three same. 
*/ 564 I3616_ADD = 0x0e208400, 565 I3616_AND = 0x0e201c00, 566 I3616_BIC = 0x0e601c00, 567 I3616_BIF = 0x2ee01c00, 568 I3616_BIT = 0x2ea01c00, 569 I3616_BSL = 0x2e601c00, 570 I3616_EOR = 0x2e201c00, 571 I3616_MUL = 0x0e209c00, 572 I3616_ORR = 0x0ea01c00, 573 I3616_ORN = 0x0ee01c00, 574 I3616_SUB = 0x2e208400, 575 I3616_CMGT = 0x0e203400, 576 I3616_CMGE = 0x0e203c00, 577 I3616_CMTST = 0x0e208c00, 578 I3616_CMHI = 0x2e203400, 579 I3616_CMHS = 0x2e203c00, 580 I3616_CMEQ = 0x2e208c00, 581 I3616_SMAX = 0x0e206400, 582 I3616_SMIN = 0x0e206c00, 583 I3616_SSHL = 0x0e204400, 584 I3616_SQADD = 0x0e200c00, 585 I3616_SQSUB = 0x0e202c00, 586 I3616_UMAX = 0x2e206400, 587 I3616_UMIN = 0x2e206c00, 588 I3616_UQADD = 0x2e200c00, 589 I3616_UQSUB = 0x2e202c00, 590 I3616_USHL = 0x2e204400, 591 592 /* AdvSIMD two-reg misc. */ 593 I3617_CMGT0 = 0x0e208800, 594 I3617_CMEQ0 = 0x0e209800, 595 I3617_CMLT0 = 0x0e20a800, 596 I3617_CMGE0 = 0x2e208800, 597 I3617_CMLE0 = 0x2e209800, 598 I3617_NOT = 0x2e205800, 599 I3617_ABS = 0x0e20b800, 600 I3617_NEG = 0x2e20b800, 601 602 /* System instructions. */ 603 NOP = 0xd503201f, 604 DMB_ISH = 0xd50338bf, 605 DMB_LD = 0x00000100, 606 DMB_ST = 0x00000200, 607} AArch64Insn; 608 609static inline uint32_t tcg_in32(TCGContext *s) 610{ 611 uint32_t v = *(uint32_t *)s->code_ptr; 612 return v; 613} 614 615/* Emit an opcode with "type-checking" of the format. */ 616#define tcg_out_insn(S, FMT, OP, ...) 
\ 617 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 618 619static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 620 TCGReg rt, TCGReg rn, unsigned size) 621{ 622 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 623} 624 625static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 626 int imm19, TCGReg rt) 627{ 628 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 629} 630 631static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 632 TCGReg rt, int imm19) 633{ 634 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 635} 636 637static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 638 TCGCond c, int imm19) 639{ 640 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 641} 642 643static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 644{ 645 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 646} 647 648static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 649{ 650 tcg_out32(s, insn | rn << 5); 651} 652 653static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 654 TCGReg r1, TCGReg r2, TCGReg rn, 655 tcg_target_long ofs, bool pre, bool w) 656{ 657 insn |= 1u << 31; /* ext */ 658 insn |= pre << 24; 659 insn |= w << 23; 660 661 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 662 insn |= (ofs & (0x7f << 3)) << (15 - 3); 663 664 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 665} 666 667static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 668 TCGReg rd, TCGReg rn, uint64_t aimm) 669{ 670 if (aimm > 0xfff) { 671 tcg_debug_assert((aimm & 0xfff) == 0); 672 aimm >>= 12; 673 tcg_debug_assert(aimm <= 0xfff); 674 aimm |= 1 << 12; /* apply LSL 12 */ 675 } 676 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 677} 678 679/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 680 (Logical immediate). 
Both insn groups have N, IMMR and IMMS fields 681 that feed the DecodeBitMasks pseudo function. */ 682static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 683 TCGReg rd, TCGReg rn, int n, int immr, int imms) 684{ 685 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 686 | rn << 5 | rd); 687} 688 689#define tcg_out_insn_3404 tcg_out_insn_3402 690 691static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 692 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 693{ 694 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 695 | rn << 5 | rd); 696} 697 698/* This function is used for the Move (wide immediate) instruction group. 699 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 700static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 701 TCGReg rd, uint16_t half, unsigned shift) 702{ 703 tcg_debug_assert((shift & ~0x30) == 0); 704 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 705} 706 707static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 708 TCGReg rd, int64_t disp) 709{ 710 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 711} 712 713/* This function is for both 3.5.2 (Add/Subtract shifted register), for 714 the rare occasion when we actually want to supply a shift amount. */ 715static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, 716 TCGType ext, TCGReg rd, TCGReg rn, 717 TCGReg rm, int imm6) 718{ 719 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); 720} 721 722/* This function is for 3.5.2 (Add/subtract shifted register), 723 and 3.5.10 (Logical shifted register), for the vast majorty of cases 724 when we don't want to apply a shift. Thus it can also be used for 725 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). 
*/ 726static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, 727 TCGReg rd, TCGReg rn, TCGReg rm) 728{ 729 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); 730} 731 732#define tcg_out_insn_3503 tcg_out_insn_3502 733#define tcg_out_insn_3508 tcg_out_insn_3502 734#define tcg_out_insn_3510 tcg_out_insn_3502 735 736static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, 737 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) 738{ 739 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd 740 | tcg_cond_to_aarch64[c] << 12); 741} 742 743static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, 744 TCGReg rd, TCGReg rn) 745{ 746 tcg_out32(s, insn | ext << 31 | rn << 5 | rd); 747} 748 749static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, 750 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) 751{ 752 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); 753} 754 755static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q, 756 TCGReg rd, TCGReg rn, int dst_idx, int src_idx) 757{ 758 /* Note that bit 11 set means general register input. Therefore 759 we can handle both register sets with one function. 
*/ 760 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11) 761 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5); 762} 763 764static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, 765 TCGReg rd, bool op, int cmode, uint8_t imm8) 766{ 767 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f) 768 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); 769} 770 771static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn, 772 TCGReg rd, TCGReg rn, unsigned immhb) 773{ 774 tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f)); 775} 776 777static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn, 778 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 779{ 780 tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16 781 | (rn & 0x1f) << 5 | (rd & 0x1f)); 782} 783 784static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn, 785 unsigned size, TCGReg rd, TCGReg rn) 786{ 787 tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f)); 788} 789 790static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, 791 TCGReg rd, TCGReg rn, unsigned immhb) 792{ 793 tcg_out32(s, insn | q << 30 | immhb << 16 794 | (rn & 0x1f) << 5 | (rd & 0x1f)); 795} 796 797static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q, 798 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 799{ 800 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16 801 | (rn & 0x1f) << 5 | (rd & 0x1f)); 802} 803 804static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q, 805 unsigned size, TCGReg rd, TCGReg rn) 806{ 807 tcg_out32(s, insn | q << 30 | (size << 22) 808 | (rn & 0x1f) << 5 | (rd & 0x1f)); 809} 810 811static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, 812 TCGReg rd, TCGReg base, TCGType ext, 813 TCGReg regoff) 814{ 815 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. 
*/ 816 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 817 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 818} 819 820static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 821 TCGReg rd, TCGReg rn, intptr_t offset) 822{ 823 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 824} 825 826static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 827 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 828{ 829 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 830 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 831 | rn << 5 | (rd & 0x1f)); 832} 833 834/* Register to register move using ORR (shifted register with no shift). */ 835static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 836{ 837 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 838} 839 840/* Register to register move using ADDI (move to/from SP). */ 841static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 842{ 843 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 844} 845 846/* This function is used for the Logical (immediate) instruction group. 847 The value of LIMM must satisfy IS_LIMM. See the comment above about 848 only supporting simplified logical immediates. 
*/ 849static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 850 TCGReg rd, TCGReg rn, uint64_t limm) 851{ 852 unsigned h, l, r, c; 853 854 tcg_debug_assert(is_limm(limm)); 855 856 h = clz64(limm); 857 l = ctz64(limm); 858 if (l == 0) { 859 r = 0; /* form 0....01....1 */ 860 c = ctz64(~limm) - 1; 861 if (h == 0) { 862 r = clz64(~limm); /* form 1..10..01..1 */ 863 c += r; 864 } 865 } else { 866 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 867 c = r - h - 1; 868 } 869 if (ext == TCG_TYPE_I32) { 870 r &= 31; 871 c &= 31; 872 } 873 874 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 875} 876 877static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 878 TCGReg rd, int64_t v64) 879{ 880 bool q = type == TCG_TYPE_V128; 881 int cmode, imm8, i; 882 883 /* Test all bytes equal first. */ 884 if (vece == MO_8) { 885 imm8 = (uint8_t)v64; 886 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 887 return; 888 } 889 890 /* 891 * Test all bytes 0x00 or 0xff second. This can match cases that 892 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 893 */ 894 for (i = imm8 = 0; i < 8; i++) { 895 uint8_t byte = v64 >> (i * 8); 896 if (byte == 0xff) { 897 imm8 |= 1 << i; 898 } else if (byte != 0) { 899 goto fail_bytes; 900 } 901 } 902 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 903 return; 904 fail_bytes: 905 906 /* 907 * Tests for various replications. For each element width, if we 908 * cannot find an expansion there's no point checking a larger 909 * width because we already know by replication it cannot match. 910 */ 911 if (vece == MO_16) { 912 uint16_t v16 = v64; 913 914 if (is_shimm16(v16, &cmode, &imm8)) { 915 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 916 return; 917 } 918 if (is_shimm16(~v16, &cmode, &imm8)) { 919 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 920 return; 921 } 922 923 /* 924 * Otherwise, all remaining constants can be loaded in two insns: 925 * rd = v16 & 0xff, rd |= v16 & 0xff00. 
926 */ 927 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 928 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 929 return; 930 } else if (vece == MO_32) { 931 uint32_t v32 = v64; 932 uint32_t n32 = ~v32; 933 934 if (is_shimm32(v32, &cmode, &imm8) || 935 is_soimm32(v32, &cmode, &imm8) || 936 is_fimm32(v32, &cmode, &imm8)) { 937 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 938 return; 939 } 940 if (is_shimm32(n32, &cmode, &imm8) || 941 is_soimm32(n32, &cmode, &imm8)) { 942 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 943 return; 944 } 945 946 /* 947 * Restrict the set of constants to those we can load with 948 * two instructions. Others we load from the pool. 949 */ 950 i = is_shimm32_pair(v32, &cmode, &imm8); 951 if (i) { 952 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 953 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 954 return; 955 } 956 i = is_shimm32_pair(n32, &cmode, &imm8); 957 if (i) { 958 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 959 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 960 return; 961 } 962 } else if (is_fimm64(v64, &cmode, &imm8)) { 963 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 964 return; 965 } 966 967 /* 968 * As a last resort, load from the constant pool. Sadly there 969 * is no LD1R (literal), so store the full 16-byte vector. 
970 */ 971 if (type == TCG_TYPE_V128) { 972 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 973 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 974 } else { 975 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 976 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 977 } 978} 979 980static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 981 TCGReg rd, TCGReg rs) 982{ 983 int is_q = type - TCG_TYPE_V64; 984 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 985 return true; 986} 987 988static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 989 TCGReg r, TCGReg base, intptr_t offset) 990{ 991 TCGReg temp = TCG_REG_TMP; 992 993 if (offset < -0xffffff || offset > 0xffffff) { 994 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 995 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 996 base = temp; 997 } else { 998 AArch64Insn add_insn = I3401_ADDI; 999 1000 if (offset < 0) { 1001 add_insn = I3401_SUBI; 1002 offset = -offset; 1003 } 1004 if (offset & 0xfff000) { 1005 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1006 base = temp; 1007 } 1008 if (offset & 0xfff) { 1009 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1010 base = temp; 1011 } 1012 } 1013 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1014 return true; 1015} 1016 1017static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1018 tcg_target_long value) 1019{ 1020 tcg_target_long svalue = value; 1021 tcg_target_long ivalue = ~value; 1022 tcg_target_long t0, t1, t2; 1023 int s0, s1; 1024 AArch64Insn opc; 1025 1026 switch (type) { 1027 case TCG_TYPE_I32: 1028 case TCG_TYPE_I64: 1029 tcg_debug_assert(rd < 32); 1030 break; 1031 default: 1032 g_assert_not_reached(); 1033 } 1034 1035 /* For 32-bit values, discard potential garbage in value. 
For 64-bit 1036 values within [2**31, 2**32-1], we can create smaller sequences by 1037 interpreting this as a negative 32-bit number, while ensuring that 1038 the high 32 bits are cleared by setting SF=0. */ 1039 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1040 svalue = (int32_t)value; 1041 value = (uint32_t)value; 1042 ivalue = (uint32_t)ivalue; 1043 type = TCG_TYPE_I32; 1044 } 1045 1046 /* Speed things up by handling the common case of small positive 1047 and negative values specially. */ 1048 if ((value & ~0xffffull) == 0) { 1049 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1050 return; 1051 } else if ((ivalue & ~0xffffull) == 0) { 1052 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1053 return; 1054 } 1055 1056 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1057 use the sign-extended value. That lets us match rotated values such 1058 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1059 if (is_limm(svalue)) { 1060 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1061 return; 1062 } 1063 1064 /* Look for host pointer values within 4G of the PC. This happens 1065 often when loading pointers to QEMU's own data structures. */ 1066 if (type == TCG_TYPE_I64) { 1067 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1068 tcg_target_long disp = value - src_rx; 1069 if (disp == sextract64(disp, 0, 21)) { 1070 tcg_out_insn(s, 3406, ADR, rd, disp); 1071 return; 1072 } 1073 disp = (value >> 12) - (src_rx >> 12); 1074 if (disp == sextract64(disp, 0, 21)) { 1075 tcg_out_insn(s, 3406, ADRP, rd, disp); 1076 if (value & 0xfff) { 1077 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1078 } 1079 return; 1080 } 1081 } 1082 1083 /* Would it take fewer insns to begin with MOVN? 
*/ 1084 if (ctpop64(value) >= 32) { 1085 t0 = ivalue; 1086 opc = I3405_MOVN; 1087 } else { 1088 t0 = value; 1089 opc = I3405_MOVZ; 1090 } 1091 s0 = ctz64(t0) & (63 & -16); 1092 t1 = t0 & ~(0xffffUL << s0); 1093 s1 = ctz64(t1) & (63 & -16); 1094 t2 = t1 & ~(0xffffUL << s1); 1095 if (t2 == 0) { 1096 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1097 if (t1 != 0) { 1098 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1099 } 1100 return; 1101 } 1102 1103 /* For more than 2 insns, dump it into the constant pool. */ 1104 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1105 tcg_out_insn(s, 3305, LDR, 0, rd); 1106} 1107 1108/* Define something more legible for general use. */ 1109#define tcg_out_ldst_r tcg_out_insn_3310 1110 1111static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1112 TCGReg rn, intptr_t offset, int lgsize) 1113{ 1114 /* If the offset is naturally aligned and in range, then we can 1115 use the scaled uimm12 encoding */ 1116 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1117 uintptr_t scaled_uimm = offset >> lgsize; 1118 if (scaled_uimm <= 0xfff) { 1119 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1120 return; 1121 } 1122 } 1123 1124 /* Small signed offsets can use the unscaled encoding. */ 1125 if (offset >= -256 && offset < 256) { 1126 tcg_out_insn_3312(s, insn, rd, rn, offset); 1127 return; 1128 } 1129 1130 /* Worst-case scenario, move offset to temp register, use reg offset. 
*/ 1131 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1132 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1133} 1134 1135static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1136{ 1137 if (ret == arg) { 1138 return true; 1139 } 1140 switch (type) { 1141 case TCG_TYPE_I32: 1142 case TCG_TYPE_I64: 1143 if (ret < 32 && arg < 32) { 1144 tcg_out_movr(s, type, ret, arg); 1145 break; 1146 } else if (ret < 32) { 1147 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1148 break; 1149 } else if (arg < 32) { 1150 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1151 break; 1152 } 1153 /* FALLTHRU */ 1154 1155 case TCG_TYPE_V64: 1156 tcg_debug_assert(ret >= 32 && arg >= 32); 1157 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1158 break; 1159 case TCG_TYPE_V128: 1160 tcg_debug_assert(ret >= 32 && arg >= 32); 1161 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1162 break; 1163 1164 default: 1165 g_assert_not_reached(); 1166 } 1167 return true; 1168} 1169 1170static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1171 TCGReg base, intptr_t ofs) 1172{ 1173 AArch64Insn insn; 1174 int lgsz; 1175 1176 switch (type) { 1177 case TCG_TYPE_I32: 1178 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1179 lgsz = 2; 1180 break; 1181 case TCG_TYPE_I64: 1182 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1183 lgsz = 3; 1184 break; 1185 case TCG_TYPE_V64: 1186 insn = I3312_LDRVD; 1187 lgsz = 3; 1188 break; 1189 case TCG_TYPE_V128: 1190 insn = I3312_LDRVQ; 1191 lgsz = 4; 1192 break; 1193 default: 1194 g_assert_not_reached(); 1195 } 1196 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1197} 1198 1199static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1200 TCGReg base, intptr_t ofs) 1201{ 1202 AArch64Insn insn; 1203 int lgsz; 1204 1205 switch (type) { 1206 case TCG_TYPE_I32: 1207 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1208 lgsz = 2; 1209 break; 1210 case TCG_TYPE_I64: 1211 insn = (src < 32 ? 
I3312_STRX : I3312_STRVD); 1212 lgsz = 3; 1213 break; 1214 case TCG_TYPE_V64: 1215 insn = I3312_STRVD; 1216 lgsz = 3; 1217 break; 1218 case TCG_TYPE_V128: 1219 insn = I3312_STRVQ; 1220 lgsz = 4; 1221 break; 1222 default: 1223 g_assert_not_reached(); 1224 } 1225 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1226} 1227 1228static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1229 TCGReg base, intptr_t ofs) 1230{ 1231 if (type <= TCG_TYPE_I64 && val == 0) { 1232 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1233 return true; 1234 } 1235 return false; 1236} 1237 1238static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1239 TCGReg rn, unsigned int a, unsigned int b) 1240{ 1241 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1242} 1243 1244static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1245 TCGReg rn, unsigned int a, unsigned int b) 1246{ 1247 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1248} 1249 1250static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1251 TCGReg rn, unsigned int a, unsigned int b) 1252{ 1253 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1254} 1255 1256static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1257 TCGReg rn, TCGReg rm, unsigned int a) 1258{ 1259 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1260} 1261 1262static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1263 TCGReg rd, TCGReg rn, unsigned int m) 1264{ 1265 int bits = ext ? 64 : 32; 1266 int max = bits - 1; 1267 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); 1268} 1269 1270static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1271 TCGReg rd, TCGReg rn, unsigned int m) 1272{ 1273 int max = ext ? 63 : 31; 1274 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1275} 1276 1277static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1278 TCGReg rd, TCGReg rn, unsigned int m) 1279{ 1280 int max = ext ? 
63 : 31; 1281 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1282} 1283 1284static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1285 TCGReg rd, TCGReg rn, unsigned int m) 1286{ 1287 int max = ext ? 63 : 31; 1288 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1289} 1290 1291static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1292 TCGReg rd, TCGReg rn, unsigned int m) 1293{ 1294 int max = ext ? 63 : 31; 1295 tcg_out_extr(s, ext, rd, rn, rn, -m & max); 1296} 1297 1298static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1299 TCGReg rn, unsigned lsb, unsigned width) 1300{ 1301 unsigned size = ext ? 64 : 32; 1302 unsigned a = (size - lsb) & (size - 1); 1303 unsigned b = width - 1; 1304 tcg_out_bfm(s, ext, rd, rn, a, b); 1305} 1306 1307static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1308 tcg_target_long b, bool const_b) 1309{ 1310 if (const_b) { 1311 /* Using CMP or CMN aliases. */ 1312 if (b >= 0) { 1313 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1314 } else { 1315 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1316 } 1317 } else { 1318 /* Using CMP alias SUBS wzr, Wn, Wm */ 1319 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1320 } 1321} 1322 1323static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1324{ 1325 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1326 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1327 tcg_out_insn(s, 3206, B, offset); 1328} 1329 1330static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1331{ 1332 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1333 if (offset == sextract64(offset, 0, 26)) { 1334 tcg_out_insn(s, 3206, B, offset); 1335 } else { 1336 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1337 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1338 } 1339} 1340 1341static inline void tcg_out_callr(TCGContext *s, TCGReg reg) 1342{ 1343 tcg_out_insn(s, 3207, BLR, reg); 1344} 1345 1346static void tcg_out_call(TCGContext *s, 
const tcg_insn_unit *target) 1347{ 1348 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1349 if (offset == sextract64(offset, 0, 26)) { 1350 tcg_out_insn(s, 3206, BL, offset); 1351 } else { 1352 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1353 tcg_out_callr(s, TCG_REG_TMP); 1354 } 1355} 1356 1357void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, 1358 uintptr_t jmp_rw, uintptr_t addr) 1359{ 1360 tcg_insn_unit i1, i2; 1361 TCGType rt = TCG_TYPE_I64; 1362 TCGReg rd = TCG_REG_TMP; 1363 uint64_t pair; 1364 1365 ptrdiff_t offset = addr - jmp_rx; 1366 1367 if (offset == sextract64(offset, 0, 26)) { 1368 i1 = I3206_B | ((offset >> 2) & 0x3ffffff); 1369 i2 = NOP; 1370 } else { 1371 offset = (addr >> 12) - (jmp_rx >> 12); 1372 1373 /* patch ADRP */ 1374 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; 1375 /* patch ADDI */ 1376 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; 1377 } 1378 pair = (uint64_t)i2 << 32 | i1; 1379 qatomic_set((uint64_t *)jmp_rw, pair); 1380 flush_idcache_range(jmp_rx, jmp_rw, 8); 1381} 1382 1383static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1384{ 1385 if (!l->has_value) { 1386 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1387 tcg_out_insn(s, 3206, B, 0); 1388 } else { 1389 tcg_out_goto(s, l->u.value_ptr); 1390 } 1391} 1392 1393static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1394 TCGArg b, bool b_const, TCGLabel *l) 1395{ 1396 intptr_t offset; 1397 bool need_cmp; 1398 1399 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1400 need_cmp = false; 1401 } else { 1402 need_cmp = true; 1403 tcg_out_cmp(s, ext, a, b, b_const); 1404 } 1405 1406 if (!l->has_value) { 1407 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1408 offset = tcg_in32(s) >> 5; 1409 } else { 1410 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1411 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1412 } 1413 1414 if 
(need_cmp) { 1415 tcg_out_insn(s, 3202, B_C, c, offset); 1416 } else if (c == TCG_COND_EQ) { 1417 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1418 } else { 1419 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1420 } 1421} 1422 1423static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) 1424{ 1425 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); 1426} 1427 1428static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) 1429{ 1430 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); 1431} 1432 1433static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) 1434{ 1435 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); 1436} 1437 1438static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1439 TCGReg rd, TCGReg rn) 1440{ 1441 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1442 int bits = (8 << s_bits) - 1; 1443 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1444} 1445 1446static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1447 TCGReg rd, TCGReg rn) 1448{ 1449 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1450 int bits = (8 << s_bits) - 1; 1451 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1452} 1453 1454static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1455 TCGReg rn, int64_t aimm) 1456{ 1457 if (aimm >= 0) { 1458 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1459 } else { 1460 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1461 } 1462} 1463 1464static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1465 TCGReg rh, TCGReg al, TCGReg ah, 1466 tcg_target_long bl, tcg_target_long bh, 1467 bool const_bl, bool const_bh, bool sub) 1468{ 1469 TCGReg orig_rl = rl; 1470 AArch64Insn insn; 1471 1472 if (rl == ah || (!const_bh && rl == bh)) { 1473 rl = TCG_REG_TMP; 1474 } 1475 1476 if (const_bl) { 1477 if (bl < 0) { 1478 bl = -bl; 1479 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1480 } else { 1481 insn = sub ? 
I3401_SUBSI : I3401_ADDSI; 1482 } 1483 1484 if (unlikely(al == TCG_REG_XZR)) { 1485 /* ??? We want to allow al to be zero for the benefit of 1486 negation via subtraction. However, that leaves open the 1487 possibility of adding 0+const in the low part, and the 1488 immediate add instructions encode XSP not XZR. Don't try 1489 anything more elaborate here than loading another zero. */ 1490 al = TCG_REG_TMP; 1491 tcg_out_movi(s, ext, al, 0); 1492 } 1493 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1494 } else { 1495 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1496 } 1497 1498 insn = I3503_ADC; 1499 if (const_bh) { 1500 /* Note that the only two constants we support are 0 and -1, and 1501 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1502 if ((bh != 0) ^ sub) { 1503 insn = I3503_SBC; 1504 } 1505 bh = TCG_REG_XZR; 1506 } else if (sub) { 1507 insn = I3503_SBC; 1508 } 1509 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1510 1511 tcg_out_mov(s, ext, orig_rl, rl); 1512} 1513 1514static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1515{ 1516 static const uint32_t sync[] = { 1517 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1518 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1519 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1520 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1521 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1522 }; 1523 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1524} 1525 1526static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1527 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1528{ 1529 TCGReg a1 = a0; 1530 if (is_ctz) { 1531 a1 = TCG_REG_TMP; 1532 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1533 } 1534 if (const_b && b == (ext ? 
64 : 32)) { 1535 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1536 } else { 1537 AArch64Insn sel = I3506_CSEL; 1538 1539 tcg_out_cmp(s, ext, a0, 0, 1); 1540 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1541 1542 if (const_b) { 1543 if (b == -1) { 1544 b = TCG_REG_XZR; 1545 sel = I3506_CSINV; 1546 } else if (b == 0) { 1547 b = TCG_REG_XZR; 1548 } else { 1549 tcg_out_movi(s, ext, d, b); 1550 b = d; 1551 } 1552 } 1553 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1554 } 1555} 1556 1557#ifdef CONFIG_SOFTMMU 1558#include "../tcg-ldst.c.inc" 1559 1560/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1561 * TCGMemOpIdx oi, uintptr_t ra) 1562 */ 1563static void * const qemu_ld_helpers[16] = { 1564 [MO_UB] = helper_ret_ldub_mmu, 1565 [MO_LEUW] = helper_le_lduw_mmu, 1566 [MO_LEUL] = helper_le_ldul_mmu, 1567 [MO_LEQ] = helper_le_ldq_mmu, 1568 [MO_BEUW] = helper_be_lduw_mmu, 1569 [MO_BEUL] = helper_be_ldul_mmu, 1570 [MO_BEQ] = helper_be_ldq_mmu, 1571}; 1572 1573/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1574 * uintxx_t val, TCGMemOpIdx oi, 1575 * uintptr_t ra) 1576 */ 1577static void * const qemu_st_helpers[16] = { 1578 [MO_UB] = helper_ret_stb_mmu, 1579 [MO_LEUW] = helper_le_stw_mmu, 1580 [MO_LEUL] = helper_le_stl_mmu, 1581 [MO_LEQ] = helper_le_stq_mmu, 1582 [MO_BEUW] = helper_be_stw_mmu, 1583 [MO_BEUL] = helper_be_stl_mmu, 1584 [MO_BEQ] = helper_be_stq_mmu, 1585}; 1586 1587static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) 1588{ 1589 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1590 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1591 tcg_out_insn(s, 3406, ADR, rd, offset); 1592} 1593 1594static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1595{ 1596 TCGMemOpIdx oi = lb->oi; 1597 MemOp opc = get_memop(oi); 1598 MemOp size = opc & MO_SIZE; 1599 1600 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1601 return false; 1602 } 1603 
1604 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1605 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1606 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1607 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1608 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1609 if (opc & MO_SIGN) { 1610 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1611 } else { 1612 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1613 } 1614 1615 tcg_out_goto(s, lb->raddr); 1616 return true; 1617} 1618 1619static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1620{ 1621 TCGMemOpIdx oi = lb->oi; 1622 MemOp opc = get_memop(oi); 1623 MemOp size = opc & MO_SIZE; 1624 1625 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1626 return false; 1627 } 1628 1629 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1630 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1631 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1632 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1633 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1634 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1635 tcg_out_goto(s, lb->raddr); 1636 return true; 1637} 1638 1639static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, 1640 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1641 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1642{ 1643 TCGLabelQemuLdst *label = new_ldst_label(s); 1644 1645 label->is_ld = is_ld; 1646 label->oi = oi; 1647 label->type = ext; 1648 label->datalo_reg = data_reg; 1649 label->addrlo_reg = addr_reg; 1650 label->raddr = tcg_splitwx_to_rx(raddr); 1651 label->label_ptr[0] = label_ptr; 1652} 1653 1654/* We expect to use a 7-bit scaled negative offset from ENV. */ 1655QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1656QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1657 1658/* These offsets are built into the LDP below. 
*/
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP.

   ADDR_REG holds the guest address, OPC supplies the access size and
   alignment, MEM_INDEX selects the TLB, and IS_READ chooses between the
   addr_read and addr_write comparators.  The address of the emitted
   branch is stored in *LABEL_PTR.  */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* Width of the AND used to form the TLB index below.  */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path.  The displacement is left
       as zero here; the branch address is handed back via *label_ptr. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}

#endif /* CONFIG_SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const MemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev16(s, data_r, data_r);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
            tcg_out_rev16(s, data_r, data_r);
            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, (ext ?
I3312_LDRSHX : I3312_LDRSHW), 1753 data_r, addr_r, otype, off_r); 1754 } 1755 break; 1756 case MO_UL: 1757 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1758 if (bswap) { 1759 tcg_out_rev32(s, data_r, data_r); 1760 } 1761 break; 1762 case MO_SL: 1763 if (bswap) { 1764 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1765 tcg_out_rev32(s, data_r, data_r); 1766 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); 1767 } else { 1768 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); 1769 } 1770 break; 1771 case MO_Q: 1772 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); 1773 if (bswap) { 1774 tcg_out_rev64(s, data_r, data_r); 1775 } 1776 break; 1777 default: 1778 tcg_abort(); 1779 } 1780} 1781 1782static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1783 TCGReg data_r, TCGReg addr_r, 1784 TCGType otype, TCGReg off_r) 1785{ 1786 const MemOp bswap = memop & MO_BSWAP; 1787 1788 switch (memop & MO_SIZE) { 1789 case MO_8: 1790 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); 1791 break; 1792 case MO_16: 1793 if (bswap && data_r != TCG_REG_XZR) { 1794 tcg_out_rev16(s, TCG_REG_TMP, data_r); 1795 data_r = TCG_REG_TMP; 1796 } 1797 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); 1798 break; 1799 case MO_32: 1800 if (bswap && data_r != TCG_REG_XZR) { 1801 tcg_out_rev32(s, TCG_REG_TMP, data_r); 1802 data_r = TCG_REG_TMP; 1803 } 1804 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); 1805 break; 1806 case MO_64: 1807 if (bswap && data_r != TCG_REG_XZR) { 1808 tcg_out_rev64(s, TCG_REG_TMP, data_r); 1809 data_r = TCG_REG_TMP; 1810 } 1811 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); 1812 break; 1813 default: 1814 tcg_abort(); 1815 } 1816} 1817 1818static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1819 TCGMemOpIdx oi, TCGType ext) 1820{ 1821 MemOp memop = get_memop(oi); 1822 const TCGType otype = TARGET_LONG_BITS == 64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

/*
 * Emit a guest store of DATA_REG to the guest address in ADDR_REG,
 * with the memory operation and mmu index packed in OI.
 *
 * With CONFIG_SOFTMMU the TLB is probed first, the store uses the host
 * addend left in X1, and a slow-path label is recorded.  Otherwise the
 * store is emitted directly, based on TCG_REG_GUEST_BASE when
 * USE_GUEST_BASE is true.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi)
{
    MemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

/* Target of exit_tb when the return value is non-zero; it is only read
   in this part of the file, so it is assigned elsewhere.  */
static const tcg_insn_unit *tb_ret_addr;

/*
 * Emit host code for one integer TCG opcode.
 *
 * ARGS holds the operands of OPC and CONST_ARGS[i] is non-zero when
 * ARGS[i] is a constant rather than a register.  Moves and calls are
 * never routed through here (see the assertion at the end of the
 * switch).
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        if (a0 == 0) {
            tcg_out_goto_long(s, tcg_code_gen_epilogue);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
            tcg_out_goto_long(s, tb_ret_addr);
        }
        break;

    case INDEX_op_goto_tb:
        if (s->tb_jmp_insn_offset != NULL) {
            /* TCG_TARGET_HAS_direct_jump */
            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
               write can be used to patch the target address. */
            if ((uintptr_t)s->code_ptr & 7) {
                tcg_out32(s, NOP);
            }
            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
            /* actual branch destination will be patched by
               tb_target_set_jmp_target later. */
            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
        } else {
            /* !TCG_TARGET_HAS_direct_jump */
            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
        }
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    /* Host loads: the last argument to tcg_out_ldst is log2 of the
       access size.  */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    /* Host stores: a constant data operand is stored from XZR.  */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    /* In the 32-bit variants below, a2 is first narrowed to int32_t
       and sign-extended back before falling into the 64-bit case.  */
    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    /* Remainder: divide into TMP, then MSUB with a1 as the addend
       operand to form the result in a0.  */
    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            /* Variable rotate left: negate the count into TMP, then
               rotate right by it.  */
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev64(s, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev32(s, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, a0, a1);
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        /* Zero-extension from 32 bits is done with a 32-bit move.  */
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case
INDEX_op_sub_vec: 2348 if (is_scalar) { 2349 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2350 } else { 2351 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2352 } 2353 break; 2354 case INDEX_op_mul_vec: 2355 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2356 break; 2357 case INDEX_op_neg_vec: 2358 if (is_scalar) { 2359 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2360 } else { 2361 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2362 } 2363 break; 2364 case INDEX_op_abs_vec: 2365 if (is_scalar) { 2366 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2367 } else { 2368 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2369 } 2370 break; 2371 case INDEX_op_and_vec: 2372 if (const_args[2]) { 2373 is_shimm1632(~a2, &cmode, &imm8); 2374 if (a0 == a1) { 2375 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2376 return; 2377 } 2378 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2379 a2 = a0; 2380 } 2381 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2382 break; 2383 case INDEX_op_or_vec: 2384 if (const_args[2]) { 2385 is_shimm1632(a2, &cmode, &imm8); 2386 if (a0 == a1) { 2387 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2388 return; 2389 } 2390 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2391 a2 = a0; 2392 } 2393 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2394 break; 2395 case INDEX_op_andc_vec: 2396 if (const_args[2]) { 2397 is_shimm1632(a2, &cmode, &imm8); 2398 if (a0 == a1) { 2399 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2400 return; 2401 } 2402 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2403 a2 = a0; 2404 } 2405 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2406 break; 2407 case INDEX_op_orc_vec: 2408 if (const_args[2]) { 2409 is_shimm1632(~a2, &cmode, &imm8); 2410 if (a0 == a1) { 2411 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2412 return; 2413 } 2414 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2415 a2 = a0; 2416 } 2417 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2418 
break; 2419 case INDEX_op_xor_vec: 2420 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2421 break; 2422 case INDEX_op_ssadd_vec: 2423 if (is_scalar) { 2424 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2425 } else { 2426 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2427 } 2428 break; 2429 case INDEX_op_sssub_vec: 2430 if (is_scalar) { 2431 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2432 } else { 2433 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2434 } 2435 break; 2436 case INDEX_op_usadd_vec: 2437 if (is_scalar) { 2438 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2439 } else { 2440 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2441 } 2442 break; 2443 case INDEX_op_ussub_vec: 2444 if (is_scalar) { 2445 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2446 } else { 2447 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2448 } 2449 break; 2450 case INDEX_op_smax_vec: 2451 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2452 break; 2453 case INDEX_op_smin_vec: 2454 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2455 break; 2456 case INDEX_op_umax_vec: 2457 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2458 break; 2459 case INDEX_op_umin_vec: 2460 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2461 break; 2462 case INDEX_op_not_vec: 2463 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2464 break; 2465 case INDEX_op_shli_vec: 2466 if (is_scalar) { 2467 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2468 } else { 2469 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2470 } 2471 break; 2472 case INDEX_op_shri_vec: 2473 if (is_scalar) { 2474 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2475 } else { 2476 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2477 } 2478 break; 2479 case INDEX_op_sari_vec: 2480 if (is_scalar) { 2481 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2482 } else { 2483 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2484 } 2485 
break; 2486 case INDEX_op_aa64_sli_vec: 2487 if (is_scalar) { 2488 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2489 } else { 2490 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2491 } 2492 break; 2493 case INDEX_op_shlv_vec: 2494 if (is_scalar) { 2495 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2496 } else { 2497 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2498 } 2499 break; 2500 case INDEX_op_aa64_sshl_vec: 2501 if (is_scalar) { 2502 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2503 } else { 2504 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2505 } 2506 break; 2507 case INDEX_op_cmp_vec: 2508 { 2509 TCGCond cond = args[3]; 2510 AArch64Insn insn; 2511 2512 if (cond == TCG_COND_NE) { 2513 if (const_args[2]) { 2514 if (is_scalar) { 2515 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 2516 } else { 2517 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2518 } 2519 } else { 2520 if (is_scalar) { 2521 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2522 } else { 2523 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2524 } 2525 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2526 } 2527 } else { 2528 if (const_args[2]) { 2529 if (is_scalar) { 2530 insn = cmp0_scalar_insn[cond]; 2531 if (insn) { 2532 tcg_out_insn_3612(s, insn, vece, a0, a1); 2533 break; 2534 } 2535 } else { 2536 insn = cmp0_vec_insn[cond]; 2537 if (insn) { 2538 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2539 break; 2540 } 2541 } 2542 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2543 a2 = TCG_VEC_TMP; 2544 } 2545 if (is_scalar) { 2546 insn = cmp_scalar_insn[cond]; 2547 if (insn == 0) { 2548 TCGArg t; 2549 t = a1, a1 = a2, a2 = t; 2550 cond = tcg_swap_cond(cond); 2551 insn = cmp_scalar_insn[cond]; 2552 tcg_debug_assert(insn != 0); 2553 } 2554 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2555 } else { 2556 insn = cmp_vec_insn[cond]; 2557 if (insn == 0) { 2558 TCGArg t; 2559 t = a1, a1 = a2, a2 = t; 2560 cond = tcg_swap_cond(cond); 2561 insn 
= cmp_vec_insn[cond]; 2562 tcg_debug_assert(insn != 0); 2563 } 2564 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2565 } 2566 } 2567 } 2568 break; 2569 2570 case INDEX_op_bitsel_vec: 2571 a3 = args[3]; 2572 if (a0 == a3) { 2573 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2574 } else if (a0 == a2) { 2575 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2576 } else { 2577 if (a0 != a1) { 2578 tcg_out_mov(s, type, a0, a1); 2579 } 2580 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2581 } 2582 break; 2583 2584 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2585 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2586 default: 2587 g_assert_not_reached(); 2588 } 2589} 2590 2591int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2592{ 2593 switch (opc) { 2594 case INDEX_op_add_vec: 2595 case INDEX_op_sub_vec: 2596 case INDEX_op_and_vec: 2597 case INDEX_op_or_vec: 2598 case INDEX_op_xor_vec: 2599 case INDEX_op_andc_vec: 2600 case INDEX_op_orc_vec: 2601 case INDEX_op_neg_vec: 2602 case INDEX_op_abs_vec: 2603 case INDEX_op_not_vec: 2604 case INDEX_op_cmp_vec: 2605 case INDEX_op_shli_vec: 2606 case INDEX_op_shri_vec: 2607 case INDEX_op_sari_vec: 2608 case INDEX_op_ssadd_vec: 2609 case INDEX_op_sssub_vec: 2610 case INDEX_op_usadd_vec: 2611 case INDEX_op_ussub_vec: 2612 case INDEX_op_shlv_vec: 2613 case INDEX_op_bitsel_vec: 2614 return 1; 2615 case INDEX_op_rotli_vec: 2616 case INDEX_op_shrv_vec: 2617 case INDEX_op_sarv_vec: 2618 case INDEX_op_rotlv_vec: 2619 case INDEX_op_rotrv_vec: 2620 return -1; 2621 case INDEX_op_mul_vec: 2622 case INDEX_op_smax_vec: 2623 case INDEX_op_smin_vec: 2624 case INDEX_op_umax_vec: 2625 case INDEX_op_umin_vec: 2626 return vece < MO_64; 2627 2628 default: 2629 return 0; 2630 } 2631} 2632 2633void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2634 TCGArg a0, ...) 
2635{ 2636 va_list va; 2637 TCGv_vec v0, v1, v2, t1, t2, c1; 2638 TCGArg a2; 2639 2640 va_start(va, a0); 2641 v0 = temp_tcgv_vec(arg_temp(a0)); 2642 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2643 a2 = va_arg(va, TCGArg); 2644 va_end(va); 2645 2646 switch (opc) { 2647 case INDEX_op_rotli_vec: 2648 t1 = tcg_temp_new_vec(type); 2649 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2650 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2651 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2652 tcg_temp_free_vec(t1); 2653 break; 2654 2655 case INDEX_op_shrv_vec: 2656 case INDEX_op_sarv_vec: 2657 /* Right shifts are negative left shifts for AArch64. */ 2658 v2 = temp_tcgv_vec(arg_temp(a2)); 2659 t1 = tcg_temp_new_vec(type); 2660 tcg_gen_neg_vec(vece, t1, v2); 2661 opc = (opc == INDEX_op_shrv_vec 2662 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2663 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2664 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2665 tcg_temp_free_vec(t1); 2666 break; 2667 2668 case INDEX_op_rotlv_vec: 2669 v2 = temp_tcgv_vec(arg_temp(a2)); 2670 t1 = tcg_temp_new_vec(type); 2671 c1 = tcg_constant_vec(type, vece, 8 << vece); 2672 tcg_gen_sub_vec(vece, t1, v2, c1); 2673 /* Right shifts are negative left shifts for AArch64. */ 2674 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2675 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2676 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2677 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2678 tcg_gen_or_vec(vece, v0, v0, t1); 2679 tcg_temp_free_vec(t1); 2680 break; 2681 2682 case INDEX_op_rotrv_vec: 2683 v2 = temp_tcgv_vec(arg_temp(a2)); 2684 t1 = tcg_temp_new_vec(type); 2685 t2 = tcg_temp_new_vec(type); 2686 c1 = tcg_constant_vec(type, vece, 8 << vece); 2687 tcg_gen_neg_vec(vece, t1, v2); 2688 tcg_gen_sub_vec(vece, t2, c1, v2); 2689 /* Right shifts are negative left shifts for AArch64. 
*/ 2690 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2691 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2692 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2693 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2694 tcg_gen_or_vec(vece, v0, t1, t2); 2695 tcg_temp_free_vec(t1); 2696 tcg_temp_free_vec(t2); 2697 break; 2698 2699 default: 2700 g_assert_not_reached(); 2701 } 2702} 2703 2704static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2705{ 2706 switch (op) { 2707 case INDEX_op_goto_ptr: 2708 return C_O0_I1(r); 2709 2710 case INDEX_op_ld8u_i32: 2711 case INDEX_op_ld8s_i32: 2712 case INDEX_op_ld16u_i32: 2713 case INDEX_op_ld16s_i32: 2714 case INDEX_op_ld_i32: 2715 case INDEX_op_ld8u_i64: 2716 case INDEX_op_ld8s_i64: 2717 case INDEX_op_ld16u_i64: 2718 case INDEX_op_ld16s_i64: 2719 case INDEX_op_ld32u_i64: 2720 case INDEX_op_ld32s_i64: 2721 case INDEX_op_ld_i64: 2722 case INDEX_op_neg_i32: 2723 case INDEX_op_neg_i64: 2724 case INDEX_op_not_i32: 2725 case INDEX_op_not_i64: 2726 case INDEX_op_bswap16_i32: 2727 case INDEX_op_bswap32_i32: 2728 case INDEX_op_bswap16_i64: 2729 case INDEX_op_bswap32_i64: 2730 case INDEX_op_bswap64_i64: 2731 case INDEX_op_ext8s_i32: 2732 case INDEX_op_ext16s_i32: 2733 case INDEX_op_ext8u_i32: 2734 case INDEX_op_ext16u_i32: 2735 case INDEX_op_ext8s_i64: 2736 case INDEX_op_ext16s_i64: 2737 case INDEX_op_ext32s_i64: 2738 case INDEX_op_ext8u_i64: 2739 case INDEX_op_ext16u_i64: 2740 case INDEX_op_ext32u_i64: 2741 case INDEX_op_ext_i32_i64: 2742 case INDEX_op_extu_i32_i64: 2743 case INDEX_op_extract_i32: 2744 case INDEX_op_extract_i64: 2745 case INDEX_op_sextract_i32: 2746 case INDEX_op_sextract_i64: 2747 return C_O1_I1(r, r); 2748 2749 case INDEX_op_st8_i32: 2750 case INDEX_op_st16_i32: 2751 case INDEX_op_st_i32: 2752 case INDEX_op_st8_i64: 2753 case INDEX_op_st16_i64: 2754 case INDEX_op_st32_i64: 2755 case INDEX_op_st_i64: 2756 return C_O0_I2(rZ, r); 2757 2758 case INDEX_op_add_i32: 2759 case INDEX_op_add_i64: 2760 case 
INDEX_op_sub_i32: 2761 case INDEX_op_sub_i64: 2762 case INDEX_op_setcond_i32: 2763 case INDEX_op_setcond_i64: 2764 return C_O1_I2(r, r, rA); 2765 2766 case INDEX_op_mul_i32: 2767 case INDEX_op_mul_i64: 2768 case INDEX_op_div_i32: 2769 case INDEX_op_div_i64: 2770 case INDEX_op_divu_i32: 2771 case INDEX_op_divu_i64: 2772 case INDEX_op_rem_i32: 2773 case INDEX_op_rem_i64: 2774 case INDEX_op_remu_i32: 2775 case INDEX_op_remu_i64: 2776 case INDEX_op_muluh_i64: 2777 case INDEX_op_mulsh_i64: 2778 return C_O1_I2(r, r, r); 2779 2780 case INDEX_op_and_i32: 2781 case INDEX_op_and_i64: 2782 case INDEX_op_or_i32: 2783 case INDEX_op_or_i64: 2784 case INDEX_op_xor_i32: 2785 case INDEX_op_xor_i64: 2786 case INDEX_op_andc_i32: 2787 case INDEX_op_andc_i64: 2788 case INDEX_op_orc_i32: 2789 case INDEX_op_orc_i64: 2790 case INDEX_op_eqv_i32: 2791 case INDEX_op_eqv_i64: 2792 return C_O1_I2(r, r, rL); 2793 2794 case INDEX_op_shl_i32: 2795 case INDEX_op_shr_i32: 2796 case INDEX_op_sar_i32: 2797 case INDEX_op_rotl_i32: 2798 case INDEX_op_rotr_i32: 2799 case INDEX_op_shl_i64: 2800 case INDEX_op_shr_i64: 2801 case INDEX_op_sar_i64: 2802 case INDEX_op_rotl_i64: 2803 case INDEX_op_rotr_i64: 2804 return C_O1_I2(r, r, ri); 2805 2806 case INDEX_op_clz_i32: 2807 case INDEX_op_ctz_i32: 2808 case INDEX_op_clz_i64: 2809 case INDEX_op_ctz_i64: 2810 return C_O1_I2(r, r, rAL); 2811 2812 case INDEX_op_brcond_i32: 2813 case INDEX_op_brcond_i64: 2814 return C_O0_I2(r, rA); 2815 2816 case INDEX_op_movcond_i32: 2817 case INDEX_op_movcond_i64: 2818 return C_O1_I4(r, r, rA, rZ, rZ); 2819 2820 case INDEX_op_qemu_ld_i32: 2821 case INDEX_op_qemu_ld_i64: 2822 return C_O1_I1(r, l); 2823 case INDEX_op_qemu_st_i32: 2824 case INDEX_op_qemu_st_i64: 2825 return C_O0_I2(lZ, l); 2826 2827 case INDEX_op_deposit_i32: 2828 case INDEX_op_deposit_i64: 2829 return C_O1_I2(r, 0, rZ); 2830 2831 case INDEX_op_extract2_i32: 2832 case INDEX_op_extract2_i64: 2833 return C_O1_I2(r, rZ, rZ); 2834 2835 case INDEX_op_add2_i32: 2836 case 
INDEX_op_add2_i64: 2837 case INDEX_op_sub2_i32: 2838 case INDEX_op_sub2_i64: 2839 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 2840 2841 case INDEX_op_add_vec: 2842 case INDEX_op_sub_vec: 2843 case INDEX_op_mul_vec: 2844 case INDEX_op_xor_vec: 2845 case INDEX_op_ssadd_vec: 2846 case INDEX_op_sssub_vec: 2847 case INDEX_op_usadd_vec: 2848 case INDEX_op_ussub_vec: 2849 case INDEX_op_smax_vec: 2850 case INDEX_op_smin_vec: 2851 case INDEX_op_umax_vec: 2852 case INDEX_op_umin_vec: 2853 case INDEX_op_shlv_vec: 2854 case INDEX_op_shrv_vec: 2855 case INDEX_op_sarv_vec: 2856 case INDEX_op_aa64_sshl_vec: 2857 return C_O1_I2(w, w, w); 2858 case INDEX_op_not_vec: 2859 case INDEX_op_neg_vec: 2860 case INDEX_op_abs_vec: 2861 case INDEX_op_shli_vec: 2862 case INDEX_op_shri_vec: 2863 case INDEX_op_sari_vec: 2864 return C_O1_I1(w, w); 2865 case INDEX_op_ld_vec: 2866 case INDEX_op_dupm_vec: 2867 return C_O1_I1(w, r); 2868 case INDEX_op_st_vec: 2869 return C_O0_I2(w, r); 2870 case INDEX_op_dup_vec: 2871 return C_O1_I1(w, wr); 2872 case INDEX_op_or_vec: 2873 case INDEX_op_andc_vec: 2874 return C_O1_I2(w, w, wO); 2875 case INDEX_op_and_vec: 2876 case INDEX_op_orc_vec: 2877 return C_O1_I2(w, w, wN); 2878 case INDEX_op_cmp_vec: 2879 return C_O1_I2(w, w, wZ); 2880 case INDEX_op_bitsel_vec: 2881 return C_O1_I3(w, w, w, w); 2882 case INDEX_op_aa64_sli_vec: 2883 return C_O1_I2(w, 0, w); 2884 2885 default: 2886 g_assert_not_reached(); 2887 } 2888} 2889 2890static void tcg_target_init(TCGContext *s) 2891{ 2892 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2893 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2894 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2895 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2896 2897 tcg_target_call_clobber_regs = -1ull; 2898 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2899 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2900 tcg_regset_reset_reg(tcg_target_call_clobber_regs, 
TCG_REG_X21); 2901 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2902 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2903 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2904 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2905 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2906 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2907 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2908 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2909 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2910 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2911 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2912 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2913 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2914 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 2915 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 2916 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 2917 2918 s->reserved_regs = 0; 2919 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2920 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 2921 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 2922 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 2923 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 2924} 2925 2926/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 2927#define PUSH_SIZE ((30 - 19 + 1) * 8) 2928 2929#define FRAME_SIZE \ 2930 ((PUSH_SIZE \ 2931 + TCG_STATIC_CALL_ARGS_SIZE \ 2932 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 2933 + TCG_TARGET_STACK_ALIGN - 1) \ 2934 & ~(TCG_TARGET_STACK_ALIGN - 1)) 2935 2936/* We're expecting a 2 byte uleb128 encoded value. */ 2937QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 2938 2939/* We're expecting to use a single ADDI insn. 
*/ 2940QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 2941 2942static void tcg_target_qemu_prologue(TCGContext *s) 2943{ 2944 TCGReg r; 2945 2946 /* Push (FP, LR) and allocate space for all saved registers. */ 2947 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 2948 TCG_REG_SP, -PUSH_SIZE, 1, 1); 2949 2950 /* Set up frame pointer for canonical unwinding. */ 2951 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 2952 2953 /* Store callee-preserved regs x19..x28. */ 2954 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2955 int ofs = (r - TCG_REG_X19 + 2) * 8; 2956 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2957 } 2958 2959 /* Make stack space for TCG locals. */ 2960 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2961 FRAME_SIZE - PUSH_SIZE); 2962 2963 /* Inform TCG about how to find TCG locals with register, offset, size. */ 2964 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 2965 CPU_TEMP_BUF_NLONGS * sizeof(long)); 2966 2967#if !defined(CONFIG_SOFTMMU) 2968 if (USE_GUEST_BASE) { 2969 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 2970 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 2971 } 2972#endif 2973 2974 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2975 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 2976 2977 /* 2978 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 2979 * and fall through to the rest of the epilogue. 2980 */ 2981 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2982 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 2983 2984 /* TB epilogue */ 2985 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 2986 2987 /* Remove TCG locals stack space. */ 2988 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2989 FRAME_SIZE - PUSH_SIZE); 2990 2991 /* Restore registers x19..x28. 
*/ 2992 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2993 int ofs = (r - TCG_REG_X19 + 2) * 8; 2994 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2995 } 2996 2997 /* Pop (FP, LR), restore SP to previous frame. */ 2998 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 2999 TCG_REG_SP, PUSH_SIZE, 0, 1); 3000 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 3001} 3002 3003static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 3004{ 3005 int i; 3006 for (i = 0; i < count; ++i) { 3007 p[i] = NOP; 3008 } 3009} 3010 3011typedef struct { 3012 DebugFrameHeader h; 3013 uint8_t fde_def_cfa[4]; 3014 uint8_t fde_reg_ofs[24]; 3015} DebugFrame; 3016 3017#define ELF_HOST_MACHINE EM_AARCH64 3018 3019static const DebugFrame debug_frame = { 3020 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 3021 .h.cie.id = -1, 3022 .h.cie.version = 1, 3023 .h.cie.code_align = 1, 3024 .h.cie.data_align = 0x78, /* sleb128 -8 */ 3025 .h.cie.return_column = TCG_REG_LR, 3026 3027 /* Total FDE size does not include the "len" member. */ 3028 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 3029 3030 .fde_def_cfa = { 3031 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 3032 (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ 3033 (FRAME_SIZE >> 7) 3034 }, 3035 .fde_reg_ofs = { 3036 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 3037 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 3038 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 3039 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 3040 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 3041 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 3042 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 3043 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 3044 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 3045 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ 3046 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 3047 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 3048 } 3049}; 3050 3051void tcg_register_jit(const void *buf, size_t buf_size) 3052{ 3053 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 3054} 3055