/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.  */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
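/*
 * Illustrative examples: 0x0ff0 (a single run of ones) and its inverse
 * 0xfffffffffffff00f both match; 0x0ff00ff0 is rejected here even though
 * the full replicated logical-immediate encoding could represent it for
 * a 32-bit operation.
 */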
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.
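   That is, an AdvSIMD FMOV 8-bit immediate: +/- n/16 * 2^r with n in
   16..31 and r in -3..4.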
*/ 221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 222{ 223 if (extract32(v32, 0, 19) == 0 224 && (extract32(v32, 25, 6) == 0x20 225 || extract32(v32, 25, 6) == 0x1f)) { 226 *cmode = 0xf; 227 *imm8 = (extract32(v32, 31, 1) << 7) 228 | (extract32(v32, 25, 1) << 6) 229 | extract32(v32, 19, 6); 230 return true; 231 } 232 return false; 233} 234 235/* Return true if v64 is a valid float64 immediate. */ 236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 237{ 238 if (extract64(v64, 0, 48) == 0 239 && (extract64(v64, 54, 9) == 0x100 240 || extract64(v64, 54, 9) == 0x0ff)) { 241 *cmode = 0xf; 242 *imm8 = (extract64(v64, 63, 1) << 7) 243 | (extract64(v64, 54, 1) << 6) 244 | extract64(v64, 48, 6); 245 return true; 246 } 247 return false; 248} 249 250/* 251 * Return non-zero if v32 can be formed by MOVI+ORR. 252 * Place the parameters for MOVI in (cmode, imm8). 253 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 254 */ 255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 256{ 257 int i; 258 259 for (i = 6; i > 0; i -= 2) { 260 /* Mask out one byte we can add with ORR. */ 261 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 262 if (is_shimm32(tmp, cmode, imm8) || 263 is_soimm32(tmp, cmode, imm8)) { 264 break; 265 } 266 } 267 return i; 268} 269 270/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 272{ 273 if (v32 == deposit32(v32, 16, 16, v32)) { 274 return is_shimm16(v32, cmode, imm8); 275 } else { 276 return is_shimm32(v32, cmode, imm8); 277 } 278} 279 280static int tcg_target_const_match(tcg_target_long val, TCGType type, 281 const TCGArgConstraint *arg_ct) 282{ 283 int ct = arg_ct->ct; 284 285 if (ct & TCG_CT_CONST) { 286 return 1; 287 } 288 if (type == TCG_TYPE_I32) { 289 val = (int32_t)val; 290 } 291 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 292 return 1; 293 } 294 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 295 return 1; 296 } 297 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 298 return 1; 299 } 300 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 301 return 1; 302 } 303 304 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 305 case 0: 306 break; 307 case TCG_CT_CONST_ANDI: 308 val = ~val; 309 /* fallthru */ 310 case TCG_CT_CONST_ORRI: 311 if (val == deposit64(val, 32, 32, val)) { 312 int cmode, imm8; 313 return is_shimm1632(val, &cmode, &imm8); 314 } 315 break; 316 default: 317 /* Both bits should not be set for the same insn. 
*/ 318 g_assert_not_reached(); 319 } 320 321 return 0; 322} 323 324enum aarch64_cond_code { 325 COND_EQ = 0x0, 326 COND_NE = 0x1, 327 COND_CS = 0x2, /* Unsigned greater or equal */ 328 COND_HS = COND_CS, /* ALIAS greater or equal */ 329 COND_CC = 0x3, /* Unsigned less than */ 330 COND_LO = COND_CC, /* ALIAS Lower */ 331 COND_MI = 0x4, /* Negative */ 332 COND_PL = 0x5, /* Zero or greater */ 333 COND_VS = 0x6, /* Overflow */ 334 COND_VC = 0x7, /* No overflow */ 335 COND_HI = 0x8, /* Unsigned greater than */ 336 COND_LS = 0x9, /* Unsigned less or equal */ 337 COND_GE = 0xa, 338 COND_LT = 0xb, 339 COND_GT = 0xc, 340 COND_LE = 0xd, 341 COND_AL = 0xe, 342 COND_NV = 0xf, /* behaves like COND_AL here */ 343}; 344 345static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 346 [TCG_COND_EQ] = COND_EQ, 347 [TCG_COND_NE] = COND_NE, 348 [TCG_COND_LT] = COND_LT, 349 [TCG_COND_GE] = COND_GE, 350 [TCG_COND_LE] = COND_LE, 351 [TCG_COND_GT] = COND_GT, 352 /* unsigned */ 353 [TCG_COND_LTU] = COND_LO, 354 [TCG_COND_GTU] = COND_HI, 355 [TCG_COND_GEU] = COND_HS, 356 [TCG_COND_LEU] = COND_LS, 357}; 358 359typedef enum { 360 LDST_ST = 0, /* store */ 361 LDST_LD = 1, /* load */ 362 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 363 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 364} AArch64LdstType; 365 366/* We encode the format of the insn into the beginning of the name, so that 367 we can have the preprocessor help "typecheck" the insn vs the output 368 function. Arm didn't provide us with nice names for the formats, so we 369 use the section number of the architecture reference manual in which the 370 instruction group is described. */ 371typedef enum { 372 /* Compare and branch (immediate). */ 373 I3201_CBZ = 0x34000000, 374 I3201_CBNZ = 0x35000000, 375 376 /* Conditional branch (immediate). */ 377 I3202_B_C = 0x54000000, 378 379 /* Unconditional branch (immediate). */ 380 I3206_B = 0x14000000, 381 I3206_BL = 0x94000000, 382 383 /* Unconditional branch (register). */ 384 I3207_BR = 0xd61f0000, 385 I3207_BLR = 0xd63f0000, 386 I3207_RET = 0xd65f0000, 387 388 /* AdvSIMD load/store single structure. */ 389 I3303_LD1R = 0x0d40c000, 390 391 /* Load literal for loading the address at pc-relative offset */ 392 I3305_LDR = 0x58000000, 393 I3305_LDR_v64 = 0x5c000000, 394 I3305_LDR_v128 = 0x9c000000, 395 396 /* Load/store register. Described here as 3.3.12, but the helper 397 that emits them can transform to 3.3.10 or 3.3.13. 
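       (3.3.10 is the register-offset form and 3.3.13 the scaled
       unsigned-immediate form; see tcg_out_insn_3310 and tcg_out_insn_3313
       below.)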
*/ 398 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 399 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 400 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 401 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 402 403 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 404 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 405 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 406 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 407 408 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 409 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 410 411 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 412 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 413 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 414 415 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 416 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 417 418 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 419 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 420 421 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 422 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 423 424 I3312_TO_I3310 = 0x00200800, 425 I3312_TO_I3313 = 0x01000000, 426 427 /* Load/store register pair instructions. */ 428 I3314_LDP = 0x28400000, 429 I3314_STP = 0x28000000, 430 431 /* Add/subtract immediate instructions. */ 432 I3401_ADDI = 0x11000000, 433 I3401_ADDSI = 0x31000000, 434 I3401_SUBI = 0x51000000, 435 I3401_SUBSI = 0x71000000, 436 437 /* Bitfield instructions. */ 438 I3402_BFM = 0x33000000, 439 I3402_SBFM = 0x13000000, 440 I3402_UBFM = 0x53000000, 441 442 /* Extract instruction. */ 443 I3403_EXTR = 0x13800000, 444 445 /* Logical immediate instructions. */ 446 I3404_ANDI = 0x12000000, 447 I3404_ORRI = 0x32000000, 448 I3404_EORI = 0x52000000, 449 450 /* Move wide immediate instructions. */ 451 I3405_MOVN = 0x12800000, 452 I3405_MOVZ = 0x52800000, 453 I3405_MOVK = 0x72800000, 454 455 /* PC relative addressing instructions. */ 456 I3406_ADR = 0x10000000, 457 I3406_ADRP = 0x90000000, 458 459 /* Add/subtract shifted register instructions (without a shift). */ 460 I3502_ADD = 0x0b000000, 461 I3502_ADDS = 0x2b000000, 462 I3502_SUB = 0x4b000000, 463 I3502_SUBS = 0x6b000000, 464 465 /* Add/subtract shifted register instructions (with a shift). */ 466 I3502S_ADD_LSL = I3502_ADD, 467 468 /* Add/subtract with carry instructions. */ 469 I3503_ADC = 0x1a000000, 470 I3503_SBC = 0x5a000000, 471 472 /* Conditional select instructions. */ 473 I3506_CSEL = 0x1a800000, 474 I3506_CSINC = 0x1a800400, 475 I3506_CSINV = 0x5a800000, 476 I3506_CSNEG = 0x5a800400, 477 478 /* Data-processing (1 source) instructions. */ 479 I3507_CLZ = 0x5ac01000, 480 I3507_RBIT = 0x5ac00000, 481 I3507_REV16 = 0x5ac00400, 482 I3507_REV32 = 0x5ac00800, 483 I3507_REV64 = 0x5ac00c00, 484 485 /* Data-processing (2 source) instructions. */ 486 I3508_LSLV = 0x1ac02000, 487 I3508_LSRV = 0x1ac02400, 488 I3508_ASRV = 0x1ac02800, 489 I3508_RORV = 0x1ac02c00, 490 I3508_SMULH = 0x9b407c00, 491 I3508_UMULH = 0x9bc07c00, 492 I3508_UDIV = 0x1ac00800, 493 I3508_SDIV = 0x1ac00c00, 494 495 /* Data-processing (3 source) instructions. */ 496 I3509_MADD = 0x1b000000, 497 I3509_MSUB = 0x1b008000, 498 499 /* Logical shifted register instructions (without a shift). 
*/ 500 I3510_AND = 0x0a000000, 501 I3510_BIC = 0x0a200000, 502 I3510_ORR = 0x2a000000, 503 I3510_ORN = 0x2a200000, 504 I3510_EOR = 0x4a000000, 505 I3510_EON = 0x4a200000, 506 I3510_ANDS = 0x6a000000, 507 508 /* Logical shifted register instructions (with a shift). */ 509 I3502S_AND_LSR = I3510_AND | (1 << 22), 510 511 /* AdvSIMD copy */ 512 I3605_DUP = 0x0e000400, 513 I3605_INS = 0x4e001c00, 514 I3605_UMOV = 0x0e003c00, 515 516 /* AdvSIMD modified immediate */ 517 I3606_MOVI = 0x0f000400, 518 I3606_MVNI = 0x2f000400, 519 I3606_BIC = 0x2f001400, 520 I3606_ORR = 0x0f001400, 521 522 /* AdvSIMD scalar shift by immediate */ 523 I3609_SSHR = 0x5f000400, 524 I3609_SSRA = 0x5f001400, 525 I3609_SHL = 0x5f005400, 526 I3609_USHR = 0x7f000400, 527 I3609_USRA = 0x7f001400, 528 I3609_SLI = 0x7f005400, 529 530 /* AdvSIMD scalar three same */ 531 I3611_SQADD = 0x5e200c00, 532 I3611_SQSUB = 0x5e202c00, 533 I3611_CMGT = 0x5e203400, 534 I3611_CMGE = 0x5e203c00, 535 I3611_SSHL = 0x5e204400, 536 I3611_ADD = 0x5e208400, 537 I3611_CMTST = 0x5e208c00, 538 I3611_UQADD = 0x7e200c00, 539 I3611_UQSUB = 0x7e202c00, 540 I3611_CMHI = 0x7e203400, 541 I3611_CMHS = 0x7e203c00, 542 I3611_USHL = 0x7e204400, 543 I3611_SUB = 0x7e208400, 544 I3611_CMEQ = 0x7e208c00, 545 546 /* AdvSIMD scalar two-reg misc */ 547 I3612_CMGT0 = 0x5e208800, 548 I3612_CMEQ0 = 0x5e209800, 549 I3612_CMLT0 = 0x5e20a800, 550 I3612_ABS = 0x5e20b800, 551 I3612_CMGE0 = 0x7e208800, 552 I3612_CMLE0 = 0x7e209800, 553 I3612_NEG = 0x7e20b800, 554 555 /* AdvSIMD shift by immediate */ 556 I3614_SSHR = 0x0f000400, 557 I3614_SSRA = 0x0f001400, 558 I3614_SHL = 0x0f005400, 559 I3614_SLI = 0x2f005400, 560 I3614_USHR = 0x2f000400, 561 I3614_USRA = 0x2f001400, 562 563 /* AdvSIMD three same. */ 564 I3616_ADD = 0x0e208400, 565 I3616_AND = 0x0e201c00, 566 I3616_BIC = 0x0e601c00, 567 I3616_BIF = 0x2ee01c00, 568 I3616_BIT = 0x2ea01c00, 569 I3616_BSL = 0x2e601c00, 570 I3616_EOR = 0x2e201c00, 571 I3616_MUL = 0x0e209c00, 572 I3616_ORR = 0x0ea01c00, 573 I3616_ORN = 0x0ee01c00, 574 I3616_SUB = 0x2e208400, 575 I3616_CMGT = 0x0e203400, 576 I3616_CMGE = 0x0e203c00, 577 I3616_CMTST = 0x0e208c00, 578 I3616_CMHI = 0x2e203400, 579 I3616_CMHS = 0x2e203c00, 580 I3616_CMEQ = 0x2e208c00, 581 I3616_SMAX = 0x0e206400, 582 I3616_SMIN = 0x0e206c00, 583 I3616_SSHL = 0x0e204400, 584 I3616_SQADD = 0x0e200c00, 585 I3616_SQSUB = 0x0e202c00, 586 I3616_UMAX = 0x2e206400, 587 I3616_UMIN = 0x2e206c00, 588 I3616_UQADD = 0x2e200c00, 589 I3616_UQSUB = 0x2e202c00, 590 I3616_USHL = 0x2e204400, 591 592 /* AdvSIMD two-reg misc. */ 593 I3617_CMGT0 = 0x0e208800, 594 I3617_CMEQ0 = 0x0e209800, 595 I3617_CMLT0 = 0x0e20a800, 596 I3617_CMGE0 = 0x2e208800, 597 I3617_CMLE0 = 0x2e209800, 598 I3617_NOT = 0x2e205800, 599 I3617_ABS = 0x0e20b800, 600 I3617_NEG = 0x2e20b800, 601 602 /* System instructions. */ 603 NOP = 0xd503201f, 604 DMB_ISH = 0xd50338bf, 605 DMB_LD = 0x00000100, 606 DMB_ST = 0x00000200, 607} AArch64Insn; 608 609static inline uint32_t tcg_in32(TCGContext *s) 610{ 611 uint32_t v = *(uint32_t *)s->code_ptr; 612 return v; 613} 614 615/* Emit an opcode with "type-checking" of the format. */ 616#define tcg_out_insn(S, FMT, OP, ...) 
\ 617 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 618 619static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 620 TCGReg rt, TCGReg rn, unsigned size) 621{ 622 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 623} 624 625static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 626 int imm19, TCGReg rt) 627{ 628 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 629} 630 631static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 632 TCGReg rt, int imm19) 633{ 634 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 635} 636 637static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 638 TCGCond c, int imm19) 639{ 640 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 641} 642 643static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 644{ 645 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 646} 647 648static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 649{ 650 tcg_out32(s, insn | rn << 5); 651} 652 653static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 654 TCGReg r1, TCGReg r2, TCGReg rn, 655 tcg_target_long ofs, bool pre, bool w) 656{ 657 insn |= 1u << 31; /* ext */ 658 insn |= pre << 24; 659 insn |= w << 23; 660 661 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 662 insn |= (ofs & (0x7f << 3)) << (15 - 3); 663 664 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 665} 666 667static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 668 TCGReg rd, TCGReg rn, uint64_t aimm) 669{ 670 if (aimm > 0xfff) { 671 tcg_debug_assert((aimm & 0xfff) == 0); 672 aimm >>= 12; 673 tcg_debug_assert(aimm <= 0xfff); 674 aimm |= 1 << 12; /* apply LSL 12 */ 675 } 676 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 677} 678 679/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 680 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 681 that feed the DecodeBitMasks pseudo function. */ 682static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 683 TCGReg rd, TCGReg rn, int n, int immr, int imms) 684{ 685 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 686 | rn << 5 | rd); 687} 688 689#define tcg_out_insn_3404 tcg_out_insn_3402 690 691static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 692 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 693{ 694 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 695 | rn << 5 | rd); 696} 697 698/* This function is used for the Move (wide immediate) instruction group. 699 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 700static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 701 TCGReg rd, uint16_t half, unsigned shift) 702{ 703 tcg_debug_assert((shift & ~0x30) == 0); 704 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 705} 706 707static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 708 TCGReg rd, int64_t disp) 709{ 710 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 711} 712 713/* This function is for both 3.5.2 (Add/Subtract shifted register), for 714 the rare occasion when we actually want to supply a shift amount. 
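   (For example, the softmmu TLB lookup folds the page-index shift into
   an AND by emitting I3502S_AND_LSR through this helper.)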
 */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
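    /* The 0x4000 | ext << 13 bits below select the offset-register extend
       option: UXTW when ext is 0, LSL with a 64-bit X offset when ext is 1,
       so a 32-bit offset register is zero-extended into the address.  */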
815 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 816 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 817 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 818} 819 820static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 821 TCGReg rd, TCGReg rn, intptr_t offset) 822{ 823 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 824} 825 826static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 827 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 828{ 829 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 830 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 831 | rn << 5 | (rd & 0x1f)); 832} 833 834/* Register to register move using ORR (shifted register with no shift). */ 835static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 836{ 837 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 838} 839 840/* Register to register move using ADDI (move to/from SP). */ 841static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 842{ 843 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 844} 845 846/* This function is used for the Logical (immediate) instruction group. 847 The value of LIMM must satisfy IS_LIMM. See the comment above about 848 only supporting simplified logical immediates. */ 849static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 850 TCGReg rd, TCGReg rn, uint64_t limm) 851{ 852 unsigned h, l, r, c; 853 854 tcg_debug_assert(is_limm(limm)); 855 856 h = clz64(limm); 857 l = ctz64(limm); 858 if (l == 0) { 859 r = 0; /* form 0....01....1 */ 860 c = ctz64(~limm) - 1; 861 if (h == 0) { 862 r = clz64(~limm); /* form 1..10..01..1 */ 863 c += r; 864 } 865 } else { 866 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 867 c = r - h - 1; 868 } 869 if (ext == TCG_TYPE_I32) { 870 r &= 31; 871 c &= 31; 872 } 873 874 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 875} 876 877static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 878 TCGReg rd, int64_t v64) 879{ 880 bool q = type == TCG_TYPE_V128; 881 int cmode, imm8, i; 882 883 /* Test all bytes equal first. */ 884 if (vece == MO_8) { 885 imm8 = (uint8_t)v64; 886 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 887 return; 888 } 889 890 /* 891 * Test all bytes 0x00 or 0xff second. This can match cases that 892 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 893 */ 894 for (i = imm8 = 0; i < 8; i++) { 895 uint8_t byte = v64 >> (i * 8); 896 if (byte == 0xff) { 897 imm8 |= 1 << i; 898 } else if (byte != 0) { 899 goto fail_bytes; 900 } 901 } 902 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 903 return; 904 fail_bytes: 905 906 /* 907 * Tests for various replications. For each element width, if we 908 * cannot find an expansion there's no point checking a larger 909 * width because we already know by replication it cannot match. 910 */ 911 if (vece == MO_16) { 912 uint16_t v16 = v64; 913 914 if (is_shimm16(v16, &cmode, &imm8)) { 915 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 916 return; 917 } 918 if (is_shimm16(~v16, &cmode, &imm8)) { 919 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 920 return; 921 } 922 923 /* 924 * Otherwise, all remaining constants can be loaded in two insns: 925 * rd = v16 & 0xff, rd |= v16 & 0xff00. 
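     * (MOVI with cmode 0x8 writes imm8 to the low byte of each 16-bit
     * lane; ORR with cmode 0xa then merges imm8 shifted left by 8.)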
926 */ 927 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 928 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 929 return; 930 } else if (vece == MO_32) { 931 uint32_t v32 = v64; 932 uint32_t n32 = ~v32; 933 934 if (is_shimm32(v32, &cmode, &imm8) || 935 is_soimm32(v32, &cmode, &imm8) || 936 is_fimm32(v32, &cmode, &imm8)) { 937 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 938 return; 939 } 940 if (is_shimm32(n32, &cmode, &imm8) || 941 is_soimm32(n32, &cmode, &imm8)) { 942 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 943 return; 944 } 945 946 /* 947 * Restrict the set of constants to those we can load with 948 * two instructions. Others we load from the pool. 949 */ 950 i = is_shimm32_pair(v32, &cmode, &imm8); 951 if (i) { 952 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 953 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 954 return; 955 } 956 i = is_shimm32_pair(n32, &cmode, &imm8); 957 if (i) { 958 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 959 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 960 return; 961 } 962 } else if (is_fimm64(v64, &cmode, &imm8)) { 963 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 964 return; 965 } 966 967 /* 968 * As a last resort, load from the constant pool. Sadly there 969 * is no LD1R (literal), so store the full 16-byte vector. 970 */ 971 if (type == TCG_TYPE_V128) { 972 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 973 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 974 } else { 975 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 976 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 977 } 978} 979 980static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 981 TCGReg rd, TCGReg rs) 982{ 983 int is_q = type - TCG_TYPE_V64; 984 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 985 return true; 986} 987 988static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 989 TCGReg r, TCGReg base, intptr_t offset) 990{ 991 TCGReg temp = TCG_REG_TMP; 992 993 if (offset < -0xffffff || offset > 0xffffff) { 994 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 995 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 996 base = temp; 997 } else { 998 AArch64Insn add_insn = I3401_ADDI; 999 1000 if (offset < 0) { 1001 add_insn = I3401_SUBI; 1002 offset = -offset; 1003 } 1004 if (offset & 0xfff000) { 1005 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1006 base = temp; 1007 } 1008 if (offset & 0xfff) { 1009 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1010 base = temp; 1011 } 1012 } 1013 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1014 return true; 1015} 1016 1017static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1018 tcg_target_long value) 1019{ 1020 tcg_target_long svalue = value; 1021 tcg_target_long ivalue = ~value; 1022 tcg_target_long t0, t1, t2; 1023 int s0, s1; 1024 AArch64Insn opc; 1025 1026 switch (type) { 1027 case TCG_TYPE_I32: 1028 case TCG_TYPE_I64: 1029 tcg_debug_assert(rd < 32); 1030 break; 1031 default: 1032 g_assert_not_reached(); 1033 } 1034 1035 /* For 32-bit values, discard potential garbage in value. For 64-bit 1036 values within [2**31, 2**32-1], we can create smaller sequences by 1037 interpreting this as a negative 32-bit number, while ensuring that 1038 the high 32 bits are cleared by setting SF=0. 
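       For example, 0xffffffff is then emitted as a single MOVN of zero
       with SF=0 instead of a longer 64-bit sequence.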
*/ 1039 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1040 svalue = (int32_t)value; 1041 value = (uint32_t)value; 1042 ivalue = (uint32_t)ivalue; 1043 type = TCG_TYPE_I32; 1044 } 1045 1046 /* Speed things up by handling the common case of small positive 1047 and negative values specially. */ 1048 if ((value & ~0xffffull) == 0) { 1049 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1050 return; 1051 } else if ((ivalue & ~0xffffull) == 0) { 1052 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1053 return; 1054 } 1055 1056 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1057 use the sign-extended value. That lets us match rotated values such 1058 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1059 if (is_limm(svalue)) { 1060 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1061 return; 1062 } 1063 1064 /* Look for host pointer values within 4G of the PC. This happens 1065 often when loading pointers to QEMU's own data structures. */ 1066 if (type == TCG_TYPE_I64) { 1067 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1068 tcg_target_long disp = value - src_rx; 1069 if (disp == sextract64(disp, 0, 21)) { 1070 tcg_out_insn(s, 3406, ADR, rd, disp); 1071 return; 1072 } 1073 disp = (value >> 12) - (src_rx >> 12); 1074 if (disp == sextract64(disp, 0, 21)) { 1075 tcg_out_insn(s, 3406, ADRP, rd, disp); 1076 if (value & 0xfff) { 1077 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1078 } 1079 return; 1080 } 1081 } 1082 1083 /* Would it take fewer insns to begin with MOVN? */ 1084 if (ctpop64(value) >= 32) { 1085 t0 = ivalue; 1086 opc = I3405_MOVN; 1087 } else { 1088 t0 = value; 1089 opc = I3405_MOVZ; 1090 } 1091 s0 = ctz64(t0) & (63 & -16); 1092 t1 = t0 & ~(0xffffUL << s0); 1093 s1 = ctz64(t1) & (63 & -16); 1094 t2 = t1 & ~(0xffffUL << s1); 1095 if (t2 == 0) { 1096 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1097 if (t1 != 0) { 1098 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1099 } 1100 return; 1101 } 1102 1103 /* For more than 2 insns, dump it into the constant pool. */ 1104 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1105 tcg_out_insn(s, 3305, LDR, 0, rd); 1106} 1107 1108/* Define something more legible for general use. */ 1109#define tcg_out_ldst_r tcg_out_insn_3310 1110 1111static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1112 TCGReg rn, intptr_t offset, int lgsize) 1113{ 1114 /* If the offset is naturally aligned and in range, then we can 1115 use the scaled uimm12 encoding */ 1116 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1117 uintptr_t scaled_uimm = offset >> lgsize; 1118 if (scaled_uimm <= 0xfff) { 1119 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1120 return; 1121 } 1122 } 1123 1124 /* Small signed offsets can use the unscaled encoding. */ 1125 if (offset >= -256 && offset < 256) { 1126 tcg_out_insn_3312(s, insn, rd, rn, offset); 1127 return; 1128 } 1129 1130 /* Worst-case scenario, move offset to temp register, use reg offset. 
*/ 1131 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1132 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1133} 1134 1135static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1136{ 1137 if (ret == arg) { 1138 return true; 1139 } 1140 switch (type) { 1141 case TCG_TYPE_I32: 1142 case TCG_TYPE_I64: 1143 if (ret < 32 && arg < 32) { 1144 tcg_out_movr(s, type, ret, arg); 1145 break; 1146 } else if (ret < 32) { 1147 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1148 break; 1149 } else if (arg < 32) { 1150 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1151 break; 1152 } 1153 /* FALLTHRU */ 1154 1155 case TCG_TYPE_V64: 1156 tcg_debug_assert(ret >= 32 && arg >= 32); 1157 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1158 break; 1159 case TCG_TYPE_V128: 1160 tcg_debug_assert(ret >= 32 && arg >= 32); 1161 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1162 break; 1163 1164 default: 1165 g_assert_not_reached(); 1166 } 1167 return true; 1168} 1169 1170static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1171 TCGReg base, intptr_t ofs) 1172{ 1173 AArch64Insn insn; 1174 int lgsz; 1175 1176 switch (type) { 1177 case TCG_TYPE_I32: 1178 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1179 lgsz = 2; 1180 break; 1181 case TCG_TYPE_I64: 1182 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1183 lgsz = 3; 1184 break; 1185 case TCG_TYPE_V64: 1186 insn = I3312_LDRVD; 1187 lgsz = 3; 1188 break; 1189 case TCG_TYPE_V128: 1190 insn = I3312_LDRVQ; 1191 lgsz = 4; 1192 break; 1193 default: 1194 g_assert_not_reached(); 1195 } 1196 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1197} 1198 1199static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1200 TCGReg base, intptr_t ofs) 1201{ 1202 AArch64Insn insn; 1203 int lgsz; 1204 1205 switch (type) { 1206 case TCG_TYPE_I32: 1207 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1208 lgsz = 2; 1209 break; 1210 case TCG_TYPE_I64: 1211 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1212 lgsz = 3; 1213 break; 1214 case TCG_TYPE_V64: 1215 insn = I3312_STRVD; 1216 lgsz = 3; 1217 break; 1218 case TCG_TYPE_V128: 1219 insn = I3312_STRVQ; 1220 lgsz = 4; 1221 break; 1222 default: 1223 g_assert_not_reached(); 1224 } 1225 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1226} 1227 1228static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1229 TCGReg base, intptr_t ofs) 1230{ 1231 if (type <= TCG_TYPE_I64 && val == 0) { 1232 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1233 return true; 1234 } 1235 return false; 1236} 1237 1238static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1239 TCGReg rn, unsigned int a, unsigned int b) 1240{ 1241 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1242} 1243 1244static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1245 TCGReg rn, unsigned int a, unsigned int b) 1246{ 1247 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1248} 1249 1250static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1251 TCGReg rn, unsigned int a, unsigned int b) 1252{ 1253 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1254} 1255 1256static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1257 TCGReg rn, TCGReg rm, unsigned int a) 1258{ 1259 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1260} 1261 1262static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1263 TCGReg rd, TCGReg rn, unsigned int m) 1264{ 1265 int bits = ext ? 
64 : 32; 1266 int max = bits - 1; 1267 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); 1268} 1269 1270static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1271 TCGReg rd, TCGReg rn, unsigned int m) 1272{ 1273 int max = ext ? 63 : 31; 1274 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1275} 1276 1277static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1278 TCGReg rd, TCGReg rn, unsigned int m) 1279{ 1280 int max = ext ? 63 : 31; 1281 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1282} 1283 1284static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1285 TCGReg rd, TCGReg rn, unsigned int m) 1286{ 1287 int max = ext ? 63 : 31; 1288 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1289} 1290 1291static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1292 TCGReg rd, TCGReg rn, unsigned int m) 1293{ 1294 int bits = ext ? 64 : 32; 1295 int max = bits - 1; 1296 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); 1297} 1298 1299static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1300 TCGReg rn, unsigned lsb, unsigned width) 1301{ 1302 unsigned size = ext ? 64 : 32; 1303 unsigned a = (size - lsb) & (size - 1); 1304 unsigned b = width - 1; 1305 tcg_out_bfm(s, ext, rd, rn, a, b); 1306} 1307 1308static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1309 tcg_target_long b, bool const_b) 1310{ 1311 if (const_b) { 1312 /* Using CMP or CMN aliases. */ 1313 if (b >= 0) { 1314 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1315 } else { 1316 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1317 } 1318 } else { 1319 /* Using CMP alias SUBS wzr, Wn, Wm */ 1320 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1321 } 1322} 1323 1324static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1325{ 1326 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1327 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1328 tcg_out_insn(s, 3206, B, offset); 1329} 1330 1331static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1332{ 1333 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1334 if (offset == sextract64(offset, 0, 26)) { 1335 tcg_out_insn(s, 3206, B, offset); 1336 } else { 1337 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1338 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1339 } 1340} 1341 1342static inline void tcg_out_callr(TCGContext *s, TCGReg reg) 1343{ 1344 tcg_out_insn(s, 3207, BLR, reg); 1345} 1346 1347static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) 1348{ 1349 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1350 if (offset == sextract64(offset, 0, 26)) { 1351 tcg_out_insn(s, 3206, BL, offset); 1352 } else { 1353 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1354 tcg_out_callr(s, TCG_REG_TMP); 1355 } 1356} 1357 1358void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, 1359 uintptr_t jmp_rw, uintptr_t addr) 1360{ 1361 tcg_insn_unit i1, i2; 1362 TCGType rt = TCG_TYPE_I64; 1363 TCGReg rd = TCG_REG_TMP; 1364 uint64_t pair; 1365 1366 ptrdiff_t offset = addr - jmp_rx; 1367 1368 if (offset == sextract64(offset, 0, 26)) { 1369 i1 = I3206_B | ((offset >> 2) & 0x3ffffff); 1370 i2 = NOP; 1371 } else { 1372 offset = (addr >> 12) - (jmp_rx >> 12); 1373 1374 /* patch ADRP */ 1375 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; 1376 /* patch ADDI */ 1377 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; 1378 } 1379 pair = (uint64_t)i2 << 32 | i1; 1380 qatomic_set((uint64_t *)jmp_rw, pair); 1381 
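    /* Make the patched pair visible to instruction fetch through the
       executable (RX) mapping of the code buffer.  */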
flush_idcache_range(jmp_rx, jmp_rw, 8); 1382} 1383 1384static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1385{ 1386 if (!l->has_value) { 1387 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1388 tcg_out_insn(s, 3206, B, 0); 1389 } else { 1390 tcg_out_goto(s, l->u.value_ptr); 1391 } 1392} 1393 1394static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1395 TCGArg b, bool b_const, TCGLabel *l) 1396{ 1397 intptr_t offset; 1398 bool need_cmp; 1399 1400 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1401 need_cmp = false; 1402 } else { 1403 need_cmp = true; 1404 tcg_out_cmp(s, ext, a, b, b_const); 1405 } 1406 1407 if (!l->has_value) { 1408 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1409 offset = tcg_in32(s) >> 5; 1410 } else { 1411 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1412 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1413 } 1414 1415 if (need_cmp) { 1416 tcg_out_insn(s, 3202, B_C, c, offset); 1417 } else if (c == TCG_COND_EQ) { 1418 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1419 } else { 1420 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1421 } 1422} 1423 1424static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) 1425{ 1426 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); 1427} 1428 1429static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) 1430{ 1431 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); 1432} 1433 1434static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) 1435{ 1436 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); 1437} 1438 1439static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1440 TCGReg rd, TCGReg rn) 1441{ 1442 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1443 int bits = (8 << s_bits) - 1; 1444 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1445} 1446 1447static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1448 TCGReg rd, TCGReg rn) 1449{ 1450 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1451 int bits = (8 << s_bits) - 1; 1452 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1453} 1454 1455static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1456 TCGReg rn, int64_t aimm) 1457{ 1458 if (aimm >= 0) { 1459 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1460 } else { 1461 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1462 } 1463} 1464 1465static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1466 TCGReg rh, TCGReg al, TCGReg ah, 1467 tcg_target_long bl, tcg_target_long bh, 1468 bool const_bl, bool const_bh, bool sub) 1469{ 1470 TCGReg orig_rl = rl; 1471 AArch64Insn insn; 1472 1473 if (rl == ah || (!const_bh && rl == bh)) { 1474 rl = TCG_REG_TMP; 1475 } 1476 1477 if (const_bl) { 1478 if (bl < 0) { 1479 bl = -bl; 1480 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1481 } else { 1482 insn = sub ? I3401_SUBSI : I3401_ADDSI; 1483 } 1484 1485 if (unlikely(al == TCG_REG_XZR)) { 1486 /* ??? We want to allow al to be zero for the benefit of 1487 negation via subtraction. However, that leaves open the 1488 possibility of adding 0+const in the low part, and the 1489 immediate add instructions encode XSP not XZR. Don't try 1490 anything more elaborate here than loading another zero. */ 1491 al = TCG_REG_TMP; 1492 tcg_out_movi(s, ext, al, 0); 1493 } 1494 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1495 } else { 1496 tcg_out_insn_3502(s, sub ? 
I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1497 } 1498 1499 insn = I3503_ADC; 1500 if (const_bh) { 1501 /* Note that the only two constants we support are 0 and -1, and 1502 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1503 if ((bh != 0) ^ sub) { 1504 insn = I3503_SBC; 1505 } 1506 bh = TCG_REG_XZR; 1507 } else if (sub) { 1508 insn = I3503_SBC; 1509 } 1510 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1511 1512 tcg_out_mov(s, ext, orig_rl, rl); 1513} 1514 1515static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1516{ 1517 static const uint32_t sync[] = { 1518 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1519 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1520 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1521 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1522 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1523 }; 1524 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1525} 1526 1527static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1528 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1529{ 1530 TCGReg a1 = a0; 1531 if (is_ctz) { 1532 a1 = TCG_REG_TMP; 1533 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1534 } 1535 if (const_b && b == (ext ? 64 : 32)) { 1536 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1537 } else { 1538 AArch64Insn sel = I3506_CSEL; 1539 1540 tcg_out_cmp(s, ext, a0, 0, 1); 1541 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1542 1543 if (const_b) { 1544 if (b == -1) { 1545 b = TCG_REG_XZR; 1546 sel = I3506_CSINV; 1547 } else if (b == 0) { 1548 b = TCG_REG_XZR; 1549 } else { 1550 tcg_out_movi(s, ext, d, b); 1551 b = d; 1552 } 1553 } 1554 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1555 } 1556} 1557 1558#ifdef CONFIG_SOFTMMU 1559#include "../tcg-ldst.c.inc" 1560 1561/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1562 * TCGMemOpIdx oi, uintptr_t ra) 1563 */ 1564static void * const qemu_ld_helpers[16] = { 1565 [MO_UB] = helper_ret_ldub_mmu, 1566 [MO_LEUW] = helper_le_lduw_mmu, 1567 [MO_LEUL] = helper_le_ldul_mmu, 1568 [MO_LEQ] = helper_le_ldq_mmu, 1569 [MO_BEUW] = helper_be_lduw_mmu, 1570 [MO_BEUL] = helper_be_ldul_mmu, 1571 [MO_BEQ] = helper_be_ldq_mmu, 1572}; 1573 1574/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1575 * uintxx_t val, TCGMemOpIdx oi, 1576 * uintptr_t ra) 1577 */ 1578static void * const qemu_st_helpers[16] = { 1579 [MO_UB] = helper_ret_stb_mmu, 1580 [MO_LEUW] = helper_le_stw_mmu, 1581 [MO_LEUL] = helper_le_stl_mmu, 1582 [MO_LEQ] = helper_le_stq_mmu, 1583 [MO_BEUW] = helper_be_stw_mmu, 1584 [MO_BEUL] = helper_be_stl_mmu, 1585 [MO_BEQ] = helper_be_stq_mmu, 1586}; 1587 1588static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) 1589{ 1590 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1591 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1592 tcg_out_insn(s, 3406, ADR, rd, offset); 1593} 1594 1595static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1596{ 1597 TCGMemOpIdx oi = lb->oi; 1598 MemOp opc = get_memop(oi); 1599 MemOp size = opc & MO_SIZE; 1600 1601 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1602 return false; 1603 } 1604 1605 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1606 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1607 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1608 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1609 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1610 if (opc & MO_SIGN) { 1611 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1612 } else { 1613 
tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1614 } 1615 1616 tcg_out_goto(s, lb->raddr); 1617 return true; 1618} 1619 1620static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1621{ 1622 TCGMemOpIdx oi = lb->oi; 1623 MemOp opc = get_memop(oi); 1624 MemOp size = opc & MO_SIZE; 1625 1626 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1627 return false; 1628 } 1629 1630 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1631 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1632 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1633 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1634 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1635 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1636 tcg_out_goto(s, lb->raddr); 1637 return true; 1638} 1639 1640static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, 1641 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1642 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1643{ 1644 TCGLabelQemuLdst *label = new_ldst_label(s); 1645 1646 label->is_ld = is_ld; 1647 label->oi = oi; 1648 label->type = ext; 1649 label->datalo_reg = data_reg; 1650 label->addrlo_reg = addr_reg; 1651 label->raddr = tcg_splitwx_to_rx(raddr); 1652 label->label_ptr[0] = label_ptr; 1653} 1654 1655/* We expect to use a 7-bit scaled negative offset from ENV. */ 1656QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1657QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1658 1659/* These offsets are built into the LDP below. */ 1660QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1661QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1662 1663/* Load and compare a TLB entry, emitting the conditional jump to the 1664 slow path for the failure case, which will be patched later when finalizing 1665 the slow path. Generated code returns the host addend in X1, 1666 clobbers X0,X2,X3,TMP. */ 1667static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, 1668 tcg_insn_unit **label_ptr, int mem_index, 1669 bool is_read) 1670{ 1671 unsigned a_bits = get_alignment_bits(opc); 1672 unsigned s_bits = opc & MO_SIZE; 1673 unsigned a_mask = (1u << a_bits) - 1; 1674 unsigned s_mask = (1u << s_bits) - 1; 1675 TCGReg x3; 1676 TCGType mask_type; 1677 uint64_t compare_mask; 1678 1679 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1680 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1681 1682 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1683 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1684 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1685 1686 /* Extract the TLB index from the address into X0. */ 1687 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1688 TCG_REG_X0, TCG_REG_X0, addr_reg, 1689 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1690 1691 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ 1692 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1693 1694 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1695 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read 1696 ? offsetof(CPUTLBEntry, addr_read) 1697 : offsetof(CPUTLBEntry, addr_write)); 1698 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1699 offsetof(CPUTLBEntry, addend)); 1700 1701 /* For aligned accesses, we check the first byte and include the alignment 1702 bits within the address. For unaligned access, we check that we don't 1703 cross pages using the address of the last byte of the access. 
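   For example, an unaligned 4-byte access compares addr + 3, so an access
   that runs into the next page fails the comparison and takes the slow
   path.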
*/ 1704 if (a_bits >= s_bits) { 1705 x3 = addr_reg; 1706 } else { 1707 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1708 TCG_REG_X3, addr_reg, s_mask - a_mask); 1709 x3 = TCG_REG_X3; 1710 } 1711 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1712 1713 /* Store the page mask part of the address into X3. */ 1714 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1715 TCG_REG_X3, x3, compare_mask); 1716 1717 /* Perform the address comparison. */ 1718 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1719 1720 /* If not equal, we jump to the slow path. */ 1721 *label_ptr = s->code_ptr; 1722 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1723} 1724 1725#endif /* CONFIG_SOFTMMU */ 1726 1727static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1728 TCGReg data_r, TCGReg addr_r, 1729 TCGType otype, TCGReg off_r) 1730{ 1731 const MemOp bswap = memop & MO_BSWAP; 1732 1733 switch (memop & MO_SSIZE) { 1734 case MO_UB: 1735 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); 1736 break; 1737 case MO_SB: 1738 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1739 data_r, addr_r, otype, off_r); 1740 break; 1741 case MO_UW: 1742 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1743 if (bswap) { 1744 tcg_out_rev16(s, data_r, data_r); 1745 } 1746 break; 1747 case MO_SW: 1748 if (bswap) { 1749 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1750 tcg_out_rev16(s, data_r, data_r); 1751 tcg_out_sxt(s, ext, MO_16, data_r, data_r); 1752 } else { 1753 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), 1754 data_r, addr_r, otype, off_r); 1755 } 1756 break; 1757 case MO_UL: 1758 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1759 if (bswap) { 1760 tcg_out_rev32(s, data_r, data_r); 1761 } 1762 break; 1763 case MO_SL: 1764 if (bswap) { 1765 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1766 tcg_out_rev32(s, data_r, data_r); 1767 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); 1768 } else { 1769 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); 1770 } 1771 break; 1772 case MO_Q: 1773 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); 1774 if (bswap) { 1775 tcg_out_rev64(s, data_r, data_r); 1776 } 1777 break; 1778 default: 1779 tcg_abort(); 1780 } 1781} 1782 1783static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1784 TCGReg data_r, TCGReg addr_r, 1785 TCGType otype, TCGReg off_r) 1786{ 1787 const MemOp bswap = memop & MO_BSWAP; 1788 1789 switch (memop & MO_SIZE) { 1790 case MO_8: 1791 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); 1792 break; 1793 case MO_16: 1794 if (bswap && data_r != TCG_REG_XZR) { 1795 tcg_out_rev16(s, TCG_REG_TMP, data_r); 1796 data_r = TCG_REG_TMP; 1797 } 1798 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); 1799 break; 1800 case MO_32: 1801 if (bswap && data_r != TCG_REG_XZR) { 1802 tcg_out_rev32(s, TCG_REG_TMP, data_r); 1803 data_r = TCG_REG_TMP; 1804 } 1805 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); 1806 break; 1807 case MO_64: 1808 if (bswap && data_r != TCG_REG_XZR) { 1809 tcg_out_rev64(s, TCG_REG_TMP, data_r); 1810 data_r = TCG_REG_TMP; 1811 } 1812 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); 1813 break; 1814 default: 1815 tcg_abort(); 1816 } 1817} 1818 1819static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1820 TCGMemOpIdx oi, TCGType ext) 1821{ 1822 MemOp memop = get_memop(oi); 1823 const TCGType otype = TARGET_LONG_BITS == 64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32; 1824#ifdef CONFIG_SOFTMMU 1825 unsigned mem_index = get_mmuidx(oi); 1826 tcg_insn_unit *label_ptr; 1827 1828 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); 1829 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1830 TCG_REG_X1, otype, addr_reg); 1831 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, 1832 s->code_ptr, label_ptr); 1833#else /* !CONFIG_SOFTMMU */ 1834 if (USE_GUEST_BASE) { 1835 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1836 TCG_REG_GUEST_BASE, otype, addr_reg); 1837 } else { 1838 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1839 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1840 } 1841#endif /* CONFIG_SOFTMMU */ 1842} 1843 1844static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1845 TCGMemOpIdx oi) 1846{ 1847 MemOp memop = get_memop(oi); 1848 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1849#ifdef CONFIG_SOFTMMU 1850 unsigned mem_index = get_mmuidx(oi); 1851 tcg_insn_unit *label_ptr; 1852 1853 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); 1854 tcg_out_qemu_st_direct(s, memop, data_reg, 1855 TCG_REG_X1, otype, addr_reg); 1856 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, 1857 data_reg, addr_reg, s->code_ptr, label_ptr); 1858#else /* !CONFIG_SOFTMMU */ 1859 if (USE_GUEST_BASE) { 1860 tcg_out_qemu_st_direct(s, memop, data_reg, 1861 TCG_REG_GUEST_BASE, otype, addr_reg); 1862 } else { 1863 tcg_out_qemu_st_direct(s, memop, data_reg, 1864 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1865 } 1866#endif /* CONFIG_SOFTMMU */ 1867} 1868 1869static const tcg_insn_unit *tb_ret_addr; 1870 1871static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1872 const TCGArg args[TCG_MAX_OP_ARGS], 1873 const int const_args[TCG_MAX_OP_ARGS]) 1874{ 1875 /* 99% of the time, we can signal the use of extension registers 1876 by looking to see if the opcode handles 64-bit data. */ 1877 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 1878 1879 /* Hoist the loads of the most common arguments. */ 1880 TCGArg a0 = args[0]; 1881 TCGArg a1 = args[1]; 1882 TCGArg a2 = args[2]; 1883 int c2 = const_args[2]; 1884 1885 /* Some operands are defined with "rZ" constraint, a register or 1886 the zero register. These need not actually test args[I] == 0. */ 1887#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 1888 1889 switch (opc) { 1890 case INDEX_op_exit_tb: 1891 /* Reuse the zeroing that exists for goto_ptr. */ 1892 if (a0 == 0) { 1893 tcg_out_goto_long(s, tcg_code_gen_epilogue); 1894 } else { 1895 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 1896 tcg_out_goto_long(s, tb_ret_addr); 1897 } 1898 break; 1899 1900 case INDEX_op_goto_tb: 1901 if (s->tb_jmp_insn_offset != NULL) { 1902 /* TCG_TARGET_HAS_direct_jump */ 1903 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic 1904 write can be used to patch the target address. */ 1905 if ((uintptr_t)s->code_ptr & 7) { 1906 tcg_out32(s, NOP); 1907 } 1908 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); 1909 /* actual branch destination will be patched by 1910 tb_target_set_jmp_target later. 
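               The ADRP+ADDI pair emitted below is the 8-byte unit that
               function rewrites atomically, either to B+NOP or to an
               ADRP+ADDI targeting the new destination.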
            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
        } else {
            /* !TCG_TARGET_HAS_direct_jump */
            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
        }
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev64(s, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev32(s, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, a0, a1);
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov. */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call. */
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
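    /*
     * As with the and/or/andc cases above, a constant operand is folded
     * into the AdvSIMD modified-immediate forms when possible: for orc
     * the encoding of ~a2 feeds ORR (vector, immediate) when a0 aliases
     * a1, and otherwise MVNI rebuilds the constant before the
     * register-form ORN.
     */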
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov. */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec. */
    default:
        g_assert_not_reached();
    }
}

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;

    default:
        return 0;
    }
}

void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64. */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, l);
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(lZ, l);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn. */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers. */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding. */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals. */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space. */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame. */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}