/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner. */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped. */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS 0xffffffffu
#define ALL_VECTOR_REGS  0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted). */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations. */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses. */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

/* Return true if v16 is a valid 16-bit shifted immediate. */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate. */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate. */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate. */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate. */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR. */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn. */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,      /* store */
    LDST_LD = 1,      /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described. */
typedef enum {
    /* Compare and branch (immediate). */
    I3201_CBZ = 0x34000000,
    I3201_CBNZ = 0x35000000,

    /* Conditional branch (immediate). */
    I3202_B_C = 0x54000000,

    /* Unconditional branch (immediate). */
    I3206_B = 0x14000000,
    I3206_BL = 0x94000000,

    /* Unconditional branch (register). */
    I3207_BR = 0xd61f0000,
    I3207_BLR = 0xd63f0000,
    I3207_RET = 0xd65f0000,

    /* AdvSIMD load/store single structure. */
    I3303_LD1R = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR = 0x58000000,
    I3305_LDR_v64 = 0x5c000000,
    I3305_LDR_v128 = 0x9c000000,

    /* Load/store exclusive. */
    I3306_LDXP = 0xc8600000,
    I3306_STXP = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13. */
    I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310 = 0x00200800,
    I3312_TO_I3313 = 0x01000000,

    /* Load/store register pair instructions. */
    I3314_LDP = 0x28400000,
    I3314_STP = 0x28000000,

    /* Add/subtract immediate instructions. */
    I3401_ADDI = 0x11000000,
    I3401_ADDSI = 0x31000000,
    I3401_SUBI = 0x51000000,
    I3401_SUBSI = 0x71000000,

    /* Bitfield instructions. */
    I3402_BFM = 0x33000000,
    I3402_SBFM = 0x13000000,
    I3402_UBFM = 0x53000000,

    /* Extract instruction. */
    I3403_EXTR = 0x13800000,

    /* Logical immediate instructions. */
    I3404_ANDI = 0x12000000,
    I3404_ORRI = 0x32000000,
    I3404_EORI = 0x52000000,
    I3404_ANDSI = 0x72000000,

    /* Move wide immediate instructions. */
    I3405_MOVN = 0x12800000,
    I3405_MOVZ = 0x52800000,
    I3405_MOVK = 0x72800000,

    /* PC relative addressing instructions. */
    I3406_ADR = 0x10000000,
    I3406_ADRP = 0x90000000,

    /* Add/subtract extended register instructions. */
    I3501_ADD = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift). */
    I3502_ADD = 0x0b000000,
    I3502_ADDS = 0x2b000000,
    I3502_SUB = 0x4b000000,
    I3502_SUBS = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift). */
    I3502S_ADD_LSL = I3502_ADD,

    /* Add/subtract with carry instructions. */
    I3503_ADC = 0x1a000000,
    I3503_SBC = 0x5a000000,

    /* Conditional select instructions. */
    I3506_CSEL = 0x1a800000,
    I3506_CSINC = 0x1a800400,
    I3506_CSINV = 0x5a800000,
    I3506_CSNEG = 0x5a800400,

    /* Data-processing (1 source) instructions. */
    I3507_CLZ = 0x5ac01000,
    I3507_RBIT = 0x5ac00000,
    I3507_REV = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions. */
    I3508_LSLV = 0x1ac02000,
    I3508_LSRV = 0x1ac02400,
    I3508_ASRV = 0x1ac02800,
    I3508_RORV = 0x1ac02c00,
    I3508_SMULH = 0x9b407c00,
    I3508_UMULH = 0x9bc07c00,
    I3508_UDIV = 0x1ac00800,
    I3508_SDIV = 0x1ac00c00,

    /* Data-processing (3 source) instructions. */
    I3509_MADD = 0x1b000000,
    I3509_MSUB = 0x1b008000,

    /* Logical shifted register instructions (without a shift). */
    I3510_AND = 0x0a000000,
    I3510_BIC = 0x0a200000,
    I3510_ORR = 0x2a000000,
    I3510_ORN = 0x2a200000,
    I3510_EOR = 0x4a000000,
    I3510_EON = 0x4a200000,
    I3510_ANDS = 0x6a000000,

    /* Logical shifted register instructions (with a shift). */
    I3502S_AND_LSR = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP = 0x0e000400,
    I3605_INS = 0x4e001c00,
    I3605_UMOV = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI = 0x0f000400,
    I3606_MVNI = 0x2f000400,
    I3606_BIC = 0x2f001400,
    I3606_ORR = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR = 0x5f000400,
    I3609_SSRA = 0x5f001400,
    I3609_SHL = 0x5f005400,
    I3609_USHR = 0x7f000400,
    I3609_USRA = 0x7f001400,
    I3609_SLI = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD = 0x5e200c00,
    I3611_SQSUB = 0x5e202c00,
    I3611_CMGT = 0x5e203400,
    I3611_CMGE = 0x5e203c00,
    I3611_SSHL = 0x5e204400,
    I3611_ADD = 0x5e208400,
    I3611_CMTST = 0x5e208c00,
    I3611_UQADD = 0x7e200c00,
    I3611_UQSUB = 0x7e202c00,
    I3611_CMHI = 0x7e203400,
    I3611_CMHS = 0x7e203c00,
    I3611_USHL = 0x7e204400,
    I3611_SUB = 0x7e208400,
    I3611_CMEQ = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0 = 0x5e208800,
    I3612_CMEQ0 = 0x5e209800,
    I3612_CMLT0 = 0x5e20a800,
    I3612_ABS = 0x5e20b800,
    I3612_CMGE0 = 0x7e208800,
    I3612_CMLE0 = 0x7e209800,
    I3612_NEG = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR = 0x0f000400,
    I3614_SSRA = 0x0f001400,
    I3614_SHL = 0x0f005400,
    I3614_SLI = 0x2f005400,
    I3614_USHR = 0x2f000400,
    I3614_USRA = 0x2f001400,

    /* AdvSIMD three same. */
    I3616_ADD = 0x0e208400,
    I3616_AND = 0x0e201c00,
    I3616_BIC = 0x0e601c00,
    I3616_BIF = 0x2ee01c00,
    I3616_BIT = 0x2ea01c00,
    I3616_BSL = 0x2e601c00,
    I3616_EOR = 0x2e201c00,
    I3616_MUL = 0x0e209c00,
    I3616_ORR = 0x0ea01c00,
    I3616_ORN = 0x0ee01c00,
    I3616_SUB = 0x2e208400,
    I3616_CMGT = 0x0e203400,
    I3616_CMGE = 0x0e203c00,
    I3616_CMTST = 0x0e208c00,
    I3616_CMHI = 0x2e203400,
    I3616_CMHS = 0x2e203c00,
    I3616_CMEQ = 0x2e208c00,
    I3616_SMAX = 0x0e206400,
    I3616_SMIN = 0x0e206c00,
    I3616_SSHL = 0x0e204400,
    I3616_SQADD = 0x0e200c00,
    I3616_SQSUB = 0x0e202c00,
    I3616_UMAX = 0x2e206400,
    I3616_UMIN = 0x2e206c00,
    I3616_UQADD = 0x2e200c00,
    I3616_UQSUB = 0x2e202c00,
    I3616_USHL = 0x2e204400,

    /* AdvSIMD two-reg misc. */
    I3617_CMGT0 = 0x0e208800,
    I3617_CMEQ0 = 0x0e209800,
    I3617_CMLT0 = 0x0e20a800,
    I3617_CMGE0 = 0x2e208800,
    I3617_CMLE0 = 0x2e209800,
    I3617_NOT = 0x2e205800,
    I3617_ABS = 0x0e20b800,
    I3617_NEG = 0x2e20b800,

    /* System instructions. */
    NOP = 0xd503201f,
    DMB_ISH = 0xd50338bf,
    DMB_LD = 0x00000100,
    DMB_ST = 0x00000200,

    BTI_C = 0xd503245f,
    BTI_J = 0xd503249f,
    BTI_JC = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format. */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function. */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404 tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for both 3.5.2 (Add/Subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount. */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503 tcg_out_insn_3502
#define tcg_out_insn_3508 tcg_out_insn_3502
#define tcg_out_insn_3510 tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function. */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust. */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust. */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP). */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates. */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first. */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0. */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially. */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures. */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN? */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool. */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/* Define something more legible for general use. */
#define tcg_out_ldst_r tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding. */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset. */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases. */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero. */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP0;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
    }
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/* We expect to use a 7-bit scaled negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -512

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        TCGReg addr_adj;
        TCGType mask_type;
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

        /* Extract the TLB index from the address into X0. */
        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                     s->page_bits - CPU_TLB_ENTRY_BITS);

        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

        /* Load the tlb comparator into TMP0, and the fast path addend. */
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                         : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                   offsetof(CPUTLBEntry, addend));

        /*
         * For aligned accesses, we check the first byte and include
         * the alignment bits within the address.  For unaligned access,
         * we check that we don't cross pages using the address of the
         * last byte of the access.
         */
        if (a_mask >= s_mask) {
            addr_adj = addr_reg;
        } else {
            addr_adj = TCG_REG_TMP2;
            tcg_out_insn(s, 3401, ADDI, addr_type,
                         addr_adj, addr_reg, s_mask - a_mask);
        }
        compare_mask = (uint64_t)s->page_mask | a_mask;

        /* Store the page mask part of the address into TMP2. */
        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                         addr_adj, compare_mask);

        /* Perform the address comparison. */
        tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);

        /* If not equal, we jump to the slow path. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        h->base = TCG_REG_TMP1;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        if (a_mask) {
            ldst = new_ldst_label(s);

            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addrlo_reg = addr_reg;

            /* tst addr, #mask */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

            /* b.ne slow_path */
            ldst->label_ptr[0] = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
        }

        if (guest_base || addr_type == TCG_TYPE_I32) {
            h->base = TCG_REG_GUEST_BASE;
            h->index = addr_reg;
            h->index_ext = addr_type;
        } else {
            h->base = addr_reg;
            h->index = TCG_REG_XZR;
            h->index_ext = TCG_TYPE_I64;
        }
    }

    return ldst;
}

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    TCGReg base;
    bool use_pair;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing. */
    if (h.index == TCG_REG_XZR) {
        base = h.base;
    } else {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
        } else {
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
        }
    }

    use_pair = h.aa.atom < MO_128 || have_lse2;

    if (!use_pair) {
        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
            branch = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
            use_pair = true;
        }

        if (is_ld) {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             *    ldxp lo, hi, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, .-8
             * Require no overlap between data{lo,hi} and base.
             */
            if (datalo == base || datahi == base) {
                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
                base = TCG_REG_TMP2;
            }
            ll = sl = datalo;
            lh = sh = datahi;
        } else {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             * 1: ldxp t0, t1, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, 1b
             */
            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
            ll = TCG_REG_TMP0;
            lh = TCG_REG_TMP1;
            sl = datalo;
            sh = datahi;
        }

        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);

        if (use_pair) {
            /* "b .+8", branching across the one insn of use_pair. */
            tcg_out_insn(s, 3206, B, 2);
            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
        }
    }

    if (use_pair) {
        if (is_ld) {
            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
        } else {
            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    const tcg_insn_unit *target;
    ptrdiff_t offset;

    /* Reuse the zeroing that exists for goto_ptr. */
    if (a0 == 0) {
        target = tcg_code_gen_epilogue;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        target = tb_ret_addr;
    }

    offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /*
         * Only x16/x17 generate BTI type Jump (2),
         * other registers generate BTI type Jump|Call (3).
         */
        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    }
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    set_jmp_reset_offset(s, which);
    tcg_out_bti(s, BTI_J);
}

void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t d_addr = tb->jmp_target_addr[n];
    ptrdiff_t d_offset = d_addr - jmp_rx;
    tcg_insn_unit insn;

    /* Either directly branch, or indirect branch load. */
*/ 2002 if (d_offset == sextract64(d_offset, 0, 28)) { 2003 insn = deposit32(I3206_B, 0, 26, d_offset >> 2); 2004 } else { 2005 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 2006 ptrdiff_t i_offset = i_addr - jmp_rx; 2007 2008 /* Note that we asserted this in range in tcg_out_goto_tb. */ 2009 insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2); 2010 } 2011 qatomic_set((uint32_t *)jmp_rw, insn); 2012 flush_idcache_range(jmp_rx, jmp_rw, 4); 2013} 2014 2015static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2016 const TCGArg args[TCG_MAX_OP_ARGS], 2017 const int const_args[TCG_MAX_OP_ARGS]) 2018{ 2019 /* 99% of the time, we can signal the use of extension registers 2020 by looking to see if the opcode handles 64-bit data. */ 2021 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 2022 2023 /* Hoist the loads of the most common arguments. */ 2024 TCGArg a0 = args[0]; 2025 TCGArg a1 = args[1]; 2026 TCGArg a2 = args[2]; 2027 int c2 = const_args[2]; 2028 2029 /* Some operands are defined with "rZ" constraint, a register or 2030 the zero register. These need not actually test args[I] == 0. */ 2031#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 2032 2033 switch (opc) { 2034 case INDEX_op_goto_ptr: 2035 tcg_out_insn(s, 3207, BR, a0); 2036 break; 2037 2038 case INDEX_op_br: 2039 tcg_out_goto_label(s, arg_label(a0)); 2040 break; 2041 2042 case INDEX_op_ld8u_i32: 2043 case INDEX_op_ld8u_i64: 2044 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 2045 break; 2046 case INDEX_op_ld8s_i32: 2047 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 2048 break; 2049 case INDEX_op_ld8s_i64: 2050 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 2051 break; 2052 case INDEX_op_ld16u_i32: 2053 case INDEX_op_ld16u_i64: 2054 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 2055 break; 2056 case INDEX_op_ld16s_i32: 2057 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 2058 break; 2059 case INDEX_op_ld16s_i64: 2060 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 2061 break; 2062 case INDEX_op_ld_i32: 2063 case INDEX_op_ld32u_i64: 2064 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 2065 break; 2066 case INDEX_op_ld32s_i64: 2067 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 2068 break; 2069 case INDEX_op_ld_i64: 2070 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 2071 break; 2072 2073 case INDEX_op_st8_i32: 2074 case INDEX_op_st8_i64: 2075 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 2076 break; 2077 case INDEX_op_st16_i32: 2078 case INDEX_op_st16_i64: 2079 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 2080 break; 2081 case INDEX_op_st_i32: 2082 case INDEX_op_st32_i64: 2083 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 2084 break; 2085 case INDEX_op_st_i64: 2086 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 2087 break; 2088 2089 case INDEX_op_add_i32: 2090 a2 = (int32_t)a2; 2091 /* FALLTHRU */ 2092 case INDEX_op_add_i64: 2093 if (c2) { 2094 tcg_out_addsubi(s, ext, a0, a1, a2); 2095 } else { 2096 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 2097 } 2098 break; 2099 2100 case INDEX_op_sub_i32: 2101 a2 = (int32_t)a2; 2102 /* FALLTHRU */ 2103 case INDEX_op_sub_i64: 2104 if (c2) { 2105 tcg_out_addsubi(s, ext, a0, a1, -a2); 2106 } else { 2107 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 2108 } 2109 break; 2110 2111 case INDEX_op_neg_i64: 2112 case INDEX_op_neg_i32: 2113 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 2114 break; 2115 2116 case INDEX_op_and_i32: 2117 a2 = (int32_t)a2; 2118 /* FALLTHRU */ 2119 case INDEX_op_and_i64: 2120 if (c2) { 2121 tcg_out_logicali(s, I3404_ANDI, ext, a0, 
a1, a2); 2122 } else { 2123 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 2124 } 2125 break; 2126 2127 case INDEX_op_andc_i32: 2128 a2 = (int32_t)a2; 2129 /* FALLTHRU */ 2130 case INDEX_op_andc_i64: 2131 if (c2) { 2132 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2133 } else { 2134 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2135 } 2136 break; 2137 2138 case INDEX_op_or_i32: 2139 a2 = (int32_t)a2; 2140 /* FALLTHRU */ 2141 case INDEX_op_or_i64: 2142 if (c2) { 2143 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2144 } else { 2145 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2146 } 2147 break; 2148 2149 case INDEX_op_orc_i32: 2150 a2 = (int32_t)a2; 2151 /* FALLTHRU */ 2152 case INDEX_op_orc_i64: 2153 if (c2) { 2154 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); 2155 } else { 2156 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2157 } 2158 break; 2159 2160 case INDEX_op_xor_i32: 2161 a2 = (int32_t)a2; 2162 /* FALLTHRU */ 2163 case INDEX_op_xor_i64: 2164 if (c2) { 2165 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2166 } else { 2167 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2168 } 2169 break; 2170 2171 case INDEX_op_eqv_i32: 2172 a2 = (int32_t)a2; 2173 /* FALLTHRU */ 2174 case INDEX_op_eqv_i64: 2175 if (c2) { 2176 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2177 } else { 2178 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2179 } 2180 break; 2181 2182 case INDEX_op_not_i64: 2183 case INDEX_op_not_i32: 2184 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2185 break; 2186 2187 case INDEX_op_mul_i64: 2188 case INDEX_op_mul_i32: 2189 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2190 break; 2191 2192 case INDEX_op_div_i64: 2193 case INDEX_op_div_i32: 2194 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2195 break; 2196 case INDEX_op_divu_i64: 2197 case INDEX_op_divu_i32: 2198 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2199 break; 2200 2201 case INDEX_op_rem_i64: 2202 case INDEX_op_rem_i32: 2203 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2); 2204 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); 2205 break; 2206 case INDEX_op_remu_i64: 2207 case INDEX_op_remu_i32: 2208 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2); 2209 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); 2210 break; 2211 2212 case INDEX_op_shl_i64: 2213 case INDEX_op_shl_i32: 2214 if (c2) { 2215 tcg_out_shl(s, ext, a0, a1, a2); 2216 } else { 2217 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2218 } 2219 break; 2220 2221 case INDEX_op_shr_i64: 2222 case INDEX_op_shr_i32: 2223 if (c2) { 2224 tcg_out_shr(s, ext, a0, a1, a2); 2225 } else { 2226 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2227 } 2228 break; 2229 2230 case INDEX_op_sar_i64: 2231 case INDEX_op_sar_i32: 2232 if (c2) { 2233 tcg_out_sar(s, ext, a0, a1, a2); 2234 } else { 2235 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2236 } 2237 break; 2238 2239 case INDEX_op_rotr_i64: 2240 case INDEX_op_rotr_i32: 2241 if (c2) { 2242 tcg_out_rotr(s, ext, a0, a1, a2); 2243 } else { 2244 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2245 } 2246 break; 2247 2248 case INDEX_op_rotl_i64: 2249 case INDEX_op_rotl_i32: 2250 if (c2) { 2251 tcg_out_rotl(s, ext, a0, a1, a2); 2252 } else { 2253 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2); 2254 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0); 2255 } 2256 break; 2257 2258 case INDEX_op_clz_i64: 2259 case INDEX_op_clz_i32: 2260 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2261 break; 2262 case INDEX_op_ctz_i64: 2263 case INDEX_op_ctz_i32: 2264 
tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2265 break; 2266 2267 case INDEX_op_brcond_i32: 2268 a1 = (int32_t)a1; 2269 /* FALLTHRU */ 2270 case INDEX_op_brcond_i64: 2271 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2272 break; 2273 2274 case INDEX_op_setcond_i32: 2275 a2 = (int32_t)a2; 2276 /* FALLTHRU */ 2277 case INDEX_op_setcond_i64: 2278 tcg_out_cmp(s, ext, a1, a2, c2); 2279 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2280 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2281 TCG_REG_XZR, tcg_invert_cond(args[3])); 2282 break; 2283 2284 case INDEX_op_negsetcond_i32: 2285 a2 = (int32_t)a2; 2286 /* FALLTHRU */ 2287 case INDEX_op_negsetcond_i64: 2288 tcg_out_cmp(s, ext, a1, a2, c2); 2289 /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */ 2290 tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR, 2291 TCG_REG_XZR, tcg_invert_cond(args[3])); 2292 break; 2293 2294 case INDEX_op_movcond_i32: 2295 a2 = (int32_t)a2; 2296 /* FALLTHRU */ 2297 case INDEX_op_movcond_i64: 2298 tcg_out_cmp(s, ext, a1, a2, c2); 2299 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2300 break; 2301 2302 case INDEX_op_qemu_ld_a32_i32: 2303 case INDEX_op_qemu_ld_a64_i32: 2304 case INDEX_op_qemu_ld_a32_i64: 2305 case INDEX_op_qemu_ld_a64_i64: 2306 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2307 break; 2308 case INDEX_op_qemu_st_a32_i32: 2309 case INDEX_op_qemu_st_a64_i32: 2310 case INDEX_op_qemu_st_a32_i64: 2311 case INDEX_op_qemu_st_a64_i64: 2312 tcg_out_qemu_st(s, REG0(0), a1, a2, ext); 2313 break; 2314 case INDEX_op_qemu_ld_a32_i128: 2315 case INDEX_op_qemu_ld_a64_i128: 2316 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); 2317 break; 2318 case INDEX_op_qemu_st_a32_i128: 2319 case INDEX_op_qemu_st_a64_i128: 2320 tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false); 2321 break; 2322 2323 case INDEX_op_bswap64_i64: 2324 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2325 break; 2326 case INDEX_op_bswap32_i64: 2327 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2328 if (a2 & TCG_BSWAP_OS) { 2329 tcg_out_ext32s(s, a0, a0); 2330 } 2331 break; 2332 case INDEX_op_bswap32_i32: 2333 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2334 break; 2335 case INDEX_op_bswap16_i64: 2336 case INDEX_op_bswap16_i32: 2337 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2338 if (a2 & TCG_BSWAP_OS) { 2339 /* Output must be sign-extended. */ 2340 tcg_out_ext16s(s, ext, a0, a0); 2341 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2342 /* Output must be zero-extended, but input isn't. 
*/ 2343 tcg_out_ext16u(s, a0, a0); 2344 } 2345 break; 2346 2347 case INDEX_op_deposit_i64: 2348 case INDEX_op_deposit_i32: 2349 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2350 break; 2351 2352 case INDEX_op_extract_i64: 2353 case INDEX_op_extract_i32: 2354 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2355 break; 2356 2357 case INDEX_op_sextract_i64: 2358 case INDEX_op_sextract_i32: 2359 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2360 break; 2361 2362 case INDEX_op_extract2_i64: 2363 case INDEX_op_extract2_i32: 2364 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2365 break; 2366 2367 case INDEX_op_add2_i32: 2368 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2369 (int32_t)args[4], args[5], const_args[4], 2370 const_args[5], false); 2371 break; 2372 case INDEX_op_add2_i64: 2373 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2374 args[5], const_args[4], const_args[5], false); 2375 break; 2376 case INDEX_op_sub2_i32: 2377 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2378 (int32_t)args[4], args[5], const_args[4], 2379 const_args[5], true); 2380 break; 2381 case INDEX_op_sub2_i64: 2382 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2383 args[5], const_args[4], const_args[5], true); 2384 break; 2385 2386 case INDEX_op_muluh_i64: 2387 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2388 break; 2389 case INDEX_op_mulsh_i64: 2390 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2391 break; 2392 2393 case INDEX_op_mb: 2394 tcg_out_mb(s, a0); 2395 break; 2396 2397 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2398 case INDEX_op_mov_i64: 2399 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2400 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2401 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 2402 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
*/ 2403 case INDEX_op_ext8s_i64: 2404 case INDEX_op_ext8u_i32: 2405 case INDEX_op_ext8u_i64: 2406 case INDEX_op_ext16s_i64: 2407 case INDEX_op_ext16s_i32: 2408 case INDEX_op_ext16u_i64: 2409 case INDEX_op_ext16u_i32: 2410 case INDEX_op_ext32s_i64: 2411 case INDEX_op_ext32u_i64: 2412 case INDEX_op_ext_i32_i64: 2413 case INDEX_op_extu_i32_i64: 2414 case INDEX_op_extrl_i64_i32: 2415 default: 2416 g_assert_not_reached(); 2417 } 2418 2419#undef REG0 2420} 2421 2422static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2423 unsigned vecl, unsigned vece, 2424 const TCGArg args[TCG_MAX_OP_ARGS], 2425 const int const_args[TCG_MAX_OP_ARGS]) 2426{ 2427 static const AArch64Insn cmp_vec_insn[16] = { 2428 [TCG_COND_EQ] = I3616_CMEQ, 2429 [TCG_COND_GT] = I3616_CMGT, 2430 [TCG_COND_GE] = I3616_CMGE, 2431 [TCG_COND_GTU] = I3616_CMHI, 2432 [TCG_COND_GEU] = I3616_CMHS, 2433 }; 2434 static const AArch64Insn cmp_scalar_insn[16] = { 2435 [TCG_COND_EQ] = I3611_CMEQ, 2436 [TCG_COND_GT] = I3611_CMGT, 2437 [TCG_COND_GE] = I3611_CMGE, 2438 [TCG_COND_GTU] = I3611_CMHI, 2439 [TCG_COND_GEU] = I3611_CMHS, 2440 }; 2441 static const AArch64Insn cmp0_vec_insn[16] = { 2442 [TCG_COND_EQ] = I3617_CMEQ0, 2443 [TCG_COND_GT] = I3617_CMGT0, 2444 [TCG_COND_GE] = I3617_CMGE0, 2445 [TCG_COND_LT] = I3617_CMLT0, 2446 [TCG_COND_LE] = I3617_CMLE0, 2447 }; 2448 static const AArch64Insn cmp0_scalar_insn[16] = { 2449 [TCG_COND_EQ] = I3612_CMEQ0, 2450 [TCG_COND_GT] = I3612_CMGT0, 2451 [TCG_COND_GE] = I3612_CMGE0, 2452 [TCG_COND_LT] = I3612_CMLT0, 2453 [TCG_COND_LE] = I3612_CMLE0, 2454 }; 2455 2456 TCGType type = vecl + TCG_TYPE_V64; 2457 unsigned is_q = vecl; 2458 bool is_scalar = !is_q && vece == MO_64; 2459 TCGArg a0, a1, a2, a3; 2460 int cmode, imm8; 2461 2462 a0 = args[0]; 2463 a1 = args[1]; 2464 a2 = args[2]; 2465 2466 switch (opc) { 2467 case INDEX_op_ld_vec: 2468 tcg_out_ld(s, type, a0, a1, a2); 2469 break; 2470 case INDEX_op_st_vec: 2471 tcg_out_st(s, type, a0, a1, a2); 2472 break; 2473 case INDEX_op_dupm_vec: 2474 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2475 break; 2476 case INDEX_op_add_vec: 2477 if (is_scalar) { 2478 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2479 } else { 2480 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2481 } 2482 break; 2483 case INDEX_op_sub_vec: 2484 if (is_scalar) { 2485 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2486 } else { 2487 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2488 } 2489 break; 2490 case INDEX_op_mul_vec: 2491 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2492 break; 2493 case INDEX_op_neg_vec: 2494 if (is_scalar) { 2495 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2496 } else { 2497 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2498 } 2499 break; 2500 case INDEX_op_abs_vec: 2501 if (is_scalar) { 2502 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2503 } else { 2504 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2505 } 2506 break; 2507 case INDEX_op_and_vec: 2508 if (const_args[2]) { 2509 is_shimm1632(~a2, &cmode, &imm8); 2510 if (a0 == a1) { 2511 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2512 return; 2513 } 2514 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2515 a2 = a0; 2516 } 2517 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2518 break; 2519 case INDEX_op_or_vec: 2520 if (const_args[2]) { 2521 is_shimm1632(a2, &cmode, &imm8); 2522 if (a0 == a1) { 2523 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2524 return; 2525 } 2526 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2527 a2 = a0; 2528 } 2529 tcg_out_insn(s, 3616, ORR, 
is_q, 0, a0, a1, a2); 2530 break; 2531 case INDEX_op_andc_vec: 2532 if (const_args[2]) { 2533 is_shimm1632(a2, &cmode, &imm8); 2534 if (a0 == a1) { 2535 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2536 return; 2537 } 2538 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2539 a2 = a0; 2540 } 2541 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2542 break; 2543 case INDEX_op_orc_vec: 2544 if (const_args[2]) { 2545 is_shimm1632(~a2, &cmode, &imm8); 2546 if (a0 == a1) { 2547 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2548 return; 2549 } 2550 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2551 a2 = a0; 2552 } 2553 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2554 break; 2555 case INDEX_op_xor_vec: 2556 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2557 break; 2558 case INDEX_op_ssadd_vec: 2559 if (is_scalar) { 2560 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2561 } else { 2562 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2563 } 2564 break; 2565 case INDEX_op_sssub_vec: 2566 if (is_scalar) { 2567 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2568 } else { 2569 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2570 } 2571 break; 2572 case INDEX_op_usadd_vec: 2573 if (is_scalar) { 2574 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2575 } else { 2576 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2577 } 2578 break; 2579 case INDEX_op_ussub_vec: 2580 if (is_scalar) { 2581 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2582 } else { 2583 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2584 } 2585 break; 2586 case INDEX_op_smax_vec: 2587 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2588 break; 2589 case INDEX_op_smin_vec: 2590 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2591 break; 2592 case INDEX_op_umax_vec: 2593 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2594 break; 2595 case INDEX_op_umin_vec: 2596 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2597 break; 2598 case INDEX_op_not_vec: 2599 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2600 break; 2601 case INDEX_op_shli_vec: 2602 if (is_scalar) { 2603 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2604 } else { 2605 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2606 } 2607 break; 2608 case INDEX_op_shri_vec: 2609 if (is_scalar) { 2610 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2611 } else { 2612 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2613 } 2614 break; 2615 case INDEX_op_sari_vec: 2616 if (is_scalar) { 2617 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2618 } else { 2619 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2620 } 2621 break; 2622 case INDEX_op_aa64_sli_vec: 2623 if (is_scalar) { 2624 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2625 } else { 2626 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2627 } 2628 break; 2629 case INDEX_op_shlv_vec: 2630 if (is_scalar) { 2631 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2632 } else { 2633 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2634 } 2635 break; 2636 case INDEX_op_aa64_sshl_vec: 2637 if (is_scalar) { 2638 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2639 } else { 2640 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2641 } 2642 break; 2643 case INDEX_op_cmp_vec: 2644 { 2645 TCGCond cond = args[3]; 2646 AArch64Insn insn; 2647 2648 if (cond == TCG_COND_NE) { 2649 if (const_args[2]) { 2650 if (is_scalar) { 2651 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 
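/* CMTST of a1 against itself yields all-ones lanes exactly where a1 is non-zero, which is the NE result for the constant-zero a2 selected by const_args[2]. */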
2652 } else { 2653 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2654 } 2655 } else { 2656 if (is_scalar) { 2657 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2658 } else { 2659 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2660 } 2661 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2662 } 2663 } else { 2664 if (const_args[2]) { 2665 if (is_scalar) { 2666 insn = cmp0_scalar_insn[cond]; 2667 if (insn) { 2668 tcg_out_insn_3612(s, insn, vece, a0, a1); 2669 break; 2670 } 2671 } else { 2672 insn = cmp0_vec_insn[cond]; 2673 if (insn) { 2674 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2675 break; 2676 } 2677 } 2678 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0); 2679 a2 = TCG_VEC_TMP0; 2680 } 2681 if (is_scalar) { 2682 insn = cmp_scalar_insn[cond]; 2683 if (insn == 0) { 2684 TCGArg t; 2685 t = a1, a1 = a2, a2 = t; 2686 cond = tcg_swap_cond(cond); 2687 insn = cmp_scalar_insn[cond]; 2688 tcg_debug_assert(insn != 0); 2689 } 2690 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2691 } else { 2692 insn = cmp_vec_insn[cond]; 2693 if (insn == 0) { 2694 TCGArg t; 2695 t = a1, a1 = a2, a2 = t; 2696 cond = tcg_swap_cond(cond); 2697 insn = cmp_vec_insn[cond]; 2698 tcg_debug_assert(insn != 0); 2699 } 2700 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2701 } 2702 } 2703 } 2704 break; 2705 2706 case INDEX_op_bitsel_vec: 2707 a3 = args[3]; 2708 if (a0 == a3) { 2709 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2710 } else if (a0 == a2) { 2711 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2712 } else { 2713 if (a0 != a1) { 2714 tcg_out_mov(s, type, a0, a1); 2715 } 2716 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2717 } 2718 break; 2719 2720 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2721 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2722 default: 2723 g_assert_not_reached(); 2724 } 2725} 2726 2727int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2728{ 2729 switch (opc) { 2730 case INDEX_op_add_vec: 2731 case INDEX_op_sub_vec: 2732 case INDEX_op_and_vec: 2733 case INDEX_op_or_vec: 2734 case INDEX_op_xor_vec: 2735 case INDEX_op_andc_vec: 2736 case INDEX_op_orc_vec: 2737 case INDEX_op_neg_vec: 2738 case INDEX_op_abs_vec: 2739 case INDEX_op_not_vec: 2740 case INDEX_op_cmp_vec: 2741 case INDEX_op_shli_vec: 2742 case INDEX_op_shri_vec: 2743 case INDEX_op_sari_vec: 2744 case INDEX_op_ssadd_vec: 2745 case INDEX_op_sssub_vec: 2746 case INDEX_op_usadd_vec: 2747 case INDEX_op_ussub_vec: 2748 case INDEX_op_shlv_vec: 2749 case INDEX_op_bitsel_vec: 2750 return 1; 2751 case INDEX_op_rotli_vec: 2752 case INDEX_op_shrv_vec: 2753 case INDEX_op_sarv_vec: 2754 case INDEX_op_rotlv_vec: 2755 case INDEX_op_rotrv_vec: 2756 return -1; 2757 case INDEX_op_mul_vec: 2758 case INDEX_op_smax_vec: 2759 case INDEX_op_smin_vec: 2760 case INDEX_op_umax_vec: 2761 case INDEX_op_umin_vec: 2762 return vece < MO_64; 2763 2764 default: 2765 return 0; 2766 } 2767} 2768 2769void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2770 TCGArg a0, ...) 
2771{ 2772 va_list va; 2773 TCGv_vec v0, v1, v2, t1, t2, c1; 2774 TCGArg a2; 2775 2776 va_start(va, a0); 2777 v0 = temp_tcgv_vec(arg_temp(a0)); 2778 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2779 a2 = va_arg(va, TCGArg); 2780 va_end(va); 2781 2782 switch (opc) { 2783 case INDEX_op_rotli_vec: 2784 t1 = tcg_temp_new_vec(type); 2785 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2786 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2787 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2788 tcg_temp_free_vec(t1); 2789 break; 2790 2791 case INDEX_op_shrv_vec: 2792 case INDEX_op_sarv_vec: 2793 /* Right shifts are negative left shifts for AArch64. */ 2794 v2 = temp_tcgv_vec(arg_temp(a2)); 2795 t1 = tcg_temp_new_vec(type); 2796 tcg_gen_neg_vec(vece, t1, v2); 2797 opc = (opc == INDEX_op_shrv_vec 2798 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2799 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2800 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2801 tcg_temp_free_vec(t1); 2802 break; 2803 2804 case INDEX_op_rotlv_vec: 2805 v2 = temp_tcgv_vec(arg_temp(a2)); 2806 t1 = tcg_temp_new_vec(type); 2807 c1 = tcg_constant_vec(type, vece, 8 << vece); 2808 tcg_gen_sub_vec(vece, t1, v2, c1); 2809 /* Right shifts are negative left shifts for AArch64. */ 2810 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2811 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2812 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2813 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2814 tcg_gen_or_vec(vece, v0, v0, t1); 2815 tcg_temp_free_vec(t1); 2816 break; 2817 2818 case INDEX_op_rotrv_vec: 2819 v2 = temp_tcgv_vec(arg_temp(a2)); 2820 t1 = tcg_temp_new_vec(type); 2821 t2 = tcg_temp_new_vec(type); 2822 c1 = tcg_constant_vec(type, vece, 8 << vece); 2823 tcg_gen_neg_vec(vece, t1, v2); 2824 tcg_gen_sub_vec(vece, t2, c1, v2); 2825 /* Right shifts are negative left shifts for AArch64. 
*/ 2826 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2827 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2828 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2829 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2830 tcg_gen_or_vec(vece, v0, t1, t2); 2831 tcg_temp_free_vec(t1); 2832 tcg_temp_free_vec(t2); 2833 break; 2834 2835 default: 2836 g_assert_not_reached(); 2837 } 2838} 2839 2840static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2841{ 2842 switch (op) { 2843 case INDEX_op_goto_ptr: 2844 return C_O0_I1(r); 2845 2846 case INDEX_op_ld8u_i32: 2847 case INDEX_op_ld8s_i32: 2848 case INDEX_op_ld16u_i32: 2849 case INDEX_op_ld16s_i32: 2850 case INDEX_op_ld_i32: 2851 case INDEX_op_ld8u_i64: 2852 case INDEX_op_ld8s_i64: 2853 case INDEX_op_ld16u_i64: 2854 case INDEX_op_ld16s_i64: 2855 case INDEX_op_ld32u_i64: 2856 case INDEX_op_ld32s_i64: 2857 case INDEX_op_ld_i64: 2858 case INDEX_op_neg_i32: 2859 case INDEX_op_neg_i64: 2860 case INDEX_op_not_i32: 2861 case INDEX_op_not_i64: 2862 case INDEX_op_bswap16_i32: 2863 case INDEX_op_bswap32_i32: 2864 case INDEX_op_bswap16_i64: 2865 case INDEX_op_bswap32_i64: 2866 case INDEX_op_bswap64_i64: 2867 case INDEX_op_ext8s_i32: 2868 case INDEX_op_ext16s_i32: 2869 case INDEX_op_ext8u_i32: 2870 case INDEX_op_ext16u_i32: 2871 case INDEX_op_ext8s_i64: 2872 case INDEX_op_ext16s_i64: 2873 case INDEX_op_ext32s_i64: 2874 case INDEX_op_ext8u_i64: 2875 case INDEX_op_ext16u_i64: 2876 case INDEX_op_ext32u_i64: 2877 case INDEX_op_ext_i32_i64: 2878 case INDEX_op_extu_i32_i64: 2879 case INDEX_op_extract_i32: 2880 case INDEX_op_extract_i64: 2881 case INDEX_op_sextract_i32: 2882 case INDEX_op_sextract_i64: 2883 return C_O1_I1(r, r); 2884 2885 case INDEX_op_st8_i32: 2886 case INDEX_op_st16_i32: 2887 case INDEX_op_st_i32: 2888 case INDEX_op_st8_i64: 2889 case INDEX_op_st16_i64: 2890 case INDEX_op_st32_i64: 2891 case INDEX_op_st_i64: 2892 return C_O0_I2(rZ, r); 2893 2894 case INDEX_op_add_i32: 2895 case INDEX_op_add_i64: 2896 case INDEX_op_sub_i32: 2897 case INDEX_op_sub_i64: 2898 case INDEX_op_setcond_i32: 2899 case INDEX_op_setcond_i64: 2900 case INDEX_op_negsetcond_i32: 2901 case INDEX_op_negsetcond_i64: 2902 return C_O1_I2(r, r, rA); 2903 2904 case INDEX_op_mul_i32: 2905 case INDEX_op_mul_i64: 2906 case INDEX_op_div_i32: 2907 case INDEX_op_div_i64: 2908 case INDEX_op_divu_i32: 2909 case INDEX_op_divu_i64: 2910 case INDEX_op_rem_i32: 2911 case INDEX_op_rem_i64: 2912 case INDEX_op_remu_i32: 2913 case INDEX_op_remu_i64: 2914 case INDEX_op_muluh_i64: 2915 case INDEX_op_mulsh_i64: 2916 return C_O1_I2(r, r, r); 2917 2918 case INDEX_op_and_i32: 2919 case INDEX_op_and_i64: 2920 case INDEX_op_or_i32: 2921 case INDEX_op_or_i64: 2922 case INDEX_op_xor_i32: 2923 case INDEX_op_xor_i64: 2924 case INDEX_op_andc_i32: 2925 case INDEX_op_andc_i64: 2926 case INDEX_op_orc_i32: 2927 case INDEX_op_orc_i64: 2928 case INDEX_op_eqv_i32: 2929 case INDEX_op_eqv_i64: 2930 return C_O1_I2(r, r, rL); 2931 2932 case INDEX_op_shl_i32: 2933 case INDEX_op_shr_i32: 2934 case INDEX_op_sar_i32: 2935 case INDEX_op_rotl_i32: 2936 case INDEX_op_rotr_i32: 2937 case INDEX_op_shl_i64: 2938 case INDEX_op_shr_i64: 2939 case INDEX_op_sar_i64: 2940 case INDEX_op_rotl_i64: 2941 case INDEX_op_rotr_i64: 2942 return C_O1_I2(r, r, ri); 2943 2944 case INDEX_op_clz_i32: 2945 case INDEX_op_ctz_i32: 2946 case INDEX_op_clz_i64: 2947 case INDEX_op_ctz_i64: 2948 return C_O1_I2(r, r, rAL); 2949 2950 case INDEX_op_brcond_i32: 2951 case INDEX_op_brcond_i64: 2952 return C_O0_I2(r, rA); 2953 2954 case INDEX_op_movcond_i32: 2955 
case INDEX_op_movcond_i64: 2956 return C_O1_I4(r, r, rA, rZ, rZ); 2957 2958 case INDEX_op_qemu_ld_a32_i32: 2959 case INDEX_op_qemu_ld_a64_i32: 2960 case INDEX_op_qemu_ld_a32_i64: 2961 case INDEX_op_qemu_ld_a64_i64: 2962 return C_O1_I1(r, r); 2963 case INDEX_op_qemu_ld_a32_i128: 2964 case INDEX_op_qemu_ld_a64_i128: 2965 return C_O2_I1(r, r, r); 2966 case INDEX_op_qemu_st_a32_i32: 2967 case INDEX_op_qemu_st_a64_i32: 2968 case INDEX_op_qemu_st_a32_i64: 2969 case INDEX_op_qemu_st_a64_i64: 2970 return C_O0_I2(rZ, r); 2971 case INDEX_op_qemu_st_a32_i128: 2972 case INDEX_op_qemu_st_a64_i128: 2973 return C_O0_I3(rZ, rZ, r); 2974 2975 case INDEX_op_deposit_i32: 2976 case INDEX_op_deposit_i64: 2977 return C_O1_I2(r, 0, rZ); 2978 2979 case INDEX_op_extract2_i32: 2980 case INDEX_op_extract2_i64: 2981 return C_O1_I2(r, rZ, rZ); 2982 2983 case INDEX_op_add2_i32: 2984 case INDEX_op_add2_i64: 2985 case INDEX_op_sub2_i32: 2986 case INDEX_op_sub2_i64: 2987 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 2988 2989 case INDEX_op_add_vec: 2990 case INDEX_op_sub_vec: 2991 case INDEX_op_mul_vec: 2992 case INDEX_op_xor_vec: 2993 case INDEX_op_ssadd_vec: 2994 case INDEX_op_sssub_vec: 2995 case INDEX_op_usadd_vec: 2996 case INDEX_op_ussub_vec: 2997 case INDEX_op_smax_vec: 2998 case INDEX_op_smin_vec: 2999 case INDEX_op_umax_vec: 3000 case INDEX_op_umin_vec: 3001 case INDEX_op_shlv_vec: 3002 case INDEX_op_shrv_vec: 3003 case INDEX_op_sarv_vec: 3004 case INDEX_op_aa64_sshl_vec: 3005 return C_O1_I2(w, w, w); 3006 case INDEX_op_not_vec: 3007 case INDEX_op_neg_vec: 3008 case INDEX_op_abs_vec: 3009 case INDEX_op_shli_vec: 3010 case INDEX_op_shri_vec: 3011 case INDEX_op_sari_vec: 3012 return C_O1_I1(w, w); 3013 case INDEX_op_ld_vec: 3014 case INDEX_op_dupm_vec: 3015 return C_O1_I1(w, r); 3016 case INDEX_op_st_vec: 3017 return C_O0_I2(w, r); 3018 case INDEX_op_dup_vec: 3019 return C_O1_I1(w, wr); 3020 case INDEX_op_or_vec: 3021 case INDEX_op_andc_vec: 3022 return C_O1_I2(w, w, wO); 3023 case INDEX_op_and_vec: 3024 case INDEX_op_orc_vec: 3025 return C_O1_I2(w, w, wN); 3026 case INDEX_op_cmp_vec: 3027 return C_O1_I2(w, w, wZ); 3028 case INDEX_op_bitsel_vec: 3029 return C_O1_I3(w, w, w, w); 3030 case INDEX_op_aa64_sli_vec: 3031 return C_O1_I2(w, 0, w); 3032 3033 default: 3034 g_assert_not_reached(); 3035 } 3036} 3037 3038static void tcg_target_init(TCGContext *s) 3039{ 3040 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 3041 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 3042 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 3043 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 3044 3045 tcg_target_call_clobber_regs = -1ull; 3046 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 3047 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 3048 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 3049 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 3050 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 3051 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 3052 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 3053 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 3054 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 3055 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 3056 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 3057 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 3058 
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 3059 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 3060 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 3061 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 3062 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 3063 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 3064 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 3065 3066 s->reserved_regs = 0; 3067 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 3068 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 3069 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 3070 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); 3071 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 3072 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 3073 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0); 3074} 3075 3076/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 3077#define PUSH_SIZE ((30 - 19 + 1) * 8) 3078 3079#define FRAME_SIZE \ 3080 ((PUSH_SIZE \ 3081 + TCG_STATIC_CALL_ARGS_SIZE \ 3082 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 3083 + TCG_TARGET_STACK_ALIGN - 1) \ 3084 & ~(TCG_TARGET_STACK_ALIGN - 1)) 3085 3086/* We're expecting a 2 byte uleb128 encoded value. */ 3087QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 3088 3089/* We're expecting to use a single ADDI insn. */ 3090QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 3091 3092static void tcg_target_qemu_prologue(TCGContext *s) 3093{ 3094 TCGReg r; 3095 3096 tcg_out_bti(s, BTI_C); 3097 3098 /* Push (FP, LR) and allocate space for all saved registers. */ 3099 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 3100 TCG_REG_SP, -PUSH_SIZE, 1, 1); 3101 3102 /* Set up frame pointer for canonical unwinding. */ 3103 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 3104 3105 /* Store callee-preserved regs x19..x28. */ 3106 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3107 int ofs = (r - TCG_REG_X19 + 2) * 8; 3108 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3109 } 3110 3111 /* Make stack space for TCG locals. */ 3112 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3113 FRAME_SIZE - PUSH_SIZE); 3114 3115 /* Inform TCG about how to find TCG locals with register, offset, size. */ 3116 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 3117 CPU_TEMP_BUF_NLONGS * sizeof(long)); 3118 3119 if (!tcg_use_softmmu) { 3120 /* 3121 * Note that XZR cannot be encoded in the address base register slot, 3122 * as that actually encodes SP. Depending on the guest, we may need 3123 * to zero-extend the guest address via the address index register slot, 3124 * therefore we need to load even a zero guest base into a register. 3125 */ 3126 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 3127 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 3128 } 3129 3130 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 3131 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 3132 3133 /* 3134 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 3135 * and fall through to the rest of the epilogue. 3136 */ 3137 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 3138 tcg_out_bti(s, BTI_J); 3139 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 3140 3141 /* TB epilogue */ 3142 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 3143 tcg_out_bti(s, BTI_J); 3144 3145 /* Remove TCG locals stack space. 
*/ 3146 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3147 FRAME_SIZE - PUSH_SIZE); 3148 3149 /* Restore registers x19..x28. */ 3150 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3151 int ofs = (r - TCG_REG_X19 + 2) * 8; 3152 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3153 } 3154 3155 /* Pop (FP, LR), restore SP to previous frame. */ 3156 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 3157 TCG_REG_SP, PUSH_SIZE, 0, 1); 3158 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 3159} 3160 3161static void tcg_out_tb_start(TCGContext *s) 3162{ 3163 tcg_out_bti(s, BTI_J); 3164} 3165 3166static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 3167{ 3168 int i; 3169 for (i = 0; i < count; ++i) { 3170 p[i] = NOP; 3171 } 3172} 3173 3174typedef struct { 3175 DebugFrameHeader h; 3176 uint8_t fde_def_cfa[4]; 3177 uint8_t fde_reg_ofs[24]; 3178} DebugFrame; 3179 3180#define ELF_HOST_MACHINE EM_AARCH64 3181 3182static const DebugFrame debug_frame = { 3183 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 3184 .h.cie.id = -1, 3185 .h.cie.version = 1, 3186 .h.cie.code_align = 1, 3187 .h.cie.data_align = 0x78, /* sleb128 -8 */ 3188 .h.cie.return_column = TCG_REG_LR, 3189 3190 /* Total FDE size does not include the "len" member. */ 3191 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 3192 3193 .fde_def_cfa = { 3194 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 3195 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 3196 (FRAME_SIZE >> 7) 3197 }, 3198 .fde_reg_ofs = { 3199 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 3200 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 3201 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 3202 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 3203 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 3204 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 3205 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 3206 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 3207 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 3208 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */ 3209 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 3210 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 3211 } 3212}; 3213 3214void tcg_register_jit(const void *buf, size_t buf_size) 3215{ 3216 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 3217} 3218
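/* For reference: PUSH_SIZE is (30 - 19 + 1) * 8 = 96 bytes; the prologue stores FP/LR at the bottom of that area and x19..x28 at offsets 16..88. With the CFA defined as SP + FRAME_SIZE and data_align of -8, the factored DW_CFA_offset operands 1..12 above resolve to CFA-8 for x28 down to CFA-96 for fp, matching the per-register comments in fde_reg_ofs. */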