/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "qemu/bitops.h"

/* Used for function call generation. */
#define TCG_REG_CALL_STACK           TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN       16
#define TCG_TARGET_CALL_STACK_OFFSET 0
#define TCG_TARGET_CALL_ARG_I32      TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64      TCG_CALL_ARG_NORMAL
#ifdef CONFIG_DARWIN
# define TCG_TARGET_CALL_ARG_I128    TCG_CALL_ARG_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I128    TCG_CALL_ARG_EVEN
#endif
#define TCG_TARGET_CALL_RET_I128     TCG_CALL_RET_NORMAL

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

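/*
 * The allocation order below prefers the call-saved registers, so that
 * live values are more likely to survive calls to helpers; the argument
 * registers x0-x7 are tried last because they are clobbered most often.
 */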
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 14)) {
        *src_rw = deposit32(*src_rw, 5, 14, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_TSTBR14:
        return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000
#define TCG_CT_CONST_CMP  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear.  */
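    /*
     * For example, 0x0ff0 has the second form and is accepted, since adding
     * its lowest set bit yields 0x1000, a power of two; 0x0f0f is rejected
     * because adding its lowest set bit yields 0x0f10, which still has
     * several bits set.
     */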
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

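/*
 * For example, v32 == 0x00cc00bb is not a single shifted immediate, but
 * masking out the byte at bits [23:16] leaves 0xbb, so MOVI of 0xbb
 * followed by ORR of 0xcc at cmode 0x4 (i == 4) reconstructs the value.
 */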
/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if (ct & TCG_CT_CONST_CMP) {
        if (is_tst_cond(cond)) {
            ct |= TCG_CT_CONST_LIMM;
        } else {
            ct |= TCG_CT_CONST_AIMM;
        }
    }

    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf,     /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
    /* bit test */
    [TCG_COND_TSTEQ] = COND_EQ,
    [TCG_COND_TSTNE] = COND_NE,
};

typedef enum {
    LDST_ST = 0,     /* store */
    LDST_LD = 1,     /* load */
    LDST_LD_S_X = 2, /* load and sign-extend into Xt */
    LDST_LD_S_W = 3, /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Test and branch (immediate).  */
    I3205_TBZ       = 0x36000000,
    I3205_TBNZ      = 0x37000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive.  */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions.  */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,

    BTI_C           = 0xd503245f,
    BTI_J           = 0xd503249f,
    BTI_JC          = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
                              TCGReg rt, int imm6, int imm14)
{
    insn |= (imm6 & 0x20) << (31 - 5);
    insn |= (imm6 & 0x1f) << 19;
    tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

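/*
 * For reference: a call such as tcg_out_insn(s, 3401, ADDI, ext, rd, rn, imm)
 * expands to tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, imm), so the
 * format number at the call site must match both the emitter function and
 * the opcode constant.
 */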
/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
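    /* The (~rn & 0x20) term below derives bit 11 from the register number:
       set for a general register (rn < 32), clear for a vector register.  */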
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift).  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

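    /*
     * Worked example: limm == 0xff00 gives l = 8, h = 48, so r = 56 and
     * c = 7, i.e. an 8-bit run of ones rotated right by 56 positions.
     */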
    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
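    /*
     * The LDR (literal) forms used here carry their 19-bit offset in the
     * same bit positions as a conditional branch, which is why the
     * R_AARCH64_CONDBR19 relocation type is reused for the pool entries.
     */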
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff.  */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
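    /*
     * ADR reaches +/-1MiB directly; beyond that, ADRP forms the 4KiB page
     * address and an ADDI supplies the low 12 bits when they are nonzero.
     */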
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

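    /*
     * For example, an 8-byte access at offset 32760 still fits the scaled
     * form above, while an offset of -264 falls through to the path below.
     */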
    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

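/*
 * As in tcg_out_ld above, register numbers below 32 select the integer
 * store forms while the vector registers use the FP/SIMD encodings.
 */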
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static void tgen_cmp(TCGContext *s, TCGType ext, TCGCond cond,
                     TCGReg a, TCGReg b)
{
    if (is_tst_cond(cond)) {
        tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
    } else {
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tgen_cmpi(TCGContext *s, TCGType ext, TCGCond cond,
                      TCGReg a, tcg_target_long b)
{
    if (is_tst_cond(cond)) {
        tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
    } else if (b >= 0) {
        tcg_debug_assert(is_aimm(b));
        tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
    } else {
        tcg_debug_assert(is_aimm(-b));
        tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
    }
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        tgen_cmpi(s, ext, cond, a, b);
    } else {
        tgen_cmp(s, ext, cond, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
                        TCGReg a, TCGReg b, TCGLabel *l)
{
    tgen_cmp(s, type, c, a, b);
    tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
    tcg_out_insn(s, 3202, B_C, c, 0);
}

static void tgen_brcondi(TCGContext *s, TCGType ext, TCGCond c,
                         TCGReg a, tcg_target_long b, TCGLabel *l)
{
    int tbit = -1;
    bool need_cmp = true;

    switch (c) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* cmp xN,0; b.ne L -> cbnz xN,L */
        if (b == 0) {
            need_cmp = false;
        }
        break;
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
        if (b == 0) {
            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
            tbit = ext ? 63 : 31;
            need_cmp = false;
        }
        break;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */
        if (b == UINT32_MAX) {
            c = tcg_tst_eqne_cond(c);
            ext = TCG_TYPE_I32;
            need_cmp = false;
            break;
        }
        /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
        if (is_power_of_2(b)) {
            tbit = ctz64(b);
            need_cmp = false;
        }
        break;
    default:
        break;
    }

    if (need_cmp) {
        tgen_cmpi(s, ext, c, a, b);
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        tcg_out_insn(s, 3202, B_C, c, 0);
        return;
    }

    if (tbit >= 0) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
        switch (c) {
        case TCG_COND_TSTEQ:
            tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
            break;
        case TCG_COND_TSTNE:
            tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        switch (c) {
        case TCG_COND_EQ:
            tcg_out_insn(s, 3201, CBZ, ext, a, 0);
            break;
        case TCG_COND_NE:
            tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static const TCGOutOpBrcond outop_brcond = {
    .base.static_constraint = C_O0_I2(r, rC),
    .out_rr = tgen_brcond,
    .out_ri = tgen_brcondi,
};

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/* We expect to use a 7-bit scaled negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -512

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        TCGReg addr_adj;
        TCGType mask_type;
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addr_reg = addr_reg;

        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}.  */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

        /* Extract the TLB index from the address into X0.  */
        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                     s->page_bits - CPU_TLB_ENTRY_BITS);

        /* Add the tlb_table pointer, forming the CPUTLBEntry address.  */
        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

        /* Load the tlb comparator into TMP0, and the fast path addend.  */
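        /* (The build assertion below documents that this sequence assumes a
           little-endian host layout for CPUTLBEntry.)  */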
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                         : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                   offsetof(CPUTLBEntry, addend));

        /*
         * For aligned accesses, we check the first byte and include
         * the alignment bits within the address.  For unaligned access,
         * we check that we don't cross pages using the address of the
         * last byte of the access.
         */
        if (a_mask >= s_mask) {
            addr_adj = addr_reg;
        } else {
            addr_adj = TCG_REG_TMP2;
            tcg_out_insn(s, 3401, ADDI, addr_type,
                         addr_adj, addr_reg, s_mask - a_mask);
        }
        compare_mask = (uint64_t)s->page_mask | a_mask;

        /* Store the page mask part of the address into TMP2.  */
        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                         addr_adj, compare_mask);

        /* Perform the address comparison.  */
        tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);

        /* If not equal, we jump to the slow path.  */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        h->base = TCG_REG_TMP1;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        if (a_mask) {
            ldst = new_ldst_label(s);

            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addr_reg = addr_reg;

            /* tst addr, #mask */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

            /* b.ne slow_path */
            ldst->label_ptr[0] = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
        }

        if (guest_base || addr_type == TCG_TYPE_I32) {
            h->base = TCG_REG_GUEST_BASE;
            h->index = addr_reg;
            h->index_ext = addr_type;
        } else {
            h->base = addr_reg;
            h->index = TCG_REG_XZR;
            h->index_ext = TCG_TYPE_I64;
        }
    }

    return ldst;
}

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    TCGReg base;
    bool use_pair;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing.  */
    if (h.index == TCG_REG_XZR) {
        base = h.base;
    } else {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
        } else {
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
        }
    }

    use_pair = h.aa.atom < MO_128 || have_lse2;

    if (!use_pair) {
        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
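            /* If any of the low four address bits are set, the access is
               misaligned and branches ahead to the LDP/STP path; only the
               aligned case needs the exclusive-pair loop below.  */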
1929 */ 1930 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15); 1931 branch = s->code_ptr; 1932 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1933 use_pair = true; 1934 } 1935 1936 if (is_ld) { 1937 /* 1938 * 16-byte atomicity without LSE2 requires LDXP+STXP loop: 1939 * ldxp lo, hi, [base] 1940 * stxp t0, lo, hi, [base] 1941 * cbnz t0, .-8 1942 * Require no overlap between data{lo,hi} and base. 1943 */ 1944 if (datalo == base || datahi == base) { 1945 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base); 1946 base = TCG_REG_TMP2; 1947 } 1948 ll = sl = datalo; 1949 lh = sh = datahi; 1950 } else { 1951 /* 1952 * 16-byte atomicity without LSE2 requires LDXP+STXP loop: 1953 * 1: ldxp t0, t1, [base] 1954 * stxp t0, lo, hi, [base] 1955 * cbnz t0, 1b 1956 */ 1957 tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1); 1958 ll = TCG_REG_TMP0; 1959 lh = TCG_REG_TMP1; 1960 sl = datalo; 1961 sh = datahi; 1962 } 1963 1964 tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base); 1965 tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base); 1966 tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2); 1967 1968 if (use_pair) { 1969 /* "b .+8", branching across the one insn of use_pair. */ 1970 tcg_out_insn(s, 3206, B, 2); 1971 reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr)); 1972 } 1973 } 1974 1975 if (use_pair) { 1976 if (is_ld) { 1977 tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0); 1978 } else { 1979 tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0); 1980 } 1981 } 1982 1983 if (ldst) { 1984 ldst->type = TCG_TYPE_I128; 1985 ldst->datalo_reg = datalo; 1986 ldst->datahi_reg = datahi; 1987 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1988 } 1989} 1990 1991static const tcg_insn_unit *tb_ret_addr; 1992 1993static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 1994{ 1995 const tcg_insn_unit *target; 1996 ptrdiff_t offset; 1997 1998 /* Reuse the zeroing that exists for goto_ptr. */ 1999 if (a0 == 0) { 2000 target = tcg_code_gen_epilogue; 2001 } else { 2002 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 2003 target = tb_ret_addr; 2004 } 2005 2006 offset = tcg_pcrel_diff(s, target) >> 2; 2007 if (offset == sextract64(offset, 0, 26)) { 2008 tcg_out_insn(s, 3206, B, offset); 2009 } else { 2010 /* 2011 * Only x16/x17 generate BTI type Jump (2), 2012 * other registers generate BTI type Jump|Call (3). 2013 */ 2014 QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16); 2015 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target); 2016 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0); 2017 } 2018} 2019 2020static void tcg_out_goto_tb(TCGContext *s, int which) 2021{ 2022 /* 2023 * Direct branch, or indirect address load, will be patched 2024 * by tb_target_set_jmp_target. Assert indirect load offset 2025 * in range early, regardless of direct branch distance. 2026 */ 2027 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); 2028 tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); 2029 2030 set_jmp_insn_offset(s, which); 2031 tcg_out32(s, I3206_B); 2032 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0); 2033 set_jmp_reset_offset(s, which); 2034 tcg_out_bti(s, BTI_J); 2035} 2036 2037void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2038 uintptr_t jmp_rx, uintptr_t jmp_rw) 2039{ 2040 uintptr_t d_addr = tb->jmp_target_addr[n]; 2041 ptrdiff_t d_offset = d_addr - jmp_rx; 2042 tcg_insn_unit insn; 2043 2044 /* Either directly branch, or indirect branch load. 
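When the destination lies within the +/-128MB reach of a direct B (the 26-bit immediate deposited below), we patch that in; otherwise we patch in a literal LDR of tb->jmp_target_addr[n] into TMP0, and the BR TMP0 emitted by tcg_out_goto_tb jumps through the loaded address.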
*/ 2045 if (d_offset == sextract64(d_offset, 0, 28)) { 2046 insn = deposit32(I3206_B, 0, 26, d_offset >> 2); 2047 } else { 2048 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 2049 ptrdiff_t i_offset = i_addr - jmp_rx; 2050 2051 /* Note that we asserted this in range in tcg_out_goto_tb. */ 2052 insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2); 2053 } 2054 qatomic_set((uint32_t *)jmp_rw, insn); 2055 flush_idcache_range(jmp_rx, jmp_rw, 4); 2056} 2057 2058 2059static void tgen_add(TCGContext *s, TCGType type, 2060 TCGReg a0, TCGReg a1, TCGReg a2) 2061{ 2062 tcg_out_insn(s, 3502, ADD, type, a0, a1, a2); 2063} 2064 2065static void tgen_addi(TCGContext *s, TCGType type, 2066 TCGReg a0, TCGReg a1, tcg_target_long a2) 2067{ 2068 if (a2 >= 0) { 2069 tcg_out_insn(s, 3401, ADDI, type, a0, a1, a2); 2070 } else { 2071 tcg_out_insn(s, 3401, SUBI, type, a0, a1, -a2); 2072 } 2073} 2074 2075static const TCGOutOpBinary outop_add = { 2076 .base.static_constraint = C_O1_I2(r, r, rA), 2077 .out_rrr = tgen_add, 2078 .out_rri = tgen_addi, 2079}; 2080 2081static void tgen_and(TCGContext *s, TCGType type, 2082 TCGReg a0, TCGReg a1, TCGReg a2) 2083{ 2084 tcg_out_insn(s, 3510, AND, type, a0, a1, a2); 2085} 2086 2087static void tgen_andi(TCGContext *s, TCGType type, 2088 TCGReg a0, TCGReg a1, tcg_target_long a2) 2089{ 2090 tcg_out_logicali(s, I3404_ANDI, type, a0, a1, a2); 2091} 2092 2093static const TCGOutOpBinary outop_and = { 2094 .base.static_constraint = C_O1_I2(r, r, rL), 2095 .out_rrr = tgen_and, 2096 .out_rri = tgen_andi, 2097}; 2098 2099static void tgen_andc(TCGContext *s, TCGType type, 2100 TCGReg a0, TCGReg a1, TCGReg a2) 2101{ 2102 tcg_out_insn(s, 3510, BIC, type, a0, a1, a2); 2103} 2104 2105static const TCGOutOpBinary outop_andc = { 2106 .base.static_constraint = C_O1_I2(r, r, r), 2107 .out_rrr = tgen_andc, 2108}; 2109 2110static void tgen_clz(TCGContext *s, TCGType type, 2111 TCGReg a0, TCGReg a1, TCGReg a2) 2112{ 2113 tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true); 2114 tcg_out_insn(s, 3507, CLZ, type, TCG_REG_TMP0, a1); 2115 tcg_out_insn(s, 3506, CSEL, type, a0, TCG_REG_TMP0, a2, TCG_COND_NE); 2116} 2117 2118static void tgen_clzi(TCGContext *s, TCGType type, 2119 TCGReg a0, TCGReg a1, tcg_target_long a2) 2120{ 2121 if (a2 == (type == TCG_TYPE_I32 ? 
32 : 64)) { 2122 tcg_out_insn(s, 3507, CLZ, type, a0, a1); 2123 return; 2124 } 2125 2126 tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true); 2127 tcg_out_insn(s, 3507, CLZ, type, a0, a1); 2128 2129 switch (a2) { 2130 case -1: 2131 tcg_out_insn(s, 3506, CSINV, type, a0, a0, TCG_REG_XZR, TCG_COND_NE); 2132 break; 2133 case 0: 2134 tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_XZR, TCG_COND_NE); 2135 break; 2136 default: 2137 tcg_out_movi(s, type, TCG_REG_TMP0, a2); 2138 tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_TMP0, TCG_COND_NE); 2139 break; 2140 } 2141} 2142 2143static const TCGOutOpBinary outop_clz = { 2144 .base.static_constraint = C_O1_I2(r, r, rAL), 2145 .out_rrr = tgen_clz, 2146 .out_rri = tgen_clzi, 2147}; 2148 2149static const TCGOutOpUnary outop_ctpop = { 2150 .base.static_constraint = C_NotImplemented, 2151}; 2152 2153static void tgen_ctz(TCGContext *s, TCGType type, 2154 TCGReg a0, TCGReg a1, TCGReg a2) 2155{ 2156 tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1); 2157 tgen_clz(s, type, a0, TCG_REG_TMP0, a2); 2158} 2159 2160static void tgen_ctzi(TCGContext *s, TCGType type, 2161 TCGReg a0, TCGReg a1, tcg_target_long a2) 2162{ 2163 tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1); 2164 tgen_clzi(s, type, a0, TCG_REG_TMP0, a2); 2165} 2166 2167static const TCGOutOpBinary outop_ctz = { 2168 .base.static_constraint = C_O1_I2(r, r, rAL), 2169 .out_rrr = tgen_ctz, 2170 .out_rri = tgen_ctzi, 2171}; 2172 2173static void tgen_divs(TCGContext *s, TCGType type, 2174 TCGReg a0, TCGReg a1, TCGReg a2) 2175{ 2176 tcg_out_insn(s, 3508, SDIV, type, a0, a1, a2); 2177} 2178 2179static const TCGOutOpBinary outop_divs = { 2180 .base.static_constraint = C_O1_I2(r, r, r), 2181 .out_rrr = tgen_divs, 2182}; 2183 2184static const TCGOutOpDivRem outop_divs2 = { 2185 .base.static_constraint = C_NotImplemented, 2186}; 2187 2188static void tgen_divu(TCGContext *s, TCGType type, 2189 TCGReg a0, TCGReg a1, TCGReg a2) 2190{ 2191 tcg_out_insn(s, 3508, UDIV, type, a0, a1, a2); 2192} 2193 2194static const TCGOutOpBinary outop_divu = { 2195 .base.static_constraint = C_O1_I2(r, r, r), 2196 .out_rrr = tgen_divu, 2197}; 2198 2199static const TCGOutOpDivRem outop_divu2 = { 2200 .base.static_constraint = C_NotImplemented, 2201}; 2202 2203static void tgen_eqv(TCGContext *s, TCGType type, 2204 TCGReg a0, TCGReg a1, TCGReg a2) 2205{ 2206 tcg_out_insn(s, 3510, EON, type, a0, a1, a2); 2207} 2208 2209static const TCGOutOpBinary outop_eqv = { 2210 .base.static_constraint = C_O1_I2(r, r, r), 2211 .out_rrr = tgen_eqv, 2212}; 2213 2214static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) 2215{ 2216 tcg_out_ubfm(s, TCG_TYPE_I64, a0, a1, 32, 63); 2217} 2218 2219static const TCGOutOpUnary outop_extrh_i64_i32 = { 2220 .base.static_constraint = C_O1_I1(r, r), 2221 .out_rr = tgen_extrh_i64_i32, 2222}; 2223 2224static void tgen_mul(TCGContext *s, TCGType type, 2225 TCGReg a0, TCGReg a1, TCGReg a2) 2226{ 2227 tcg_out_insn(s, 3509, MADD, type, a0, a1, a2, TCG_REG_XZR); 2228} 2229 2230static const TCGOutOpBinary outop_mul = { 2231 .base.static_constraint = C_O1_I2(r, r, r), 2232 .out_rrr = tgen_mul, 2233}; 2234 2235static const TCGOutOpMul2 outop_muls2 = { 2236 .base.static_constraint = C_NotImplemented, 2237}; 2238 2239static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) 2240{ 2241 return type == TCG_TYPE_I64 ? 
C_O1_I2(r, r, r) : C_NotImplemented; 2242} 2243 2244static void tgen_mulsh(TCGContext *s, TCGType type, 2245 TCGReg a0, TCGReg a1, TCGReg a2) 2246{ 2247 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2248} 2249 2250static const TCGOutOpBinary outop_mulsh = { 2251 .base.static_constraint = C_Dynamic, 2252 .base.dynamic_constraint = cset_mulh, 2253 .out_rrr = tgen_mulsh, 2254}; 2255 2256static const TCGOutOpMul2 outop_mulu2 = { 2257 .base.static_constraint = C_NotImplemented, 2258}; 2259 2260static void tgen_muluh(TCGContext *s, TCGType type, 2261 TCGReg a0, TCGReg a1, TCGReg a2) 2262{ 2263 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2264} 2265 2266static const TCGOutOpBinary outop_muluh = { 2267 .base.static_constraint = C_Dynamic, 2268 .base.dynamic_constraint = cset_mulh, 2269 .out_rrr = tgen_muluh, 2270}; 2271 2272static const TCGOutOpBinary outop_nand = { 2273 .base.static_constraint = C_NotImplemented, 2274}; 2275 2276static const TCGOutOpBinary outop_nor = { 2277 .base.static_constraint = C_NotImplemented, 2278}; 2279 2280static void tgen_or(TCGContext *s, TCGType type, 2281 TCGReg a0, TCGReg a1, TCGReg a2) 2282{ 2283 tcg_out_insn(s, 3510, ORR, type, a0, a1, a2); 2284} 2285 2286static void tgen_ori(TCGContext *s, TCGType type, 2287 TCGReg a0, TCGReg a1, tcg_target_long a2) 2288{ 2289 tcg_out_logicali(s, I3404_ORRI, type, a0, a1, a2); 2290} 2291 2292static const TCGOutOpBinary outop_or = { 2293 .base.static_constraint = C_O1_I2(r, r, rL), 2294 .out_rrr = tgen_or, 2295 .out_rri = tgen_ori, 2296}; 2297 2298static void tgen_orc(TCGContext *s, TCGType type, 2299 TCGReg a0, TCGReg a1, TCGReg a2) 2300{ 2301 tcg_out_insn(s, 3510, ORN, type, a0, a1, a2); 2302} 2303 2304static const TCGOutOpBinary outop_orc = { 2305 .base.static_constraint = C_O1_I2(r, r, r), 2306 .out_rrr = tgen_orc, 2307}; 2308 2309static void tgen_rems(TCGContext *s, TCGType type, 2310 TCGReg a0, TCGReg a1, TCGReg a2) 2311{ 2312 tcg_out_insn(s, 3508, SDIV, type, TCG_REG_TMP0, a1, a2); 2313 tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1); 2314} 2315 2316static const TCGOutOpBinary outop_rems = { 2317 .base.static_constraint = C_O1_I2(r, r, r), 2318 .out_rrr = tgen_rems, 2319}; 2320 2321static void tgen_remu(TCGContext *s, TCGType type, 2322 TCGReg a0, TCGReg a1, TCGReg a2) 2323{ 2324 tcg_out_insn(s, 3508, UDIV, type, TCG_REG_TMP0, a1, a2); 2325 tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1); 2326} 2327 2328static const TCGOutOpBinary outop_remu = { 2329 .base.static_constraint = C_O1_I2(r, r, r), 2330 .out_rrr = tgen_remu, 2331}; 2332 2333static const TCGOutOpBinary outop_rotl = { 2334 .base.static_constraint = C_NotImplemented, 2335}; 2336 2337static void tgen_rotr(TCGContext *s, TCGType type, 2338 TCGReg a0, TCGReg a1, TCGReg a2) 2339{ 2340 tcg_out_insn(s, 3508, RORV, type, a0, a1, a2); 2341} 2342 2343static void tgen_rotri(TCGContext *s, TCGType type, 2344 TCGReg a0, TCGReg a1, tcg_target_long a2) 2345{ 2346 int max = type == TCG_TYPE_I32 ? 31 : 63; 2347 tcg_out_extr(s, type, a0, a1, a1, a2 & max); 2348} 2349 2350static const TCGOutOpBinary outop_rotr = { 2351 .base.static_constraint = C_O1_I2(r, r, ri), 2352 .out_rrr = tgen_rotr, 2353 .out_rri = tgen_rotri, 2354}; 2355 2356static void tgen_sar(TCGContext *s, TCGType type, 2357 TCGReg a0, TCGReg a1, TCGReg a2) 2358{ 2359 tcg_out_insn(s, 3508, ASRV, type, a0, a1, a2); 2360} 2361 2362static void tgen_sari(TCGContext *s, TCGType type, 2363 TCGReg a0, TCGReg a1, tcg_target_long a2) 2364{ 2365 int max = type == TCG_TYPE_I32 ? 
31 : 63; 2366 tcg_out_sbfm(s, type, a0, a1, a2 & max, max); 2367} 2368 2369static const TCGOutOpBinary outop_sar = { 2370 .base.static_constraint = C_O1_I2(r, r, ri), 2371 .out_rrr = tgen_sar, 2372 .out_rri = tgen_sari, 2373}; 2374 2375static void tgen_shl(TCGContext *s, TCGType type, 2376 TCGReg a0, TCGReg a1, TCGReg a2) 2377{ 2378 tcg_out_insn(s, 3508, LSLV, type, a0, a1, a2); 2379} 2380 2381static void tgen_shli(TCGContext *s, TCGType type, 2382 TCGReg a0, TCGReg a1, tcg_target_long a2) 2383{ 2384 int max = type == TCG_TYPE_I32 ? 31 : 63; 2385 tcg_out_ubfm(s, type, a0, a1, -a2 & max, ~a2 & max); 2386} 2387 2388static const TCGOutOpBinary outop_shl = { 2389 .base.static_constraint = C_O1_I2(r, r, ri), 2390 .out_rrr = tgen_shl, 2391 .out_rri = tgen_shli, 2392}; 2393 2394static void tgen_shr(TCGContext *s, TCGType type, 2395 TCGReg a0, TCGReg a1, TCGReg a2) 2396{ 2397 tcg_out_insn(s, 3508, LSRV, type, a0, a1, a2); 2398} 2399 2400static void tgen_shri(TCGContext *s, TCGType type, 2401 TCGReg a0, TCGReg a1, tcg_target_long a2) 2402{ 2403 int max = type == TCG_TYPE_I32 ? 31 : 63; 2404 tcg_out_ubfm(s, type, a0, a1, a2 & max, max); 2405} 2406 2407static const TCGOutOpBinary outop_shr = { 2408 .base.static_constraint = C_O1_I2(r, r, ri), 2409 .out_rrr = tgen_shr, 2410 .out_rri = tgen_shri, 2411}; 2412 2413static void tgen_sub(TCGContext *s, TCGType type, 2414 TCGReg a0, TCGReg a1, TCGReg a2) 2415{ 2416 tcg_out_insn(s, 3502, SUB, type, a0, a1, a2); 2417} 2418 2419static const TCGOutOpSubtract outop_sub = { 2420 .base.static_constraint = C_O1_I2(r, r, r), 2421 .out_rrr = tgen_sub, 2422}; 2423 2424static void tgen_xor(TCGContext *s, TCGType type, 2425 TCGReg a0, TCGReg a1, TCGReg a2) 2426{ 2427 tcg_out_insn(s, 3510, EOR, type, a0, a1, a2); 2428} 2429 2430static void tgen_xori(TCGContext *s, TCGType type, 2431 TCGReg a0, TCGReg a1, tcg_target_long a2) 2432{ 2433 tcg_out_logicali(s, I3404_EORI, type, a0, a1, a2); 2434} 2435 2436static const TCGOutOpBinary outop_xor = { 2437 .base.static_constraint = C_O1_I2(r, r, rL), 2438 .out_rrr = tgen_xor, 2439 .out_rri = tgen_xori, 2440}; 2441 2442static void tgen_bswap16(TCGContext *s, TCGType type, 2443 TCGReg a0, TCGReg a1, unsigned flags) 2444{ 2445 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2446 if (flags & TCG_BSWAP_OS) { 2447 /* Output must be sign-extended. */ 2448 tcg_out_ext16s(s, type, a0, a0); 2449 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2450 /* Output must be zero-extended, but input isn't. 
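The rev16 above swaps bytes only within each 16-bit lane, so any garbage in bits 16..31 of the input is still there afterwards and has to be cleared explicitly.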
*/ 2451 tcg_out_ext16u(s, a0, a0); 2452 } 2453} 2454 2455static const TCGOutOpBswap outop_bswap16 = { 2456 .base.static_constraint = C_O1_I1(r, r), 2457 .out_rr = tgen_bswap16, 2458}; 2459 2460static void tgen_bswap32(TCGContext *s, TCGType type, 2461 TCGReg a0, TCGReg a1, unsigned flags) 2462{ 2463 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2464 if (flags & TCG_BSWAP_OS) { 2465 tcg_out_ext32s(s, a0, a0); 2466 } 2467} 2468 2469static const TCGOutOpBswap outop_bswap32 = { 2470 .base.static_constraint = C_O1_I1(r, r), 2471 .out_rr = tgen_bswap32, 2472}; 2473 2474static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 2475{ 2476 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2477} 2478 2479static const TCGOutOpUnary outop_bswap64 = { 2480 .base.static_constraint = C_O1_I1(r, r), 2481 .out_rr = tgen_bswap64, 2482}; 2483 2484static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 2485{ 2486 tgen_sub(s, type, a0, TCG_REG_XZR, a1); 2487} 2488 2489static const TCGOutOpUnary outop_neg = { 2490 .base.static_constraint = C_O1_I1(r, r), 2491 .out_rr = tgen_neg, 2492}; 2493 2494static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 2495{ 2496 tgen_orc(s, type, a0, TCG_REG_XZR, a1); 2497} 2498 2499static const TCGOutOpUnary outop_not = { 2500 .base.static_constraint = C_O1_I1(r, r), 2501 .out_rr = tgen_not, 2502}; 2503 2504static void tgen_cset(TCGContext *s, TCGCond cond, TCGReg ret) 2505{ 2506 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2507 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, ret, TCG_REG_XZR, 2508 TCG_REG_XZR, tcg_invert_cond(cond)); 2509} 2510 2511static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, 2512 TCGReg a0, TCGReg a1, TCGReg a2) 2513{ 2514 tgen_cmp(s, type, cond, a1, a2); 2515 tgen_cset(s, cond, a0); 2516} 2517 2518static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, 2519 TCGReg a0, TCGReg a1, tcg_target_long a2) 2520{ 2521 tgen_cmpi(s, type, cond, a1, a2); 2522 tgen_cset(s, cond, a0); 2523} 2524 2525static const TCGOutOpSetcond outop_setcond = { 2526 .base.static_constraint = C_O1_I2(r, r, rC), 2527 .out_rrr = tgen_setcond, 2528 .out_rri = tgen_setcondi, 2529}; 2530 2531static void tgen_csetm(TCGContext *s, TCGType ext, TCGCond cond, TCGReg ret) 2532{ 2533 /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). 
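CSINV with both sources being the zero register yields ~0 (i.e. -1) when its condition is false, which with the inverted condition is exactly when cond holds, and 0 otherwise.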
*/ 2534 tcg_out_insn(s, 3506, CSINV, ext, ret, TCG_REG_XZR, 2535 TCG_REG_XZR, tcg_invert_cond(cond)); 2536} 2537 2538static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, 2539 TCGReg a0, TCGReg a1, TCGReg a2) 2540{ 2541 tgen_cmp(s, type, cond, a1, a2); 2542 tgen_csetm(s, type, cond, a0); 2543} 2544 2545static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, 2546 TCGReg a0, TCGReg a1, tcg_target_long a2) 2547{ 2548 tgen_cmpi(s, type, cond, a1, a2); 2549 tgen_csetm(s, type, cond, a0); 2550} 2551 2552static const TCGOutOpSetcond outop_negsetcond = { 2553 .base.static_constraint = C_O1_I2(r, r, rC), 2554 .out_rrr = tgen_negsetcond, 2555 .out_rri = tgen_negsetcondi, 2556}; 2557 2558static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, 2559 TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, 2560 TCGArg vt, bool const_vt, TCGArg vf, bool const_vf) 2561{ 2562 tcg_out_cmp(s, type, cond, c1, c2, const_c2); 2563 tcg_out_insn(s, 3506, CSEL, type, ret, vt, vf, cond); 2564} 2565 2566static const TCGOutOpMovcond outop_movcond = { 2567 .base.static_constraint = C_O1_I4(r, r, rC, rz, rz), 2568 .out = tgen_movcond, 2569}; 2570 2571static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 2572 TCGReg a2, unsigned ofs, unsigned len) 2573{ 2574 unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; 2575 2576 /* 2577 * Since we can't support "0Z" as a constraint, we allow a1 in 2578 * any register. Fix things up as if a matching constraint. 2579 */ 2580 if (a0 != a1) { 2581 if (a0 == a2) { 2582 tcg_out_mov(s, type, TCG_REG_TMP0, a2); 2583 a2 = TCG_REG_TMP0; 2584 } 2585 tcg_out_mov(s, type, a0, a1); 2586 } 2587 tcg_out_bfm(s, type, a0, a2, -ofs & mask, len - 1); 2588} 2589 2590static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 2591 tcg_target_long a2, unsigned ofs, unsigned len) 2592{ 2593 tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len)); 2594} 2595 2596static void tgen_depositz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2, 2597 unsigned ofs, unsigned len) 2598{ 2599 int max = type == TCG_TYPE_I32 ? 
31 : 63; 2600 tcg_out_ubfm(s, type, a0, a2, -ofs & max, len - 1); 2601} 2602 2603static const TCGOutOpDeposit outop_deposit = { 2604 .base.static_constraint = C_O1_I2(r, rZ, rZ), 2605 .out_rrr = tgen_deposit, 2606 .out_rri = tgen_depositi, 2607 .out_rzr = tgen_depositz, 2608}; 2609 2610static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 2611 unsigned ofs, unsigned len) 2612{ 2613 if (ofs == 0) { 2614 uint64_t mask = MAKE_64BIT_MASK(0, len); 2615 tcg_out_logicali(s, I3404_ANDI, type, a0, a1, mask); 2616 } else { 2617 tcg_out_ubfm(s, type, a0, a1, ofs, ofs + len - 1); 2618 } 2619} 2620 2621static const TCGOutOpExtract outop_extract = { 2622 .base.static_constraint = C_O1_I1(r, r), 2623 .out_rr = tgen_extract, 2624}; 2625 2626static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 2627 unsigned ofs, unsigned len) 2628{ 2629 tcg_out_sbfm(s, type, a0, a1, ofs, ofs + len - 1); 2630} 2631 2632static const TCGOutOpExtract outop_sextract = { 2633 .base.static_constraint = C_O1_I1(r, r), 2634 .out_rr = tgen_sextract, 2635}; 2636 2637static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, 2638 TCGReg a1, TCGReg a2, unsigned shr) 2639{ 2640 tcg_out_extr(s, type, a0, a2, a1, shr); 2641} 2642 2643static const TCGOutOpExtract2 outop_extract2 = { 2644 .base.static_constraint = C_O1_I2(r, rz, rz), 2645 .out_rrr = tgen_extract2, 2646}; 2647 2648static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, 2649 const TCGArg args[TCG_MAX_OP_ARGS], 2650 const int const_args[TCG_MAX_OP_ARGS]) 2651{ 2652 /* Hoist the loads of the most common arguments. */ 2653 TCGArg a0 = args[0]; 2654 TCGArg a1 = args[1]; 2655 TCGArg a2 = args[2]; 2656 2657 switch (opc) { 2658 case INDEX_op_goto_ptr: 2659 tcg_out_insn(s, 3207, BR, a0); 2660 break; 2661 2662 case INDEX_op_br: 2663 tcg_out_goto_label(s, arg_label(a0)); 2664 break; 2665 2666 case INDEX_op_ld8u_i32: 2667 case INDEX_op_ld8u_i64: 2668 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 2669 break; 2670 case INDEX_op_ld8s_i32: 2671 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 2672 break; 2673 case INDEX_op_ld8s_i64: 2674 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 2675 break; 2676 case INDEX_op_ld16u_i32: 2677 case INDEX_op_ld16u_i64: 2678 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 2679 break; 2680 case INDEX_op_ld16s_i32: 2681 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 2682 break; 2683 case INDEX_op_ld16s_i64: 2684 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 2685 break; 2686 case INDEX_op_ld_i32: 2687 case INDEX_op_ld32u_i64: 2688 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 2689 break; 2690 case INDEX_op_ld32s_i64: 2691 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 2692 break; 2693 case INDEX_op_ld_i64: 2694 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 2695 break; 2696 2697 case INDEX_op_st8_i32: 2698 case INDEX_op_st8_i64: 2699 tcg_out_ldst(s, I3312_STRB, a0, a1, a2, 0); 2700 break; 2701 case INDEX_op_st16_i32: 2702 case INDEX_op_st16_i64: 2703 tcg_out_ldst(s, I3312_STRH, a0, a1, a2, 1); 2704 break; 2705 case INDEX_op_st_i32: 2706 case INDEX_op_st32_i64: 2707 tcg_out_ldst(s, I3312_STRW, a0, a1, a2, 2); 2708 break; 2709 case INDEX_op_st_i64: 2710 tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); 2711 break; 2712 2713 case INDEX_op_qemu_ld_i32: 2714 case INDEX_op_qemu_ld_i64: 2715 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2716 break; 2717 case INDEX_op_qemu_st_i32: 2718 case INDEX_op_qemu_st_i64: 2719 tcg_out_qemu_st(s, a0, a1, a2, ext); 2720 break; 2721 case INDEX_op_qemu_ld_i128: 2722 tcg_out_qemu_ldst_i128(s, a0, a1, 
a2, args[3], true); 2723 break; 2724 case INDEX_op_qemu_st_i128: 2725 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); 2726 break; 2727 2728 case INDEX_op_add2_i32: 2729 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3], 2730 (int32_t)args[4], args[5], const_args[4], 2731 const_args[5], false); 2732 break; 2733 case INDEX_op_add2_i64: 2734 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4], 2735 args[5], const_args[4], const_args[5], false); 2736 break; 2737 case INDEX_op_sub2_i32: 2738 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3], 2739 (int32_t)args[4], args[5], const_args[4], 2740 const_args[5], true); 2741 break; 2742 case INDEX_op_sub2_i64: 2743 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4], 2744 args[5], const_args[4], const_args[5], true); 2745 break; 2746 2747 case INDEX_op_mb: 2748 tcg_out_mb(s, a0); 2749 break; 2750 2751 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2752 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2753 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 2754 default: 2755 g_assert_not_reached(); 2756 } 2757} 2758 2759static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2760 unsigned vecl, unsigned vece, 2761 const TCGArg args[TCG_MAX_OP_ARGS], 2762 const int const_args[TCG_MAX_OP_ARGS]) 2763{ 2764 static const AArch64Insn cmp_vec_insn[16] = { 2765 [TCG_COND_EQ] = I3616_CMEQ, 2766 [TCG_COND_GT] = I3616_CMGT, 2767 [TCG_COND_GE] = I3616_CMGE, 2768 [TCG_COND_GTU] = I3616_CMHI, 2769 [TCG_COND_GEU] = I3616_CMHS, 2770 }; 2771 static const AArch64Insn cmp_scalar_insn[16] = { 2772 [TCG_COND_EQ] = I3611_CMEQ, 2773 [TCG_COND_GT] = I3611_CMGT, 2774 [TCG_COND_GE] = I3611_CMGE, 2775 [TCG_COND_GTU] = I3611_CMHI, 2776 [TCG_COND_GEU] = I3611_CMHS, 2777 }; 2778 static const AArch64Insn cmp0_vec_insn[16] = { 2779 [TCG_COND_EQ] = I3617_CMEQ0, 2780 [TCG_COND_GT] = I3617_CMGT0, 2781 [TCG_COND_GE] = I3617_CMGE0, 2782 [TCG_COND_LT] = I3617_CMLT0, 2783 [TCG_COND_LE] = I3617_CMLE0, 2784 }; 2785 static const AArch64Insn cmp0_scalar_insn[16] = { 2786 [TCG_COND_EQ] = I3612_CMEQ0, 2787 [TCG_COND_GT] = I3612_CMGT0, 2788 [TCG_COND_GE] = I3612_CMGE0, 2789 [TCG_COND_LT] = I3612_CMLT0, 2790 [TCG_COND_LE] = I3612_CMLE0, 2791 }; 2792 2793 TCGType type = vecl + TCG_TYPE_V64; 2794 unsigned is_q = vecl; 2795 bool is_scalar = !is_q && vece == MO_64; 2796 TCGArg a0, a1, a2, a3; 2797 int cmode, imm8; 2798 2799 a0 = args[0]; 2800 a1 = args[1]; 2801 a2 = args[2]; 2802 2803 switch (opc) { 2804 case INDEX_op_ld_vec: 2805 tcg_out_ld(s, type, a0, a1, a2); 2806 break; 2807 case INDEX_op_st_vec: 2808 tcg_out_st(s, type, a0, a1, a2); 2809 break; 2810 case INDEX_op_dupm_vec: 2811 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2812 break; 2813 case INDEX_op_add_vec: 2814 if (is_scalar) { 2815 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2816 } else { 2817 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2818 } 2819 break; 2820 case INDEX_op_sub_vec: 2821 if (is_scalar) { 2822 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2823 } else { 2824 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2825 } 2826 break; 2827 case INDEX_op_mul_vec: 2828 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2829 break; 2830 case INDEX_op_neg_vec: 2831 if (is_scalar) { 2832 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2833 } else { 2834 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2835 } 2836 break; 2837 case INDEX_op_abs_vec: 2838 if (is_scalar) { 2839 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2840 } else { 2841 tcg_out_insn(s, 3617, 
ABS, is_q, vece, a0, a1); 2842 } 2843 break; 2844 case INDEX_op_and_vec: 2845 if (const_args[2]) { 2846 is_shimm1632(~a2, &cmode, &imm8); 2847 if (a0 == a1) { 2848 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2849 return; 2850 } 2851 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2852 a2 = a0; 2853 } 2854 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2855 break; 2856 case INDEX_op_or_vec: 2857 if (const_args[2]) { 2858 is_shimm1632(a2, &cmode, &imm8); 2859 if (a0 == a1) { 2860 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2861 return; 2862 } 2863 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2864 a2 = a0; 2865 } 2866 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2867 break; 2868 case INDEX_op_andc_vec: 2869 if (const_args[2]) { 2870 is_shimm1632(a2, &cmode, &imm8); 2871 if (a0 == a1) { 2872 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2873 return; 2874 } 2875 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2876 a2 = a0; 2877 } 2878 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2879 break; 2880 case INDEX_op_orc_vec: 2881 if (const_args[2]) { 2882 is_shimm1632(~a2, &cmode, &imm8); 2883 if (a0 == a1) { 2884 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2885 return; 2886 } 2887 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2888 a2 = a0; 2889 } 2890 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2891 break; 2892 case INDEX_op_xor_vec: 2893 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2894 break; 2895 case INDEX_op_ssadd_vec: 2896 if (is_scalar) { 2897 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2898 } else { 2899 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2900 } 2901 break; 2902 case INDEX_op_sssub_vec: 2903 if (is_scalar) { 2904 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2905 } else { 2906 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2907 } 2908 break; 2909 case INDEX_op_usadd_vec: 2910 if (is_scalar) { 2911 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2912 } else { 2913 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2914 } 2915 break; 2916 case INDEX_op_ussub_vec: 2917 if (is_scalar) { 2918 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2919 } else { 2920 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2921 } 2922 break; 2923 case INDEX_op_smax_vec: 2924 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2925 break; 2926 case INDEX_op_smin_vec: 2927 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2928 break; 2929 case INDEX_op_umax_vec: 2930 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2931 break; 2932 case INDEX_op_umin_vec: 2933 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2934 break; 2935 case INDEX_op_not_vec: 2936 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2937 break; 2938 case INDEX_op_shli_vec: 2939 if (is_scalar) { 2940 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2941 } else { 2942 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2943 } 2944 break; 2945 case INDEX_op_shri_vec: 2946 if (is_scalar) { 2947 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2948 } else { 2949 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2950 } 2951 break; 2952 case INDEX_op_sari_vec: 2953 if (is_scalar) { 2954 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2955 } else { 2956 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2957 } 2958 break; 2959 case INDEX_op_aa64_sli_vec: 2960 if (is_scalar) { 2961 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2962 } else { 2963 
tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2964 } 2965 break; 2966 case INDEX_op_shlv_vec: 2967 if (is_scalar) { 2968 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2969 } else { 2970 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2971 } 2972 break; 2973 case INDEX_op_aa64_sshl_vec: 2974 if (is_scalar) { 2975 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2976 } else { 2977 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2978 } 2979 break; 2980 case INDEX_op_cmp_vec: 2981 { 2982 TCGCond cond = args[3]; 2983 AArch64Insn insn; 2984 2985 switch (cond) { 2986 case TCG_COND_NE: 2987 if (const_args[2]) { 2988 if (is_scalar) { 2989 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 2990 } else { 2991 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2992 } 2993 } else { 2994 if (is_scalar) { 2995 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2996 } else { 2997 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2998 } 2999 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 3000 } 3001 break; 3002 3003 case TCG_COND_TSTNE: 3004 case TCG_COND_TSTEQ: 3005 if (const_args[2]) { 3006 /* (x & 0) == 0 */ 3007 tcg_out_dupi_vec(s, type, MO_8, a0, 3008 -(cond == TCG_COND_TSTEQ)); 3009 break; 3010 } 3011 if (is_scalar) { 3012 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a2); 3013 } else { 3014 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a2); 3015 } 3016 if (cond == TCG_COND_TSTEQ) { 3017 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 3018 } 3019 break; 3020 3021 default: 3022 if (const_args[2]) { 3023 if (is_scalar) { 3024 insn = cmp0_scalar_insn[cond]; 3025 if (insn) { 3026 tcg_out_insn_3612(s, insn, vece, a0, a1); 3027 break; 3028 } 3029 } else { 3030 insn = cmp0_vec_insn[cond]; 3031 if (insn) { 3032 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 3033 break; 3034 } 3035 } 3036 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0); 3037 a2 = TCG_VEC_TMP0; 3038 } 3039 if (is_scalar) { 3040 insn = cmp_scalar_insn[cond]; 3041 if (insn == 0) { 3042 TCGArg t; 3043 t = a1, a1 = a2, a2 = t; 3044 cond = tcg_swap_cond(cond); 3045 insn = cmp_scalar_insn[cond]; 3046 tcg_debug_assert(insn != 0); 3047 } 3048 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 3049 } else { 3050 insn = cmp_vec_insn[cond]; 3051 if (insn == 0) { 3052 TCGArg t; 3053 t = a1, a1 = a2, a2 = t; 3054 cond = tcg_swap_cond(cond); 3055 insn = cmp_vec_insn[cond]; 3056 tcg_debug_assert(insn != 0); 3057 } 3058 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 3059 } 3060 break; 3061 } 3062 } 3063 break; 3064 3065 case INDEX_op_bitsel_vec: 3066 a3 = args[3]; 3067 if (a0 == a3) { 3068 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 3069 } else if (a0 == a2) { 3070 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 3071 } else { 3072 if (a0 != a1) { 3073 tcg_out_mov(s, type, a0, a1); 3074 } 3075 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 3076 } 3077 break; 3078 3079 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3080 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. 
*/ 3081 default: 3082 g_assert_not_reached(); 3083 } 3084} 3085 3086int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3087{ 3088 switch (opc) { 3089 case INDEX_op_add_vec: 3090 case INDEX_op_sub_vec: 3091 case INDEX_op_and_vec: 3092 case INDEX_op_or_vec: 3093 case INDEX_op_xor_vec: 3094 case INDEX_op_andc_vec: 3095 case INDEX_op_orc_vec: 3096 case INDEX_op_neg_vec: 3097 case INDEX_op_abs_vec: 3098 case INDEX_op_not_vec: 3099 case INDEX_op_cmp_vec: 3100 case INDEX_op_shli_vec: 3101 case INDEX_op_shri_vec: 3102 case INDEX_op_sari_vec: 3103 case INDEX_op_ssadd_vec: 3104 case INDEX_op_sssub_vec: 3105 case INDEX_op_usadd_vec: 3106 case INDEX_op_ussub_vec: 3107 case INDEX_op_shlv_vec: 3108 case INDEX_op_bitsel_vec: 3109 return 1; 3110 case INDEX_op_rotli_vec: 3111 case INDEX_op_shrv_vec: 3112 case INDEX_op_sarv_vec: 3113 case INDEX_op_rotlv_vec: 3114 case INDEX_op_rotrv_vec: 3115 return -1; 3116 case INDEX_op_mul_vec: 3117 case INDEX_op_smax_vec: 3118 case INDEX_op_smin_vec: 3119 case INDEX_op_umax_vec: 3120 case INDEX_op_umin_vec: 3121 return vece < MO_64; 3122 3123 default: 3124 return 0; 3125 } 3126} 3127 3128void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 3129 TCGArg a0, ...) 3130{ 3131 va_list va; 3132 TCGv_vec v0, v1, v2, t1, t2, c1; 3133 TCGArg a2; 3134 3135 va_start(va, a0); 3136 v0 = temp_tcgv_vec(arg_temp(a0)); 3137 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 3138 a2 = va_arg(va, TCGArg); 3139 va_end(va); 3140 3141 switch (opc) { 3142 case INDEX_op_rotli_vec: 3143 t1 = tcg_temp_new_vec(type); 3144 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 3145 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 3146 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 3147 tcg_temp_free_vec(t1); 3148 break; 3149 3150 case INDEX_op_shrv_vec: 3151 case INDEX_op_sarv_vec: 3152 /* Right shifts are negative left shifts for AArch64. */ 3153 v2 = temp_tcgv_vec(arg_temp(a2)); 3154 t1 = tcg_temp_new_vec(type); 3155 tcg_gen_neg_vec(vece, t1, v2); 3156 opc = (opc == INDEX_op_shrv_vec 3157 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 3158 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 3159 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3160 tcg_temp_free_vec(t1); 3161 break; 3162 3163 case INDEX_op_rotlv_vec: 3164 v2 = temp_tcgv_vec(arg_temp(a2)); 3165 t1 = tcg_temp_new_vec(type); 3166 c1 = tcg_constant_vec(type, vece, 8 << vece); 3167 tcg_gen_sub_vec(vece, t1, v2, c1); 3168 /* Right shifts are negative left shifts for AArch64. */ 3169 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 3170 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3171 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 3172 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3173 tcg_gen_or_vec(vece, v0, v0, t1); 3174 tcg_temp_free_vec(t1); 3175 break; 3176 3177 case INDEX_op_rotrv_vec: 3178 v2 = temp_tcgv_vec(arg_temp(a2)); 3179 t1 = tcg_temp_new_vec(type); 3180 t2 = tcg_temp_new_vec(type); 3181 c1 = tcg_constant_vec(type, vece, 8 << vece); 3182 tcg_gen_neg_vec(vece, t1, v2); 3183 tcg_gen_sub_vec(vece, t2, c1, v2); 3184 /* Right shifts are negative left shifts for AArch64. 
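Here that builds rotr(v1, v2) as (v1 >> v2) | (v1 << (width - v2)): t1 is v1 shifted left by -v2 (a logical shift right) and t2 is v1 shifted left by width - v2, and the two halves are then ORed into v0.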
*/ 3185 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 3186 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3187 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 3188 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 3189 tcg_gen_or_vec(vece, v0, t1, t2); 3190 tcg_temp_free_vec(t1); 3191 tcg_temp_free_vec(t2); 3192 break; 3193 3194 default: 3195 g_assert_not_reached(); 3196 } 3197} 3198 3199static TCGConstraintSetIndex 3200tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) 3201{ 3202 switch (op) { 3203 case INDEX_op_goto_ptr: 3204 return C_O0_I1(r); 3205 3206 case INDEX_op_ld8u_i32: 3207 case INDEX_op_ld8s_i32: 3208 case INDEX_op_ld16u_i32: 3209 case INDEX_op_ld16s_i32: 3210 case INDEX_op_ld_i32: 3211 case INDEX_op_ld8u_i64: 3212 case INDEX_op_ld8s_i64: 3213 case INDEX_op_ld16u_i64: 3214 case INDEX_op_ld16s_i64: 3215 case INDEX_op_ld32u_i64: 3216 case INDEX_op_ld32s_i64: 3217 case INDEX_op_ld_i64: 3218 return C_O1_I1(r, r); 3219 3220 case INDEX_op_st8_i32: 3221 case INDEX_op_st16_i32: 3222 case INDEX_op_st_i32: 3223 case INDEX_op_st8_i64: 3224 case INDEX_op_st16_i64: 3225 case INDEX_op_st32_i64: 3226 case INDEX_op_st_i64: 3227 return C_O0_I2(rz, r); 3228 3229 case INDEX_op_qemu_ld_i32: 3230 case INDEX_op_qemu_ld_i64: 3231 return C_O1_I1(r, r); 3232 case INDEX_op_qemu_ld_i128: 3233 return C_O2_I1(r, r, r); 3234 case INDEX_op_qemu_st_i32: 3235 case INDEX_op_qemu_st_i64: 3236 return C_O0_I2(rz, r); 3237 case INDEX_op_qemu_st_i128: 3238 return C_O0_I3(rz, rz, r); 3239 3240 case INDEX_op_add2_i32: 3241 case INDEX_op_add2_i64: 3242 case INDEX_op_sub2_i32: 3243 case INDEX_op_sub2_i64: 3244 return C_O2_I4(r, r, rz, rz, rA, rMZ); 3245 3246 case INDEX_op_add_vec: 3247 case INDEX_op_sub_vec: 3248 case INDEX_op_mul_vec: 3249 case INDEX_op_xor_vec: 3250 case INDEX_op_ssadd_vec: 3251 case INDEX_op_sssub_vec: 3252 case INDEX_op_usadd_vec: 3253 case INDEX_op_ussub_vec: 3254 case INDEX_op_smax_vec: 3255 case INDEX_op_smin_vec: 3256 case INDEX_op_umax_vec: 3257 case INDEX_op_umin_vec: 3258 case INDEX_op_shlv_vec: 3259 case INDEX_op_shrv_vec: 3260 case INDEX_op_sarv_vec: 3261 case INDEX_op_aa64_sshl_vec: 3262 return C_O1_I2(w, w, w); 3263 case INDEX_op_not_vec: 3264 case INDEX_op_neg_vec: 3265 case INDEX_op_abs_vec: 3266 case INDEX_op_shli_vec: 3267 case INDEX_op_shri_vec: 3268 case INDEX_op_sari_vec: 3269 return C_O1_I1(w, w); 3270 case INDEX_op_ld_vec: 3271 case INDEX_op_dupm_vec: 3272 return C_O1_I1(w, r); 3273 case INDEX_op_st_vec: 3274 return C_O0_I2(w, r); 3275 case INDEX_op_dup_vec: 3276 return C_O1_I1(w, wr); 3277 case INDEX_op_or_vec: 3278 case INDEX_op_andc_vec: 3279 return C_O1_I2(w, w, wO); 3280 case INDEX_op_and_vec: 3281 case INDEX_op_orc_vec: 3282 return C_O1_I2(w, w, wN); 3283 case INDEX_op_cmp_vec: 3284 return C_O1_I2(w, w, wZ); 3285 case INDEX_op_bitsel_vec: 3286 return C_O1_I3(w, w, w, w); 3287 case INDEX_op_aa64_sli_vec: 3288 return C_O1_I2(w, 0, w); 3289 3290 default: 3291 return C_NotImplemented; 3292 } 3293} 3294 3295static void tcg_target_init(TCGContext *s) 3296{ 3297 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 3298 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 3299 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 3300 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 3301 3302 tcg_target_call_clobber_regs = -1ull; 3303 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 3304 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 3305 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn. */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    tcg_out_bti(s, BTI_C);

    /* Push (FP, LR) and allocate space for all saved registers. */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding. */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals. */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    if (!tcg_use_softmmu) {
        /*
         * Note that XZR cannot be encoded in the address base register slot,
         * as that actually encodes SP. Depending on the guest, we may need
         * to zero-extend the guest address via the address index register slot,
         * therefore we need to load even a zero guest base into a register.
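         * Since TCG_REG_GUEST_BASE is one of the callee-saved registers, the
         * loaded value also survives calls out to helpers; prepare_host_addr
         * above then uses it as the base register whenever guest_base is
         * non-zero or the 32-bit guest address needs zero-extending.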
         */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);

    /* Remove TCG locals stack space. */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame. */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_tb_start(TCGContext *s)
{
    tcg_out_bti(s, BTI_J);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}