/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "qemu/bitops.h"

/* Used for function call generation. */
#define TCG_REG_CALL_STACK              TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN          16
#define TCG_TARGET_CALL_STACK_OFFSET    0
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#ifdef CONFIG_DARWIN
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_EVEN
#endif
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0  TCG_REG_X16
#define TCG_REG_TMP1  TCG_REG_X17
#define TCG_REG_TMP2  TCG_REG_X30
#define TCG_VEC_TMP0  TCG_REG_V31

#define TCG_REG_GUEST_BASE  TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}
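/*
 * Worked example, added for illustration: for a B/BL at RX address
 * 0x1000 retargeted to 0x2000, the operands are tcg_insn_unit
 * pointers, so the subtraction yields (0x2000 - 0x1000) / 4 = 0x400
 * insns.  That fits in 26 signed bits and is deposited into bits
 * [25:0]: 0x14000000 | 0x400 disassembles as b #+0x1000.
 */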
static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 14)) {
        *src_rw = deposit32(*src_rw, 5, 14, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_TSTBR14:
        return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000
#define TCG_CT_CONST_CMP  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
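/*
 * Worked example, added for illustration: for val = 0x0ff0 the lowest
 * set bit is 0x10, so val + (val & -val) = 0x1000, a power of two, and
 * the test passes.  For val = 0x0f0f the same sum is 0x0f10, which
 * still has multiple bits set, so the test correctly rejects it.
 */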
/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
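/*
 * Worked example, added for illustration: v32 = 0x11000055 is not a
 * single shifted immediate, but masking out the top byte (i == 6)
 * leaves 0x00000055, which matches is_shimm32 with cmode 0x0.  The
 * caller then emits MOVI with that result and ORR with cmode 0x6 and
 * imm8 = extract32(v32, 24, 8) = 0x11, i.e. #0x11, LSL #24.
 */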
/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if (ct & TCG_CT_CONST_CMP) {
        if (is_tst_cond(cond)) {
            ct |= TCG_CT_CONST_LIMM;
        } else {
            ct |= TCG_CT_CONST_AIMM;
        }
    }

    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}
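/*
 * Illustrative note, not from the original source: under a
 * TCG_CT_CONST_AIMM constraint, val = -0x555000 is accepted because
 * is_aimm(-val) holds: 0x555000 fits entirely within the LSL-12 field
 * (masking with ~0xfff000 yields zero).  The emitter then uses the
 * subtract form of the instruction instead of materializing the
 * constant in a register.
 */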
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf,     /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
    /* bit test */
    [TCG_COND_TSTEQ] = COND_EQ,
    [TCG_COND_TSTNE] = COND_NE,
};

typedef enum {
    LDST_ST = 0,     /* store */
    LDST_LD = 1,     /* load */
    LDST_LD_S_X = 2, /* load and sign-extend into Xt */
    LDST_LD_S_W = 3, /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Test and branch (immediate).  */
    I3205_TBZ       = 0x36000000,
    I3205_TBNZ      = 0x37000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive.  */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,
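    /*
     * Encoding walkthrough, added for illustration: I3312_LDRX above is
     * 0x38000000 | LDST_LD << 22 | MO_64 << 30 = 0xf8400000.  OR-ing in
     * I3312_TO_I3313 gives 0xf9400000, the scaled unsigned-immediate
     * form LDR Xt, [Xn, #uimm12 * 8], while OR-ing in I3312_TO_I3310
     * selects the register-offset form emitted by tcg_out_insn_3310.
     */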
    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions.  */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_ADCS      = 0x3a000000,
    I3503_SBC       = 0x5a000000,
    I3503_SBCS      = 0x7a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,
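    /*
     * Summary note, added for illustration: the cmode values used with
     * these opcodes come from the is_shimm16/is_shimm32/is_soimm32
     * helpers above: 0x0/0x2/0x4/0x6 select a 32-bit byte shifted by
     * 0/8/16/24, 0x8/0xa a 16-bit byte shifted by 0/8, 0xc/0xd the
     * "shifting ones" forms, 0xe a replicated byte, and 0xf the float
     * immediates.
     */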
    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,

    BTI_C           = 0xd503245f,
    BTI_J           = 0xd503249f,
    BTI_JC          = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
                              TCGReg rt, int imm6, int imm14)
{
    insn |= (imm6 & 0x20) << (31 - 5);
    insn |= (imm6 & 0x1f) << 19;
    tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12; /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404 tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}
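/*
 * Usage note, added for illustration: the macro pastes the format and
 * opcode names together, so
 *
 *     tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
 *
 * expands to tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm).
 * Passing an opcode that does not belong to the named format fails to
 * compile, which is the "typecheck" mentioned at the enum above.
 */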
/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503 tcg_out_insn_3502
#define tcg_out_insn_3508 tcg_out_insn_3502
#define tcg_out_insn_3510 tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}
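/*
 * Illustrative note, not from the original source: TCG numbers vector
 * registers 32..63, so (~rn & 0x20) << 6 sets bit 11 exactly when rn
 * is a general register.  For DUP this turns the element form into
 * DUP (general) automatically, which is how tcg_out_dup_vec below
 * handles both source register classes with a single call.
 */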
static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
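/*
 * Usage note, added for illustration: ORR rd, xzr, rm is the canonical
 * MOV (register) alias, but it cannot address SP; register 31 encodes
 * XZR in logical instructions and SP in add/subtract immediate, hence
 * the separate ADDI-based helper for moves to or from SP.
 */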
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
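/*
 * Worked example, added for illustration: limm = 0xff00 gives
 * h = clz64 = 48 and l = ctz64 = 8, taking the else branch:
 * r = 64 - 8 = 56 and c = 56 - 48 - 1 = 7.  With N = 1 this asks
 * DecodeBitMasks for an 8-bit run of ones rotated right by 56,
 * i.e. 0xff << 8, reproducing the original constant.
 */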
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
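/*
 * Worked example, added for illustration: for vece == MO_16 and the
 * constant 0x1234, neither 0x1234 nor its inverse is a shifted imm8,
 * so tcg_out_dupi_vec falls back to the two-insn path:
 * MOVI v, #0x34 (cmode 0x8) then ORR v, #0x12, LSL #8 (cmode 0xa).
 */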
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}
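/*
 * Worked example, added for illustration: a 64-bit load at offset
 * 0x7ff8 (lgsize 3) is aligned and scales to 0xfff, so it fits the
 * scaled uimm12 form; offset -16 uses the unscaled signed form; an
 * offset such as 0x12345 fits neither and is first materialized in
 * TMP0 for the register-offset form.
 */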
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static void tgen_cmp(TCGContext *s, TCGType ext, TCGCond cond,
                     TCGReg a, TCGReg b)
{
    if (is_tst_cond(cond)) {
        tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
    } else {
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tgen_cmpi(TCGContext *s, TCGType ext, TCGCond cond,
                      TCGReg a, tcg_target_long b)
{
    if (is_tst_cond(cond)) {
        tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
    } else if (b >= 0) {
        tcg_debug_assert(is_aimm(b));
        tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
    } else {
        tcg_debug_assert(is_aimm(-b));
        tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
    }
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        tgen_cmpi(s, ext, cond, a, b);
    } else {
        tgen_cmp(s, ext, cond, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static void tcg_out_br(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
                        TCGReg a, TCGReg b, TCGLabel *l)
{
    tgen_cmp(s, type, c, a, b);
    tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
    tcg_out_insn(s, 3202, B_C, c, 0);
}
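/*
 * Illustrative note, not from the original source: the constraints
 * guarantee tgen_cmpi only sees encodable constants, so a negative
 * comparison like cmp x0, #-4 is emitted as adds xzr, x0, #4 (the
 * CMN alias), while TSTEQ/TSTNE go through the logical-immediate
 * ANDS path above.
 */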
static void tgen_brcondi(TCGContext *s, TCGType ext, TCGCond c,
                         TCGReg a, tcg_target_long b, TCGLabel *l)
{
    int tbit = -1;
    bool need_cmp = true;

    switch (c) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* cmp xN,0; b.ne L -> cbnz xN,L */
        if (b == 0) {
            need_cmp = false;
        }
        break;
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
        if (b == 0) {
            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
            tbit = ext ? 63 : 31;
            need_cmp = false;
        }
        break;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */
        if (b == UINT32_MAX) {
            c = tcg_tst_eqne_cond(c);
            ext = TCG_TYPE_I32;
            need_cmp = false;
            break;
        }
        /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
        if (is_power_of_2(b)) {
            tbit = ctz64(b);
            need_cmp = false;
        }
        break;
    default:
        break;
    }

    if (need_cmp) {
        tgen_cmpi(s, ext, c, a, b);
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        tcg_out_insn(s, 3202, B_C, c, 0);
        return;
    }

    if (tbit >= 0) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
        switch (c) {
        case TCG_COND_TSTEQ:
            tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
            break;
        case TCG_COND_TSTNE:
            tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        switch (c) {
        case TCG_COND_EQ:
            tcg_out_insn(s, 3201, CBZ, ext, a, 0);
            break;
        case TCG_COND_NE:
            tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static const TCGOutOpBrcond outop_brcond = {
    .base.static_constraint = C_O0_I2(r, rC),
    .out_rr = tgen_brcond,
    .out_ri = tgen_brcondi,
};

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}
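/*
 * Usage note, added for illustration: these helpers rely on the
 * standard aliases, e.g. tcg_out_sxt(s, TCG_TYPE_I64, MO_8, rd, rn)
 * emits SBFM xd, xn, #0, #7, which disassembles as sxtb xd, wn.
 * The zero-extensions deliberately stay in 32-bit form, since writing
 * a W register already clears the high 32 bits.
 */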
static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_mb(TCGContext *s, unsigned a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
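/*
 * Worked example, added for illustration: a load-load barrier selects
 * sync[TCG_MO_LD_LD] = DMB_ISH | DMB_LD = 0xd50339bf, i.e. dmb ishld.
 * Any ordering that includes store-load falls through to the
 * [0 ... TCG_MO_ALL] default, the full dmb ish.
 */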
typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/* We expect to use a 7-bit scaled negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -512

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        TCGReg addr_adj;
        TCGType mask_type;
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addr_reg = addr_reg;

        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}.  */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

        /* Extract the TLB index from the address into TMP0.  */
        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                     s->page_bits - CPU_TLB_ENTRY_BITS);

        /* Add the tlb_table pointer, forming the CPUTLBEntry address.  */
        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

        /* Load the tlb comparator into TMP0, and the fast path addend.  */
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                         : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                   offsetof(CPUTLBEntry, addend));

        /*
         * For aligned accesses, we check the first byte and include
         * the alignment bits within the address.  For unaligned access,
         * we check that we don't cross pages using the address of the
         * last byte of the access.
         */
        if (a_mask >= s_mask) {
            addr_adj = addr_reg;
        } else {
            addr_adj = TCG_REG_TMP2;
            tcg_out_insn(s, 3401, ADDI, addr_type,
                         addr_adj, addr_reg, s_mask - a_mask);
        }
        compare_mask = (uint64_t)s->page_mask | a_mask;

        /* Store the page mask part of the address into TMP2.  */
        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                         addr_adj, compare_mask);

        /* Perform the address comparison.  */
        tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);

        /* If not equal, we jump to the slow path.  */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        h->base = TCG_REG_TMP1;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        if (a_mask) {
            ldst = new_ldst_label(s);

            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addr_reg = addr_reg;

            /* tst addr, #mask */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

            /* b.ne slow_path */
            ldst->label_ptr[0] = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
        }

        if (guest_base || addr_type == TCG_TYPE_I32) {
            h->base = TCG_REG_GUEST_BASE;
            h->index = addr_reg;
            h->index_ext = addr_type;
        } else {
            h->base = addr_reg;
            h->index = TCG_REG_XZR;
            h->index_ext = TCG_TYPE_I64;
        }
    }

    return ldst;
}
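/*
 * Sketch of the softmmu fast path emitted above, added for
 * illustration (offsets symbolic, registers as chosen by the code):
 *
 *     ldp  x16, x17, [x19, #mask_table_ofs]    tmp0 = mask, tmp1 = table
 *     and  x16, x16, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *     add  x17, x17, x16                       CPUTLBEntry address
 *     ldr  x16, [x17, #addr_read/addr_write]   tlb comparator
 *     ldr  x17, [x17, #addend]
 *     add  x30, addr, #(s_mask - a_mask)       only if a_mask < s_mask
 *     and  x30, addr_adj, #compare_mask
 *     cmp  x16, x30
 *     b.ne slow_path
 */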
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    TCGReg base;
    bool use_pair;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing.  */
    if (h.index == TCG_REG_XZR) {
        base = h.base;
    } else {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
        } else {
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
        }
    }

    use_pair = h.aa.atom < MO_128 || have_lse2;

    if (!use_pair) {
        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
            branch = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
            use_pair = true;
        }

        if (is_ld) {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             *    ldxp lo, hi, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, .-8
             * Require no overlap between data{lo,hi} and base.
             */
            if (datalo == base || datahi == base) {
                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
                base = TCG_REG_TMP2;
            }
            ll = sl = datalo;
            lh = sh = datahi;
        } else {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             * 1: ldxp t0, t1, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, 1b
             */
            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
            ll = TCG_REG_TMP0;
            lh = TCG_REG_TMP1;
            sl = datalo;
            sh = datahi;
        }

        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);

        if (use_pair) {
            /* "b .+8", branching across the one insn of use_pair.  */
            tcg_out_insn(s, 3206, B, 2);
            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
        }
    }

    if (use_pair) {
        if (is_ld) {
            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
        } else {
            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    const tcg_insn_unit *target;
    ptrdiff_t offset;

    /* Reuse the zeroing that exists for goto_ptr.  */
    if (a0 == 0) {
        target = tcg_code_gen_epilogue;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        target = tb_ret_addr;
    }

    offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /*
         * Only x16/x17 generate BTI type Jump (2),
         * other registers generate BTI type Jump|Call (3).
         */
        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    }
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    set_jmp_reset_offset(s, which);
    tcg_out_bti(s, BTI_J);
}

static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0)
{
    tcg_out_insn(s, 3207, BR, a0);
}
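/*
 * Layout note, added for illustration: tcg_out_goto_tb emits the pair
 *
 *     b    <patched>      retargeted by tb_target_set_jmp_target
 *     br   x16            indirect fallback via TMP0
 *
 * When the displacement fits in 28 bits the B is pointed straight at
 * the destination; otherwise it is rewritten as a pc-relative LDR
 * that fetches the destination address from jmp_target_addr[] into
 * x16 and falls through to the BR.
 */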
*/ 2002 if (d_offset == sextract64(d_offset, 0, 28)) { 2003 insn = deposit32(I3206_B, 0, 26, d_offset >> 2); 2004 } else { 2005 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 2006 ptrdiff_t i_offset = i_addr - jmp_rx; 2007 2008 /* Note that we asserted this in range in tcg_out_goto_tb. */ 2009 insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2); 2010 } 2011 qatomic_set((uint32_t *)jmp_rw, insn); 2012 flush_idcache_range(jmp_rx, jmp_rw, 4); 2013} 2014 2015 2016static void tgen_add(TCGContext *s, TCGType type, 2017 TCGReg a0, TCGReg a1, TCGReg a2) 2018{ 2019 tcg_out_insn(s, 3502, ADD, type, a0, a1, a2); 2020} 2021 2022static void tgen_addi(TCGContext *s, TCGType type, 2023 TCGReg a0, TCGReg a1, tcg_target_long a2) 2024{ 2025 if (a2 >= 0) { 2026 tcg_out_insn(s, 3401, ADDI, type, a0, a1, a2); 2027 } else { 2028 tcg_out_insn(s, 3401, SUBI, type, a0, a1, -a2); 2029 } 2030} 2031 2032static const TCGOutOpBinary outop_add = { 2033 .base.static_constraint = C_O1_I2(r, r, rA), 2034 .out_rrr = tgen_add, 2035 .out_rri = tgen_addi, 2036}; 2037 2038static void tgen_addco(TCGContext *s, TCGType type, 2039 TCGReg a0, TCGReg a1, TCGReg a2) 2040{ 2041 tcg_out_insn(s, 3502, ADDS, type, a0, a1, a2); 2042} 2043 2044static void tgen_addco_imm(TCGContext *s, TCGType type, 2045 TCGReg a0, TCGReg a1, tcg_target_long a2) 2046{ 2047 if (a2 >= 0) { 2048 tcg_out_insn(s, 3401, ADDSI, type, a0, a1, a2); 2049 } else { 2050 tcg_out_insn(s, 3401, SUBSI, type, a0, a1, -a2); 2051 } 2052} 2053 2054static const TCGOutOpBinary outop_addco = { 2055 .base.static_constraint = C_O1_I2(r, r, rA), 2056 .out_rrr = tgen_addco, 2057 .out_rri = tgen_addco_imm, 2058}; 2059 2060static void tgen_addci_rrr(TCGContext *s, TCGType type, 2061 TCGReg a0, TCGReg a1, TCGReg a2) 2062{ 2063 tcg_out_insn(s, 3503, ADC, type, a0, a1, a2); 2064} 2065 2066static void tgen_addci_rri(TCGContext *s, TCGType type, 2067 TCGReg a0, TCGReg a1, tcg_target_long a2) 2068{ 2069 /* 2070 * Note that the only two constants we support are 0 and -1, and 2071 * that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. 2072 */ 2073 if (a2) { 2074 tcg_out_insn(s, 3503, SBC, type, a0, a1, TCG_REG_XZR); 2075 } else { 2076 tcg_out_insn(s, 3503, ADC, type, a0, a1, TCG_REG_XZR); 2077 } 2078} 2079 2080static const TCGOutOpAddSubCarry outop_addci = { 2081 .base.static_constraint = C_O1_I2(r, rz, rMZ), 2082 .out_rrr = tgen_addci_rrr, 2083 .out_rri = tgen_addci_rri, 2084}; 2085 2086static void tgen_addcio(TCGContext *s, TCGType type, 2087 TCGReg a0, TCGReg a1, TCGReg a2) 2088{ 2089 tcg_out_insn(s, 3503, ADCS, type, a0, a1, a2); 2090} 2091 2092static void tgen_addcio_imm(TCGContext *s, TCGType type, 2093 TCGReg a0, TCGReg a1, tcg_target_long a2) 2094{ 2095 /* Use SBCS w/0 for ADCS w/-1 -- see above. 
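   (SBCS a0, a1, xzr computes a1 + ~0 + C, i.e. a1 - 1 + C, which is
   exactly what ADCS a0, a1, #-1 would produce.)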
*/ 2096 if (a2) { 2097 tcg_out_insn(s, 3503, SBCS, type, a0, a1, TCG_REG_XZR); 2098 } else { 2099 tcg_out_insn(s, 3503, ADCS, type, a0, a1, TCG_REG_XZR); 2100 } 2101} 2102 2103static const TCGOutOpBinary outop_addcio = { 2104 .base.static_constraint = C_O1_I2(r, rz, rMZ), 2105 .out_rrr = tgen_addcio, 2106 .out_rri = tgen_addcio_imm, 2107}; 2108 2109static void tcg_out_set_carry(TCGContext *s) 2110{ 2111 tcg_out_insn(s, 3502, SUBS, TCG_TYPE_I32, 2112 TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR); 2113} 2114 2115static void tgen_and(TCGContext *s, TCGType type, 2116 TCGReg a0, TCGReg a1, TCGReg a2) 2117{ 2118 tcg_out_insn(s, 3510, AND, type, a0, a1, a2); 2119} 2120 2121static void tgen_andi(TCGContext *s, TCGType type, 2122 TCGReg a0, TCGReg a1, tcg_target_long a2) 2123{ 2124 tcg_out_logicali(s, I3404_ANDI, type, a0, a1, a2); 2125} 2126 2127static const TCGOutOpBinary outop_and = { 2128 .base.static_constraint = C_O1_I2(r, r, rL), 2129 .out_rrr = tgen_and, 2130 .out_rri = tgen_andi, 2131}; 2132 2133static void tgen_andc(TCGContext *s, TCGType type, 2134 TCGReg a0, TCGReg a1, TCGReg a2) 2135{ 2136 tcg_out_insn(s, 3510, BIC, type, a0, a1, a2); 2137} 2138 2139static const TCGOutOpBinary outop_andc = { 2140 .base.static_constraint = C_O1_I2(r, r, r), 2141 .out_rrr = tgen_andc, 2142}; 2143 2144static void tgen_clz(TCGContext *s, TCGType type, 2145 TCGReg a0, TCGReg a1, TCGReg a2) 2146{ 2147 tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true); 2148 tcg_out_insn(s, 3507, CLZ, type, TCG_REG_TMP0, a1); 2149 tcg_out_insn(s, 3506, CSEL, type, a0, TCG_REG_TMP0, a2, TCG_COND_NE); 2150} 2151 2152static void tgen_clzi(TCGContext *s, TCGType type, 2153 TCGReg a0, TCGReg a1, tcg_target_long a2) 2154{ 2155 if (a2 == (type == TCG_TYPE_I32 ? 32 : 64)) { 2156 tcg_out_insn(s, 3507, CLZ, type, a0, a1); 2157 return; 2158 } 2159 2160 tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true); 2161 tcg_out_insn(s, 3507, CLZ, type, a0, a1); 2162 2163 switch (a2) { 2164 case -1: 2165 tcg_out_insn(s, 3506, CSINV, type, a0, a0, TCG_REG_XZR, TCG_COND_NE); 2166 break; 2167 case 0: 2168 tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_XZR, TCG_COND_NE); 2169 break; 2170 default: 2171 tcg_out_movi(s, type, TCG_REG_TMP0, a2); 2172 tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_TMP0, TCG_COND_NE); 2173 break; 2174 } 2175} 2176 2177static const TCGOutOpBinary outop_clz = { 2178 .base.static_constraint = C_O1_I2(r, r, rAL), 2179 .out_rrr = tgen_clz, 2180 .out_rri = tgen_clzi, 2181}; 2182 2183static const TCGOutOpUnary outop_ctpop = { 2184 .base.static_constraint = C_NotImplemented, 2185}; 2186 2187static void tgen_ctz(TCGContext *s, TCGType type, 2188 TCGReg a0, TCGReg a1, TCGReg a2) 2189{ 2190 tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1); 2191 tgen_clz(s, type, a0, TCG_REG_TMP0, a2); 2192} 2193 2194static void tgen_ctzi(TCGContext *s, TCGType type, 2195 TCGReg a0, TCGReg a1, tcg_target_long a2) 2196{ 2197 tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1); 2198 tgen_clzi(s, type, a0, TCG_REG_TMP0, a2); 2199} 2200 2201static const TCGOutOpBinary outop_ctz = { 2202 .base.static_constraint = C_O1_I2(r, r, rAL), 2203 .out_rrr = tgen_ctz, 2204 .out_rri = tgen_ctzi, 2205}; 2206 2207static void tgen_divs(TCGContext *s, TCGType type, 2208 TCGReg a0, TCGReg a1, TCGReg a2) 2209{ 2210 tcg_out_insn(s, 3508, SDIV, type, a0, a1, a2); 2211} 2212 2213static const TCGOutOpBinary outop_divs = { 2214 .base.static_constraint = C_O1_I2(r, r, r), 2215 .out_rrr = tgen_divs, 2216}; 2217 2218static const TCGOutOpDivRem outop_divs2 = { 2219 .base.static_constraint 
= C_NotImplemented, 2220}; 2221 2222static void tgen_divu(TCGContext *s, TCGType type, 2223 TCGReg a0, TCGReg a1, TCGReg a2) 2224{ 2225 tcg_out_insn(s, 3508, UDIV, type, a0, a1, a2); 2226} 2227 2228static const TCGOutOpBinary outop_divu = { 2229 .base.static_constraint = C_O1_I2(r, r, r), 2230 .out_rrr = tgen_divu, 2231}; 2232 2233static const TCGOutOpDivRem outop_divu2 = { 2234 .base.static_constraint = C_NotImplemented, 2235}; 2236 2237static void tgen_eqv(TCGContext *s, TCGType type, 2238 TCGReg a0, TCGReg a1, TCGReg a2) 2239{ 2240 tcg_out_insn(s, 3510, EON, type, a0, a1, a2); 2241} 2242 2243static const TCGOutOpBinary outop_eqv = { 2244 .base.static_constraint = C_O1_I2(r, r, r), 2245 .out_rrr = tgen_eqv, 2246}; 2247 2248static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) 2249{ 2250 tcg_out_ubfm(s, TCG_TYPE_I64, a0, a1, 32, 63); 2251} 2252 2253static const TCGOutOpUnary outop_extrh_i64_i32 = { 2254 .base.static_constraint = C_O1_I1(r, r), 2255 .out_rr = tgen_extrh_i64_i32, 2256}; 2257 2258static void tgen_mul(TCGContext *s, TCGType type, 2259 TCGReg a0, TCGReg a1, TCGReg a2) 2260{ 2261 tcg_out_insn(s, 3509, MADD, type, a0, a1, a2, TCG_REG_XZR); 2262} 2263 2264static const TCGOutOpBinary outop_mul = { 2265 .base.static_constraint = C_O1_I2(r, r, r), 2266 .out_rrr = tgen_mul, 2267}; 2268 2269static const TCGOutOpMul2 outop_muls2 = { 2270 .base.static_constraint = C_NotImplemented, 2271}; 2272 2273static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) 2274{ 2275 return type == TCG_TYPE_I64 ? C_O1_I2(r, r, r) : C_NotImplemented; 2276} 2277 2278static void tgen_mulsh(TCGContext *s, TCGType type, 2279 TCGReg a0, TCGReg a1, TCGReg a2) 2280{ 2281 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2282} 2283 2284static const TCGOutOpBinary outop_mulsh = { 2285 .base.static_constraint = C_Dynamic, 2286 .base.dynamic_constraint = cset_mulh, 2287 .out_rrr = tgen_mulsh, 2288}; 2289 2290static const TCGOutOpMul2 outop_mulu2 = { 2291 .base.static_constraint = C_NotImplemented, 2292}; 2293 2294static void tgen_muluh(TCGContext *s, TCGType type, 2295 TCGReg a0, TCGReg a1, TCGReg a2) 2296{ 2297 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2298} 2299 2300static const TCGOutOpBinary outop_muluh = { 2301 .base.static_constraint = C_Dynamic, 2302 .base.dynamic_constraint = cset_mulh, 2303 .out_rrr = tgen_muluh, 2304}; 2305 2306static const TCGOutOpBinary outop_nand = { 2307 .base.static_constraint = C_NotImplemented, 2308}; 2309 2310static const TCGOutOpBinary outop_nor = { 2311 .base.static_constraint = C_NotImplemented, 2312}; 2313 2314static void tgen_or(TCGContext *s, TCGType type, 2315 TCGReg a0, TCGReg a1, TCGReg a2) 2316{ 2317 tcg_out_insn(s, 3510, ORR, type, a0, a1, a2); 2318} 2319 2320static void tgen_ori(TCGContext *s, TCGType type, 2321 TCGReg a0, TCGReg a1, tcg_target_long a2) 2322{ 2323 tcg_out_logicali(s, I3404_ORRI, type, a0, a1, a2); 2324} 2325 2326static const TCGOutOpBinary outop_or = { 2327 .base.static_constraint = C_O1_I2(r, r, rL), 2328 .out_rrr = tgen_or, 2329 .out_rri = tgen_ori, 2330}; 2331 2332static void tgen_orc(TCGContext *s, TCGType type, 2333 TCGReg a0, TCGReg a1, TCGReg a2) 2334{ 2335 tcg_out_insn(s, 3510, ORN, type, a0, a1, a2); 2336} 2337 2338static const TCGOutOpBinary outop_orc = { 2339 .base.static_constraint = C_O1_I2(r, r, r), 2340 .out_rrr = tgen_orc, 2341}; 2342 2343static void tgen_rems(TCGContext *s, TCGType type, 2344 TCGReg a0, TCGReg a1, TCGReg a2) 2345{ 2346 tcg_out_insn(s, 3508, SDIV, type, TCG_REG_TMP0, 
a1, a2); 2347 tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1); 2348} 2349 2350static const TCGOutOpBinary outop_rems = { 2351 .base.static_constraint = C_O1_I2(r, r, r), 2352 .out_rrr = tgen_rems, 2353}; 2354 2355static void tgen_remu(TCGContext *s, TCGType type, 2356 TCGReg a0, TCGReg a1, TCGReg a2) 2357{ 2358 tcg_out_insn(s, 3508, UDIV, type, TCG_REG_TMP0, a1, a2); 2359 tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1); 2360} 2361 2362static const TCGOutOpBinary outop_remu = { 2363 .base.static_constraint = C_O1_I2(r, r, r), 2364 .out_rrr = tgen_remu, 2365}; 2366 2367static const TCGOutOpBinary outop_rotl = { 2368 .base.static_constraint = C_NotImplemented, 2369}; 2370 2371static void tgen_rotr(TCGContext *s, TCGType type, 2372 TCGReg a0, TCGReg a1, TCGReg a2) 2373{ 2374 tcg_out_insn(s, 3508, RORV, type, a0, a1, a2); 2375} 2376 2377static void tgen_rotri(TCGContext *s, TCGType type, 2378 TCGReg a0, TCGReg a1, tcg_target_long a2) 2379{ 2380 int max = type == TCG_TYPE_I32 ? 31 : 63; 2381 tcg_out_extr(s, type, a0, a1, a1, a2 & max); 2382} 2383 2384static const TCGOutOpBinary outop_rotr = { 2385 .base.static_constraint = C_O1_I2(r, r, ri), 2386 .out_rrr = tgen_rotr, 2387 .out_rri = tgen_rotri, 2388}; 2389 2390static void tgen_sar(TCGContext *s, TCGType type, 2391 TCGReg a0, TCGReg a1, TCGReg a2) 2392{ 2393 tcg_out_insn(s, 3508, ASRV, type, a0, a1, a2); 2394} 2395 2396static void tgen_sari(TCGContext *s, TCGType type, 2397 TCGReg a0, TCGReg a1, tcg_target_long a2) 2398{ 2399 int max = type == TCG_TYPE_I32 ? 31 : 63; 2400 tcg_out_sbfm(s, type, a0, a1, a2 & max, max); 2401} 2402 2403static const TCGOutOpBinary outop_sar = { 2404 .base.static_constraint = C_O1_I2(r, r, ri), 2405 .out_rrr = tgen_sar, 2406 .out_rri = tgen_sari, 2407}; 2408 2409static void tgen_shl(TCGContext *s, TCGType type, 2410 TCGReg a0, TCGReg a1, TCGReg a2) 2411{ 2412 tcg_out_insn(s, 3508, LSLV, type, a0, a1, a2); 2413} 2414 2415static void tgen_shli(TCGContext *s, TCGType type, 2416 TCGReg a0, TCGReg a1, tcg_target_long a2) 2417{ 2418 int max = type == TCG_TYPE_I32 ? 31 : 63; 2419 tcg_out_ubfm(s, type, a0, a1, -a2 & max, ~a2 & max); 2420} 2421 2422static const TCGOutOpBinary outop_shl = { 2423 .base.static_constraint = C_O1_I2(r, r, ri), 2424 .out_rrr = tgen_shl, 2425 .out_rri = tgen_shli, 2426}; 2427 2428static void tgen_shr(TCGContext *s, TCGType type, 2429 TCGReg a0, TCGReg a1, TCGReg a2) 2430{ 2431 tcg_out_insn(s, 3508, LSRV, type, a0, a1, a2); 2432} 2433 2434static void tgen_shri(TCGContext *s, TCGType type, 2435 TCGReg a0, TCGReg a1, tcg_target_long a2) 2436{ 2437 int max = type == TCG_TYPE_I32 ? 
31 : 63; 2438 tcg_out_ubfm(s, type, a0, a1, a2 & max, max); 2439} 2440 2441static const TCGOutOpBinary outop_shr = { 2442 .base.static_constraint = C_O1_I2(r, r, ri), 2443 .out_rrr = tgen_shr, 2444 .out_rri = tgen_shri, 2445}; 2446 2447static void tgen_sub(TCGContext *s, TCGType type, 2448 TCGReg a0, TCGReg a1, TCGReg a2) 2449{ 2450 tcg_out_insn(s, 3502, SUB, type, a0, a1, a2); 2451} 2452 2453static const TCGOutOpSubtract outop_sub = { 2454 .base.static_constraint = C_O1_I2(r, r, r), 2455 .out_rrr = tgen_sub, 2456}; 2457 2458static void tgen_subbo_rrr(TCGContext *s, TCGType type, 2459 TCGReg a0, TCGReg a1, TCGReg a2) 2460{ 2461 tcg_out_insn(s, 3502, SUBS, type, a0, a1, a2); 2462} 2463 2464static void tgen_subbo_rri(TCGContext *s, TCGType type, 2465 TCGReg a0, TCGReg a1, tcg_target_long a2) 2466{ 2467 if (a2 >= 0) { 2468 tcg_out_insn(s, 3401, SUBSI, type, a0, a1, a2); 2469 } else { 2470 tcg_out_insn(s, 3401, ADDSI, type, a0, a1, -a2); 2471 } 2472} 2473 2474static void tgen_subbo_rir(TCGContext *s, TCGType type, 2475 TCGReg a0, tcg_target_long a1, TCGReg a2) 2476{ 2477 tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, a2); 2478} 2479 2480static void tgen_subbo_rii(TCGContext *s, TCGType type, 2481 TCGReg a0, tcg_target_long a1, tcg_target_long a2) 2482{ 2483 if (a2 == 0) { 2484 tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, TCG_REG_XZR); 2485 return; 2486 } 2487 2488 /* 2489 * We want to allow a1 to be zero for the benefit of negation via 2490 * subtraction. However, that leaves open the possibility of 2491 * adding 0 +/- const, and the immediate add/sub instructions 2492 * encode XSP not XZR. Since we have 0 - non-zero, borrow is 2493 * always set. 2494 */ 2495 tcg_out_movi(s, type, a0, -a2); 2496 tcg_out_set_borrow(s); 2497} 2498 2499static const TCGOutOpAddSubCarry outop_subbo = { 2500 .base.static_constraint = C_O1_I2(r, rZ, rA), 2501 .out_rrr = tgen_subbo_rrr, 2502 .out_rri = tgen_subbo_rri, 2503 .out_rir = tgen_subbo_rir, 2504 .out_rii = tgen_subbo_rii, 2505}; 2506 2507static void tgen_subbi_rrr(TCGContext *s, TCGType type, 2508 TCGReg a0, TCGReg a1, TCGReg a2) 2509{ 2510 tcg_out_insn(s, 3503, SBC, type, a0, a1, a2); 2511} 2512 2513static void tgen_subbi_rri(TCGContext *s, TCGType type, 2514 TCGReg a0, TCGReg a1, tcg_target_long a2) 2515{ 2516 tgen_addci_rri(s, type, a0, a1, ~a2); 2517} 2518 2519static const TCGOutOpAddSubCarry outop_subbi = { 2520 .base.static_constraint = C_O1_I2(r, rz, rMZ), 2521 .out_rrr = tgen_subbi_rrr, 2522 .out_rri = tgen_subbi_rri, 2523}; 2524 2525static void tgen_subbio_rrr(TCGContext *s, TCGType type, 2526 TCGReg a0, TCGReg a1, TCGReg a2) 2527{ 2528 tcg_out_insn(s, 3503, SBCS, type, a0, a1, a2); 2529} 2530 2531static void tgen_subbio_rri(TCGContext *s, TCGType type, 2532 TCGReg a0, TCGReg a1, tcg_target_long a2) 2533{ 2534 tgen_addcio_imm(s, type, a0, a1, ~a2); 2535} 2536 2537static const TCGOutOpAddSubCarry outop_subbio = { 2538 .base.static_constraint = C_O1_I2(r, rz, rMZ), 2539 .out_rrr = tgen_subbio_rrr, 2540 .out_rri = tgen_subbio_rri, 2541}; 2542 2543static void tcg_out_set_borrow(TCGContext *s) 2544{ 2545 tcg_out_insn(s, 3502, ADDS, TCG_TYPE_I32, 2546 TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR); 2547} 2548 2549static void tgen_xor(TCGContext *s, TCGType type, 2550 TCGReg a0, TCGReg a1, TCGReg a2) 2551{ 2552 tcg_out_insn(s, 3510, EOR, type, a0, a1, a2); 2553} 2554 2555static void tgen_xori(TCGContext *s, TCGType type, 2556 TCGReg a0, TCGReg a1, tcg_target_long a2) 2557{ 2558 tcg_out_logicali(s, I3404_EORI, type, a0, a1, a2); 2559} 2560 2561static const TCGOutOpBinary 
outop_xor = { 2562 .base.static_constraint = C_O1_I2(r, r, rL), 2563 .out_rrr = tgen_xor, 2564 .out_rri = tgen_xori, 2565}; 2566 2567static void tgen_bswap16(TCGContext *s, TCGType type, 2568 TCGReg a0, TCGReg a1, unsigned flags) 2569{ 2570 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2571 if (flags & TCG_BSWAP_OS) { 2572 /* Output must be sign-extended. */ 2573 tcg_out_ext16s(s, type, a0, a0); 2574 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2575 /* Output must be zero-extended, but input isn't. */ 2576 tcg_out_ext16u(s, a0, a0); 2577 } 2578} 2579 2580static const TCGOutOpBswap outop_bswap16 = { 2581 .base.static_constraint = C_O1_I1(r, r), 2582 .out_rr = tgen_bswap16, 2583}; 2584 2585static void tgen_bswap32(TCGContext *s, TCGType type, 2586 TCGReg a0, TCGReg a1, unsigned flags) 2587{ 2588 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2589 if (flags & TCG_BSWAP_OS) { 2590 tcg_out_ext32s(s, a0, a0); 2591 } 2592} 2593 2594static const TCGOutOpBswap outop_bswap32 = { 2595 .base.static_constraint = C_O1_I1(r, r), 2596 .out_rr = tgen_bswap32, 2597}; 2598 2599static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 2600{ 2601 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2602} 2603 2604static const TCGOutOpUnary outop_bswap64 = { 2605 .base.static_constraint = C_O1_I1(r, r), 2606 .out_rr = tgen_bswap64, 2607}; 2608 2609static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 2610{ 2611 tgen_sub(s, type, a0, TCG_REG_XZR, a1); 2612} 2613 2614static const TCGOutOpUnary outop_neg = { 2615 .base.static_constraint = C_O1_I1(r, r), 2616 .out_rr = tgen_neg, 2617}; 2618 2619static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 2620{ 2621 tgen_orc(s, type, a0, TCG_REG_XZR, a1); 2622} 2623 2624static const TCGOutOpUnary outop_not = { 2625 .base.static_constraint = C_O1_I1(r, r), 2626 .out_rr = tgen_not, 2627}; 2628 2629static void tgen_cset(TCGContext *s, TCGCond cond, TCGReg ret) 2630{ 2631 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2632 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, ret, TCG_REG_XZR, 2633 TCG_REG_XZR, tcg_invert_cond(cond)); 2634} 2635 2636static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, 2637 TCGReg a0, TCGReg a1, TCGReg a2) 2638{ 2639 tgen_cmp(s, type, cond, a1, a2); 2640 tgen_cset(s, cond, a0); 2641} 2642 2643static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, 2644 TCGReg a0, TCGReg a1, tcg_target_long a2) 2645{ 2646 tgen_cmpi(s, type, cond, a1, a2); 2647 tgen_cset(s, cond, a0); 2648} 2649 2650static const TCGOutOpSetcond outop_setcond = { 2651 .base.static_constraint = C_O1_I2(r, r, rC), 2652 .out_rrr = tgen_setcond, 2653 .out_rri = tgen_setcondi, 2654}; 2655 2656static void tgen_csetm(TCGContext *s, TCGType ext, TCGCond cond, TCGReg ret) 2657{ 2658 /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). 
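   CSINV selects WZR when invert(cond) holds, and ~WZR = -1 otherwise,
   so ret becomes -1 exactly when cond is true.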
*/ 2659 tcg_out_insn(s, 3506, CSINV, ext, ret, TCG_REG_XZR, 2660 TCG_REG_XZR, tcg_invert_cond(cond)); 2661} 2662 2663static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, 2664 TCGReg a0, TCGReg a1, TCGReg a2) 2665{ 2666 tgen_cmp(s, type, cond, a1, a2); 2667 tgen_csetm(s, type, cond, a0); 2668} 2669 2670static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, 2671 TCGReg a0, TCGReg a1, tcg_target_long a2) 2672{ 2673 tgen_cmpi(s, type, cond, a1, a2); 2674 tgen_csetm(s, type, cond, a0); 2675} 2676 2677static const TCGOutOpSetcond outop_negsetcond = { 2678 .base.static_constraint = C_O1_I2(r, r, rC), 2679 .out_rrr = tgen_negsetcond, 2680 .out_rri = tgen_negsetcondi, 2681}; 2682 2683static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, 2684 TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, 2685 TCGArg vt, bool const_vt, TCGArg vf, bool const_vf) 2686{ 2687 tcg_out_cmp(s, type, cond, c1, c2, const_c2); 2688 tcg_out_insn(s, 3506, CSEL, type, ret, vt, vf, cond); 2689} 2690 2691static const TCGOutOpMovcond outop_movcond = { 2692 .base.static_constraint = C_O1_I4(r, r, rC, rz, rz), 2693 .out = tgen_movcond, 2694}; 2695 2696static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 2697 TCGReg a2, unsigned ofs, unsigned len) 2698{ 2699 unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; 2700 2701 /* 2702 * Since we can't support "0Z" as a constraint, we allow a1 in 2703 * any register. Fix things up as if a matching constraint. 2704 */ 2705 if (a0 != a1) { 2706 if (a0 == a2) { 2707 tcg_out_mov(s, type, TCG_REG_TMP0, a2); 2708 a2 = TCG_REG_TMP0; 2709 } 2710 tcg_out_mov(s, type, a0, a1); 2711 } 2712 tcg_out_bfm(s, type, a0, a2, -ofs & mask, len - 1); 2713} 2714 2715static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 2716 tcg_target_long a2, unsigned ofs, unsigned len) 2717{ 2718 tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len)); 2719} 2720 2721static void tgen_depositz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2, 2722 unsigned ofs, unsigned len) 2723{ 2724 int max = type == TCG_TYPE_I32 ? 
31 : 63; 2725 tcg_out_ubfm(s, type, a0, a2, -ofs & max, len - 1); 2726} 2727 2728static const TCGOutOpDeposit outop_deposit = { 2729 .base.static_constraint = C_O1_I2(r, rZ, rZ), 2730 .out_rrr = tgen_deposit, 2731 .out_rri = tgen_depositi, 2732 .out_rzr = tgen_depositz, 2733}; 2734 2735static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 2736 unsigned ofs, unsigned len) 2737{ 2738 if (ofs == 0) { 2739 uint64_t mask = MAKE_64BIT_MASK(0, len); 2740 tcg_out_logicali(s, I3404_ANDI, type, a0, a1, mask); 2741 } else { 2742 tcg_out_ubfm(s, type, a0, a1, ofs, ofs + len - 1); 2743 } 2744} 2745 2746static const TCGOutOpExtract outop_extract = { 2747 .base.static_constraint = C_O1_I1(r, r), 2748 .out_rr = tgen_extract, 2749}; 2750 2751static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 2752 unsigned ofs, unsigned len) 2753{ 2754 tcg_out_sbfm(s, type, a0, a1, ofs, ofs + len - 1); 2755} 2756 2757static const TCGOutOpExtract outop_sextract = { 2758 .base.static_constraint = C_O1_I1(r, r), 2759 .out_rr = tgen_sextract, 2760}; 2761 2762static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, 2763 TCGReg a1, TCGReg a2, unsigned shr) 2764{ 2765 tcg_out_extr(s, type, a0, a2, a1, shr); 2766} 2767 2768static const TCGOutOpExtract2 outop_extract2 = { 2769 .base.static_constraint = C_O1_I2(r, rz, rz), 2770 .out_rrr = tgen_extract2, 2771}; 2772 2773static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, 2774 TCGReg base, ptrdiff_t offset) 2775{ 2776 tcg_out_ldst(s, I3312_LDRB, dest, base, offset, 0); 2777} 2778 2779static const TCGOutOpLoad outop_ld8u = { 2780 .base.static_constraint = C_O1_I1(r, r), 2781 .out = tgen_ld8u, 2782}; 2783 2784static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, 2785 TCGReg base, ptrdiff_t offset) 2786{ 2787 AArch64Insn insn = type == TCG_TYPE_I32 ? I3312_LDRSBW : I3312_LDRSBX; 2788 tcg_out_ldst(s, insn, dest, base, offset, 0); 2789} 2790 2791static const TCGOutOpLoad outop_ld8s = { 2792 .base.static_constraint = C_O1_I1(r, r), 2793 .out = tgen_ld8s, 2794}; 2795 2796static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, 2797 TCGReg base, ptrdiff_t offset) 2798{ 2799 tcg_out_ldst(s, I3312_LDRH, dest, base, offset, 1); 2800} 2801 2802static const TCGOutOpLoad outop_ld16u = { 2803 .base.static_constraint = C_O1_I1(r, r), 2804 .out = tgen_ld16u, 2805}; 2806 2807static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, 2808 TCGReg base, ptrdiff_t offset) 2809{ 2810 AArch64Insn insn = type == TCG_TYPE_I32 ? 
I3312_LDRSHW : I3312_LDRSHX; 2811 tcg_out_ldst(s, insn, dest, base, offset, 1); 2812} 2813 2814static const TCGOutOpLoad outop_ld16s = { 2815 .base.static_constraint = C_O1_I1(r, r), 2816 .out = tgen_ld16s, 2817}; 2818 2819static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, 2820 TCGReg base, ptrdiff_t offset) 2821{ 2822 tcg_out_ldst(s, I3312_LDRW, dest, base, offset, 2); 2823} 2824 2825static const TCGOutOpLoad outop_ld32u = { 2826 .base.static_constraint = C_O1_I1(r, r), 2827 .out = tgen_ld32u, 2828}; 2829 2830static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, 2831 TCGReg base, ptrdiff_t offset) 2832{ 2833 tcg_out_ldst(s, I3312_LDRSWX, dest, base, offset, 2); 2834} 2835 2836static const TCGOutOpLoad outop_ld32s = { 2837 .base.static_constraint = C_O1_I1(r, r), 2838 .out = tgen_ld32s, 2839}; 2840 2841static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, 2842 TCGReg base, ptrdiff_t offset) 2843{ 2844 tcg_out_ldst(s, I3312_STRB, data, base, offset, 0); 2845} 2846 2847static const TCGOutOpStore outop_st8 = { 2848 .base.static_constraint = C_O0_I2(rz, r), 2849 .out_r = tgen_st8_r, 2850}; 2851 2852static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, 2853 TCGReg base, ptrdiff_t offset) 2854{ 2855 tcg_out_ldst(s, I3312_STRH, data, base, offset, 1); 2856} 2857 2858static const TCGOutOpStore outop_st16 = { 2859 .base.static_constraint = C_O0_I2(rz, r), 2860 .out_r = tgen_st16_r, 2861}; 2862 2863static const TCGOutOpStore outop_st = { 2864 .base.static_constraint = C_O0_I2(rz, r), 2865 .out_r = tcg_out_st, 2866}; 2867 2868static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, 2869 const TCGArg args[TCG_MAX_OP_ARGS], 2870 const int const_args[TCG_MAX_OP_ARGS]) 2871{ 2872 /* Hoist the loads of the most common arguments. */ 2873 TCGArg a0 = args[0]; 2874 TCGArg a1 = args[1]; 2875 TCGArg a2 = args[2]; 2876 2877 switch (opc) { 2878 case INDEX_op_qemu_ld_i32: 2879 case INDEX_op_qemu_ld_i64: 2880 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2881 break; 2882 case INDEX_op_qemu_st_i32: 2883 case INDEX_op_qemu_st_i64: 2884 tcg_out_qemu_st(s, a0, a1, a2, ext); 2885 break; 2886 case INDEX_op_qemu_ld_i128: 2887 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); 2888 break; 2889 case INDEX_op_qemu_st_i128: 2890 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); 2891 break; 2892 2893 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2894 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2895 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. 
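   Every other integer opcode is dispatched through the per-opcode
   TCGOutOp tables above, so only the qemu_ld/st family reaches this
   switch.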
*/ 2896 default: 2897 g_assert_not_reached(); 2898 } 2899} 2900 2901static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2902 unsigned vecl, unsigned vece, 2903 const TCGArg args[TCG_MAX_OP_ARGS], 2904 const int const_args[TCG_MAX_OP_ARGS]) 2905{ 2906 static const AArch64Insn cmp_vec_insn[16] = { 2907 [TCG_COND_EQ] = I3616_CMEQ, 2908 [TCG_COND_GT] = I3616_CMGT, 2909 [TCG_COND_GE] = I3616_CMGE, 2910 [TCG_COND_GTU] = I3616_CMHI, 2911 [TCG_COND_GEU] = I3616_CMHS, 2912 }; 2913 static const AArch64Insn cmp_scalar_insn[16] = { 2914 [TCG_COND_EQ] = I3611_CMEQ, 2915 [TCG_COND_GT] = I3611_CMGT, 2916 [TCG_COND_GE] = I3611_CMGE, 2917 [TCG_COND_GTU] = I3611_CMHI, 2918 [TCG_COND_GEU] = I3611_CMHS, 2919 }; 2920 static const AArch64Insn cmp0_vec_insn[16] = { 2921 [TCG_COND_EQ] = I3617_CMEQ0, 2922 [TCG_COND_GT] = I3617_CMGT0, 2923 [TCG_COND_GE] = I3617_CMGE0, 2924 [TCG_COND_LT] = I3617_CMLT0, 2925 [TCG_COND_LE] = I3617_CMLE0, 2926 }; 2927 static const AArch64Insn cmp0_scalar_insn[16] = { 2928 [TCG_COND_EQ] = I3612_CMEQ0, 2929 [TCG_COND_GT] = I3612_CMGT0, 2930 [TCG_COND_GE] = I3612_CMGE0, 2931 [TCG_COND_LT] = I3612_CMLT0, 2932 [TCG_COND_LE] = I3612_CMLE0, 2933 }; 2934 2935 TCGType type = vecl + TCG_TYPE_V64; 2936 unsigned is_q = vecl; 2937 bool is_scalar = !is_q && vece == MO_64; 2938 TCGArg a0, a1, a2, a3; 2939 int cmode, imm8; 2940 2941 a0 = args[0]; 2942 a1 = args[1]; 2943 a2 = args[2]; 2944 2945 switch (opc) { 2946 case INDEX_op_ld_vec: 2947 tcg_out_ld(s, type, a0, a1, a2); 2948 break; 2949 case INDEX_op_st_vec: 2950 tcg_out_st(s, type, a0, a1, a2); 2951 break; 2952 case INDEX_op_dupm_vec: 2953 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2954 break; 2955 case INDEX_op_add_vec: 2956 if (is_scalar) { 2957 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2958 } else { 2959 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2960 } 2961 break; 2962 case INDEX_op_sub_vec: 2963 if (is_scalar) { 2964 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2965 } else { 2966 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2967 } 2968 break; 2969 case INDEX_op_mul_vec: 2970 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2971 break; 2972 case INDEX_op_neg_vec: 2973 if (is_scalar) { 2974 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2975 } else { 2976 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2977 } 2978 break; 2979 case INDEX_op_abs_vec: 2980 if (is_scalar) { 2981 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2982 } else { 2983 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2984 } 2985 break; 2986 case INDEX_op_and_vec: 2987 if (const_args[2]) { 2988 is_shimm1632(~a2, &cmode, &imm8); 2989 if (a0 == a1) { 2990 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2991 return; 2992 } 2993 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2994 a2 = a0; 2995 } 2996 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2997 break; 2998 case INDEX_op_or_vec: 2999 if (const_args[2]) { 3000 is_shimm1632(a2, &cmode, &imm8); 3001 if (a0 == a1) { 3002 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 3003 return; 3004 } 3005 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 3006 a2 = a0; 3007 } 3008 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 3009 break; 3010 case INDEX_op_andc_vec: 3011 if (const_args[2]) { 3012 is_shimm1632(a2, &cmode, &imm8); 3013 if (a0 == a1) { 3014 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 3015 return; 3016 } 3017 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 3018 a2 = a0; 3019 } 3020 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 3021 break; 3022 case 
INDEX_op_orc_vec: 3023 if (const_args[2]) { 3024 is_shimm1632(~a2, &cmode, &imm8); 3025 if (a0 == a1) { 3026 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 3027 return; 3028 } 3029 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 3030 a2 = a0; 3031 } 3032 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 3033 break; 3034 case INDEX_op_xor_vec: 3035 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 3036 break; 3037 case INDEX_op_ssadd_vec: 3038 if (is_scalar) { 3039 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 3040 } else { 3041 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 3042 } 3043 break; 3044 case INDEX_op_sssub_vec: 3045 if (is_scalar) { 3046 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 3047 } else { 3048 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 3049 } 3050 break; 3051 case INDEX_op_usadd_vec: 3052 if (is_scalar) { 3053 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 3054 } else { 3055 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 3056 } 3057 break; 3058 case INDEX_op_ussub_vec: 3059 if (is_scalar) { 3060 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 3061 } else { 3062 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 3063 } 3064 break; 3065 case INDEX_op_smax_vec: 3066 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 3067 break; 3068 case INDEX_op_smin_vec: 3069 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 3070 break; 3071 case INDEX_op_umax_vec: 3072 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 3073 break; 3074 case INDEX_op_umin_vec: 3075 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 3076 break; 3077 case INDEX_op_not_vec: 3078 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 3079 break; 3080 case INDEX_op_shli_vec: 3081 if (is_scalar) { 3082 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 3083 } else { 3084 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 3085 } 3086 break; 3087 case INDEX_op_shri_vec: 3088 if (is_scalar) { 3089 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 3090 } else { 3091 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 3092 } 3093 break; 3094 case INDEX_op_sari_vec: 3095 if (is_scalar) { 3096 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 3097 } else { 3098 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 3099 } 3100 break; 3101 case INDEX_op_aa64_sli_vec: 3102 if (is_scalar) { 3103 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 3104 } else { 3105 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 3106 } 3107 break; 3108 case INDEX_op_shlv_vec: 3109 if (is_scalar) { 3110 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 3111 } else { 3112 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 3113 } 3114 break; 3115 case INDEX_op_aa64_sshl_vec: 3116 if (is_scalar) { 3117 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 3118 } else { 3119 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 3120 } 3121 break; 3122 case INDEX_op_cmp_vec: 3123 { 3124 TCGCond cond = args[3]; 3125 AArch64Insn insn; 3126 3127 switch (cond) { 3128 case TCG_COND_NE: 3129 if (const_args[2]) { 3130 if (is_scalar) { 3131 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 3132 } else { 3133 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 3134 } 3135 } else { 3136 if (is_scalar) { 3137 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 3138 } else { 3139 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 3140 } 3141 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 3142 } 3143 break; 3144 3145 case TCG_COND_TSTNE: 3146 case TCG_COND_TSTEQ: 
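/*
 * CMTST sets an element to all-ones iff (a1 & a2) != 0, matching
 * TSTNE directly; TSTEQ additionally inverts the result below.
 */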
3147 if (const_args[2]) { 3148 /* (x & 0) == 0 */ 3149 tcg_out_dupi_vec(s, type, MO_8, a0, 3150 -(cond == TCG_COND_TSTEQ)); 3151 break; 3152 } 3153 if (is_scalar) { 3154 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a2); 3155 } else { 3156 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a2); 3157 } 3158 if (cond == TCG_COND_TSTEQ) { 3159 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 3160 } 3161 break; 3162 3163 default: 3164 if (const_args[2]) { 3165 if (is_scalar) { 3166 insn = cmp0_scalar_insn[cond]; 3167 if (insn) { 3168 tcg_out_insn_3612(s, insn, vece, a0, a1); 3169 break; 3170 } 3171 } else { 3172 insn = cmp0_vec_insn[cond]; 3173 if (insn) { 3174 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 3175 break; 3176 } 3177 } 3178 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0); 3179 a2 = TCG_VEC_TMP0; 3180 } 3181 if (is_scalar) { 3182 insn = cmp_scalar_insn[cond]; 3183 if (insn == 0) { 3184 TCGArg t; 3185 t = a1, a1 = a2, a2 = t; 3186 cond = tcg_swap_cond(cond); 3187 insn = cmp_scalar_insn[cond]; 3188 tcg_debug_assert(insn != 0); 3189 } 3190 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 3191 } else { 3192 insn = cmp_vec_insn[cond]; 3193 if (insn == 0) { 3194 TCGArg t; 3195 t = a1, a1 = a2, a2 = t; 3196 cond = tcg_swap_cond(cond); 3197 insn = cmp_vec_insn[cond]; 3198 tcg_debug_assert(insn != 0); 3199 } 3200 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 3201 } 3202 break; 3203 } 3204 } 3205 break; 3206 3207 case INDEX_op_bitsel_vec: 3208 a3 = args[3]; 3209 if (a0 == a3) { 3210 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 3211 } else if (a0 == a2) { 3212 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 3213 } else { 3214 if (a0 != a1) { 3215 tcg_out_mov(s, type, a0, a1); 3216 } 3217 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 3218 } 3219 break; 3220 3221 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3222 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 3223 default: 3224 g_assert_not_reached(); 3225 } 3226} 3227 3228int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3229{ 3230 switch (opc) { 3231 case INDEX_op_add_vec: 3232 case INDEX_op_sub_vec: 3233 case INDEX_op_and_vec: 3234 case INDEX_op_or_vec: 3235 case INDEX_op_xor_vec: 3236 case INDEX_op_andc_vec: 3237 case INDEX_op_orc_vec: 3238 case INDEX_op_neg_vec: 3239 case INDEX_op_abs_vec: 3240 case INDEX_op_not_vec: 3241 case INDEX_op_cmp_vec: 3242 case INDEX_op_shli_vec: 3243 case INDEX_op_shri_vec: 3244 case INDEX_op_sari_vec: 3245 case INDEX_op_ssadd_vec: 3246 case INDEX_op_sssub_vec: 3247 case INDEX_op_usadd_vec: 3248 case INDEX_op_ussub_vec: 3249 case INDEX_op_shlv_vec: 3250 case INDEX_op_bitsel_vec: 3251 return 1; 3252 case INDEX_op_rotli_vec: 3253 case INDEX_op_shrv_vec: 3254 case INDEX_op_sarv_vec: 3255 case INDEX_op_rotlv_vec: 3256 case INDEX_op_rotrv_vec: 3257 return -1; 3258 case INDEX_op_mul_vec: 3259 case INDEX_op_smax_vec: 3260 case INDEX_op_smin_vec: 3261 case INDEX_op_umax_vec: 3262 case INDEX_op_umin_vec: 3263 return vece < MO_64; 3264 3265 default: 3266 return 0; 3267 } 3268} 3269 3270void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 3271 TCGArg a0, ...) 
3272{ 3273 va_list va; 3274 TCGv_vec v0, v1, v2, t1, t2, c1; 3275 TCGArg a2; 3276 3277 va_start(va, a0); 3278 v0 = temp_tcgv_vec(arg_temp(a0)); 3279 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 3280 a2 = va_arg(va, TCGArg); 3281 va_end(va); 3282 3283 switch (opc) { 3284 case INDEX_op_rotli_vec: 3285 t1 = tcg_temp_new_vec(type); 3286 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 3287 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 3288 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 3289 tcg_temp_free_vec(t1); 3290 break; 3291 3292 case INDEX_op_shrv_vec: 3293 case INDEX_op_sarv_vec: 3294 /* Right shifts are negative left shifts for AArch64. */ 3295 v2 = temp_tcgv_vec(arg_temp(a2)); 3296 t1 = tcg_temp_new_vec(type); 3297 tcg_gen_neg_vec(vece, t1, v2); 3298 opc = (opc == INDEX_op_shrv_vec 3299 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 3300 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 3301 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3302 tcg_temp_free_vec(t1); 3303 break; 3304 3305 case INDEX_op_rotlv_vec: 3306 v2 = temp_tcgv_vec(arg_temp(a2)); 3307 t1 = tcg_temp_new_vec(type); 3308 c1 = tcg_constant_vec(type, vece, 8 << vece); 3309 tcg_gen_sub_vec(vece, t1, v2, c1); 3310 /* Right shifts are negative left shifts for AArch64. */ 3311 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 3312 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3313 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 3314 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3315 tcg_gen_or_vec(vece, v0, v0, t1); 3316 tcg_temp_free_vec(t1); 3317 break; 3318 3319 case INDEX_op_rotrv_vec: 3320 v2 = temp_tcgv_vec(arg_temp(a2)); 3321 t1 = tcg_temp_new_vec(type); 3322 t2 = tcg_temp_new_vec(type); 3323 c1 = tcg_constant_vec(type, vece, 8 << vece); 3324 tcg_gen_neg_vec(vece, t1, v2); 3325 tcg_gen_sub_vec(vece, t2, c1, v2); 3326 /* Right shifts are negative left shifts for AArch64. 
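   t1 (= -v2) and t2 (= esize - v2) become v1 << -v2, the
   right-shifted half, and v1 << (esize - v2), the left-shifted half;
   OR-ing them below forms the rotate-right.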
*/ 3327 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 3328 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3329 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 3330 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 3331 tcg_gen_or_vec(vece, v0, t1, t2); 3332 tcg_temp_free_vec(t1); 3333 tcg_temp_free_vec(t2); 3334 break; 3335 3336 default: 3337 g_assert_not_reached(); 3338 } 3339} 3340 3341static TCGConstraintSetIndex 3342tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) 3343{ 3344 switch (op) { 3345 case INDEX_op_qemu_ld_i32: 3346 case INDEX_op_qemu_ld_i64: 3347 return C_O1_I1(r, r); 3348 case INDEX_op_qemu_ld_i128: 3349 return C_O2_I1(r, r, r); 3350 case INDEX_op_qemu_st_i32: 3351 case INDEX_op_qemu_st_i64: 3352 return C_O0_I2(rz, r); 3353 case INDEX_op_qemu_st_i128: 3354 return C_O0_I3(rz, rz, r); 3355 3356 case INDEX_op_add_vec: 3357 case INDEX_op_sub_vec: 3358 case INDEX_op_mul_vec: 3359 case INDEX_op_xor_vec: 3360 case INDEX_op_ssadd_vec: 3361 case INDEX_op_sssub_vec: 3362 case INDEX_op_usadd_vec: 3363 case INDEX_op_ussub_vec: 3364 case INDEX_op_smax_vec: 3365 case INDEX_op_smin_vec: 3366 case INDEX_op_umax_vec: 3367 case INDEX_op_umin_vec: 3368 case INDEX_op_shlv_vec: 3369 case INDEX_op_shrv_vec: 3370 case INDEX_op_sarv_vec: 3371 case INDEX_op_aa64_sshl_vec: 3372 return C_O1_I2(w, w, w); 3373 case INDEX_op_not_vec: 3374 case INDEX_op_neg_vec: 3375 case INDEX_op_abs_vec: 3376 case INDEX_op_shli_vec: 3377 case INDEX_op_shri_vec: 3378 case INDEX_op_sari_vec: 3379 return C_O1_I1(w, w); 3380 case INDEX_op_ld_vec: 3381 case INDEX_op_dupm_vec: 3382 return C_O1_I1(w, r); 3383 case INDEX_op_st_vec: 3384 return C_O0_I2(w, r); 3385 case INDEX_op_dup_vec: 3386 return C_O1_I1(w, wr); 3387 case INDEX_op_or_vec: 3388 case INDEX_op_andc_vec: 3389 return C_O1_I2(w, w, wO); 3390 case INDEX_op_and_vec: 3391 case INDEX_op_orc_vec: 3392 return C_O1_I2(w, w, wN); 3393 case INDEX_op_cmp_vec: 3394 return C_O1_I2(w, w, wZ); 3395 case INDEX_op_bitsel_vec: 3396 return C_O1_I3(w, w, w, w); 3397 case INDEX_op_aa64_sli_vec: 3398 return C_O1_I2(w, 0, w); 3399 3400 default: 3401 return C_NotImplemented; 3402 } 3403} 3404 3405static void tcg_target_init(TCGContext *s) 3406{ 3407 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 3408 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 3409 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 3410 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 3411 3412 tcg_target_call_clobber_regs = -1ull; 3413 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 3414 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 3415 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 3416 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 3417 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 3418 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 3419 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 3420 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 3421 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 3422 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 3423 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 3424 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 3425 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 3426 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 3427 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 
3428 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 3429 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 3430 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 3431 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 3432 3433 s->reserved_regs = 0; 3434 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 3435 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 3436 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 3437 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); 3438 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 3439 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 3440 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0); 3441} 3442 3443/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 3444#define PUSH_SIZE ((30 - 19 + 1) * 8) 3445 3446#define FRAME_SIZE \ 3447 ((PUSH_SIZE \ 3448 + TCG_STATIC_CALL_ARGS_SIZE \ 3449 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 3450 + TCG_TARGET_STACK_ALIGN - 1) \ 3451 & ~(TCG_TARGET_STACK_ALIGN - 1)) 3452 3453/* We're expecting a 2 byte uleb128 encoded value. */ 3454QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 3455 3456/* We're expecting to use a single ADDI insn. */ 3457QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 3458 3459static void tcg_target_qemu_prologue(TCGContext *s) 3460{ 3461 TCGReg r; 3462 3463 tcg_out_bti(s, BTI_C); 3464 3465 /* Push (FP, LR) and allocate space for all saved registers. */ 3466 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 3467 TCG_REG_SP, -PUSH_SIZE, 1, 1); 3468 3469 /* Set up frame pointer for canonical unwinding. */ 3470 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 3471 3472 /* Store callee-preserved regs x19..x28. */ 3473 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3474 int ofs = (r - TCG_REG_X19 + 2) * 8; 3475 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3476 } 3477 3478 /* Make stack space for TCG locals. */ 3479 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3480 FRAME_SIZE - PUSH_SIZE); 3481 3482 /* Inform TCG about how to find TCG locals with register, offset, size. */ 3483 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 3484 CPU_TEMP_BUF_NLONGS * sizeof(long)); 3485 3486 if (!tcg_use_softmmu) { 3487 /* 3488 * Note that XZR cannot be encoded in the address base register slot, 3489 * as that actually encodes SP. Depending on the guest, we may need 3490 * to zero-extend the guest address via the address index register slot, 3491 * therefore we need to load even a zero guest base into a register. 3492 */ 3493 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 3494 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 3495 } 3496 3497 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 3498 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 3499 3500 /* 3501 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 3502 * and fall through to the rest of the epilogue. 3503 */ 3504 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 3505 tcg_out_bti(s, BTI_J); 3506 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 3507 3508 /* TB epilogue */ 3509 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 3510 tcg_out_bti(s, BTI_J); 3511 3512 /* Remove TCG locals stack space. */ 3513 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3514 FRAME_SIZE - PUSH_SIZE); 3515 3516 /* Restore registers x19..x28. 
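   (the same pairs, at the same offsets, as stored by the prologue)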
*/ 3517 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3518 int ofs = (r - TCG_REG_X19 + 2) * 8; 3519 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3520 } 3521 3522 /* Pop (FP, LR), restore SP to previous frame. */ 3523 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 3524 TCG_REG_SP, PUSH_SIZE, 0, 1); 3525 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 3526} 3527 3528static void tcg_out_tb_start(TCGContext *s) 3529{ 3530 tcg_out_bti(s, BTI_J); 3531} 3532 3533static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 3534{ 3535 int i; 3536 for (i = 0; i < count; ++i) { 3537 p[i] = NOP; 3538 } 3539} 3540 3541typedef struct { 3542 DebugFrameHeader h; 3543 uint8_t fde_def_cfa[4]; 3544 uint8_t fde_reg_ofs[24]; 3545} DebugFrame; 3546 3547#define ELF_HOST_MACHINE EM_AARCH64 3548 3549static const DebugFrame debug_frame = { 3550 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 3551 .h.cie.id = -1, 3552 .h.cie.version = 1, 3553 .h.cie.code_align = 1, 3554 .h.cie.data_align = 0x78, /* sleb128 -8 */ 3555 .h.cie.return_column = TCG_REG_LR, 3556 3557 /* Total FDE size does not include the "len" member. */ 3558 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 3559 3560 .fde_def_cfa = { 3561 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 3562 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 3563 (FRAME_SIZE >> 7) 3564 }, 3565 .fde_reg_ofs = { 3566 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 3567 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 3568 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 3569 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 3570 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 3571 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 3572 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 3573 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 3574 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 3575 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */ 3576 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 3577 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 3578 } 3579}; 3580 3581void tcg_register_jit(const void *buf, size_t buf_size) 3582{ 3583 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 3584} 3585