/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "qemu/bitops.h"

/* Used for function call generation. */
#define TCG_REG_CALL_STACK TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN 16
#define TCG_TARGET_CALL_STACK_OFFSET 0
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
#ifdef CONFIG_DARWIN
# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
#endif
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 14)) {
        *src_rw = deposit32(*src_rw, 5, 14, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_TSTBR14:
        return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000
#define TCG_CT_CONST_CMP  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted). */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations. */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses. */

    /* Make things easier below, by testing the form with msb clear. */
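    /*
     * Illustration: for val = 0x0ff0 (form 0..01..10..0), val & -val is the
     * lowest set bit (0x0010); adding it carries through the single run of
     * ones, giving 0x1000, a power of two, so the test below accepts it.
     * A value with two separate runs of ones would leave a stray bit set
     * and be rejected.
     */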
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

/* Return true if v16 is a valid 16-bit shifted immediate. */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate. */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate. */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate. */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate. */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
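/*
 * Illustrative example: for v32 = 0x00ff00ff the loop above settles on
 * i = 4, i.e. MOVI with (cmode 0x0, imm8 0xff) materialises 0x000000ff,
 * and ORR with (cmode 0x4, imm8 0xff) adds the 0x00ff0000 byte back in.
 */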
/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if (ct & TCG_CT_CONST_CMP) {
        if (is_tst_cond(cond)) {
            ct |= TCG_CT_CONST_LIMM;
        } else {
            ct |= TCG_CT_CONST_AIMM;
        }
    }

    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn. */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf,     /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
    /* bit test */
    [TCG_COND_TSTEQ] = COND_EQ,
    [TCG_COND_TSTNE] = COND_NE,
};

typedef enum {
    LDST_ST = 0,     /* store */
    LDST_LD = 1,     /* load */
    LDST_LD_S_X = 2, /* load and sign-extend into Xt */
    LDST_LD_S_W = 3, /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described. */
typedef enum {
    /* Compare and branch (immediate). */
    I3201_CBZ = 0x34000000,
    I3201_CBNZ = 0x35000000,

    /* Conditional branch (immediate). */
    I3202_B_C = 0x54000000,

    /* Test and branch (immediate). */
    I3205_TBZ = 0x36000000,
    I3205_TBNZ = 0x37000000,

    /* Unconditional branch (immediate). */
    I3206_B = 0x14000000,
    I3206_BL = 0x94000000,

    /* Unconditional branch (register). */
    I3207_BR = 0xd61f0000,
    I3207_BLR = 0xd63f0000,
    I3207_RET = 0xd65f0000,

    /* AdvSIMD load/store single structure. */
    I3303_LD1R = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR = 0x58000000,
    I3305_LDR_v64 = 0x5c000000,
    I3305_LDR_v128 = 0x9c000000,

    /* Load/store exclusive. */
    I3306_LDXP = 0xc8600000,
    I3306_STXP = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13. */
    I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310 = 0x00200800,
    I3312_TO_I3313 = 0x01000000,

    /* Load/store register pair instructions. */
    I3314_LDP = 0x28400000,
    I3314_STP = 0x28000000,

    /* Add/subtract immediate instructions. */
    I3401_ADDI = 0x11000000,
    I3401_ADDSI = 0x31000000,
    I3401_SUBI = 0x51000000,
    I3401_SUBSI = 0x71000000,

    /* Bitfield instructions. */
    I3402_BFM = 0x33000000,
    I3402_SBFM = 0x13000000,
    I3402_UBFM = 0x53000000,

    /* Extract instruction. */
    I3403_EXTR = 0x13800000,

    /* Logical immediate instructions. */
    I3404_ANDI = 0x12000000,
    I3404_ORRI = 0x32000000,
    I3404_EORI = 0x52000000,
    I3404_ANDSI = 0x72000000,

    /* Move wide immediate instructions. */
    I3405_MOVN = 0x12800000,
    I3405_MOVZ = 0x52800000,
    I3405_MOVK = 0x72800000,

    /* PC relative addressing instructions. */
    I3406_ADR = 0x10000000,
    I3406_ADRP = 0x90000000,

    /* Add/subtract extended register instructions. */
    I3501_ADD = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift). */
    I3502_ADD = 0x0b000000,
    I3502_ADDS = 0x2b000000,
    I3502_SUB = 0x4b000000,
    I3502_SUBS = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift). */
    I3502S_ADD_LSL = I3502_ADD,

    /* Add/subtract with carry instructions. */
    I3503_ADC = 0x1a000000,
    I3503_SBC = 0x5a000000,

    /* Conditional select instructions. */
    I3506_CSEL = 0x1a800000,
    I3506_CSINC = 0x1a800400,
    I3506_CSINV = 0x5a800000,
    I3506_CSNEG = 0x5a800400,

    /* Data-processing (1 source) instructions. */
    I3507_CLZ = 0x5ac01000,
    I3507_RBIT = 0x5ac00000,
    I3507_REV = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions. */
    I3508_LSLV = 0x1ac02000,
    I3508_LSRV = 0x1ac02400,
    I3508_ASRV = 0x1ac02800,
    I3508_RORV = 0x1ac02c00,
    I3508_SMULH = 0x9b407c00,
    I3508_UMULH = 0x9bc07c00,
    I3508_UDIV = 0x1ac00800,
    I3508_SDIV = 0x1ac00c00,

    /* Data-processing (3 source) instructions. */
    I3509_MADD = 0x1b000000,
    I3509_MSUB = 0x1b008000,

    /* Logical shifted register instructions (without a shift). */
    I3510_AND = 0x0a000000,
    I3510_BIC = 0x0a200000,
    I3510_ORR = 0x2a000000,
    I3510_ORN = 0x2a200000,
    I3510_EOR = 0x4a000000,
    I3510_EON = 0x4a200000,
    I3510_ANDS = 0x6a000000,

    /* Logical shifted register instructions (with a shift). */
    I3502S_AND_LSR = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP = 0x0e000400,
    I3605_INS = 0x4e001c00,
    I3605_UMOV = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI = 0x0f000400,
    I3606_MVNI = 0x2f000400,
    I3606_BIC = 0x2f001400,
    I3606_ORR = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR = 0x5f000400,
    I3609_SSRA = 0x5f001400,
    I3609_SHL = 0x5f005400,
    I3609_USHR = 0x7f000400,
    I3609_USRA = 0x7f001400,
    I3609_SLI = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD = 0x5e200c00,
    I3611_SQSUB = 0x5e202c00,
    I3611_CMGT = 0x5e203400,
    I3611_CMGE = 0x5e203c00,
    I3611_SSHL = 0x5e204400,
    I3611_ADD = 0x5e208400,
    I3611_CMTST = 0x5e208c00,
    I3611_UQADD = 0x7e200c00,
    I3611_UQSUB = 0x7e202c00,
    I3611_CMHI = 0x7e203400,
    I3611_CMHS = 0x7e203c00,
    I3611_USHL = 0x7e204400,
    I3611_SUB = 0x7e208400,
    I3611_CMEQ = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0 = 0x5e208800,
    I3612_CMEQ0 = 0x5e209800,
    I3612_CMLT0 = 0x5e20a800,
    I3612_ABS = 0x5e20b800,
    I3612_CMGE0 = 0x7e208800,
    I3612_CMLE0 = 0x7e209800,
    I3612_NEG = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR = 0x0f000400,
    I3614_SSRA = 0x0f001400,
    I3614_SHL = 0x0f005400,
    I3614_SLI = 0x2f005400,
    I3614_USHR = 0x2f000400,
    I3614_USRA = 0x2f001400,

    /* AdvSIMD three same. */
    I3616_ADD = 0x0e208400,
    I3616_AND = 0x0e201c00,
    I3616_BIC = 0x0e601c00,
    I3616_BIF = 0x2ee01c00,
    I3616_BIT = 0x2ea01c00,
    I3616_BSL = 0x2e601c00,
    I3616_EOR = 0x2e201c00,
    I3616_MUL = 0x0e209c00,
    I3616_ORR = 0x0ea01c00,
    I3616_ORN = 0x0ee01c00,
    I3616_SUB = 0x2e208400,
    I3616_CMGT = 0x0e203400,
    I3616_CMGE = 0x0e203c00,
    I3616_CMTST = 0x0e208c00,
    I3616_CMHI = 0x2e203400,
    I3616_CMHS = 0x2e203c00,
    I3616_CMEQ = 0x2e208c00,
    I3616_SMAX = 0x0e206400,
    I3616_SMIN = 0x0e206c00,
    I3616_SSHL = 0x0e204400,
    I3616_SQADD = 0x0e200c00,
    I3616_SQSUB = 0x0e202c00,
    I3616_UMAX = 0x2e206400,
    I3616_UMIN = 0x2e206c00,
    I3616_UQADD = 0x2e200c00,
    I3616_UQSUB = 0x2e202c00,
    I3616_USHL = 0x2e204400,

    /* AdvSIMD two-reg misc. */
    I3617_CMGT0 = 0x0e208800,
    I3617_CMEQ0 = 0x0e209800,
    I3617_CMLT0 = 0x0e20a800,
    I3617_CMGE0 = 0x2e208800,
    I3617_CMLE0 = 0x2e209800,
    I3617_NOT = 0x2e205800,
    I3617_ABS = 0x0e20b800,
    I3617_NEG = 0x2e20b800,

    /* System instructions. */
    NOP = 0xd503201f,
    DMB_ISH = 0xd50338bf,
    DMB_LD = 0x00000100,
    DMB_ST = 0x00000200,

    BTI_C = 0xd503245f,
    BTI_J = 0xd503249f,
    BTI_JC = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format. */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
                              TCGReg rt, int imm6, int imm14)
{
    insn |= (imm6 & 0x20) << (31 - 5);
    insn |= (imm6 & 0x1f) << 19;
    tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12; /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function. */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404 tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}
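/*
 * Note (illustrative): through the tcg_out_insn() macro above, a call such
 * as tcg_out_insn(s, 3405, MOVZ, ...) expands to
 * tcg_out_insn_3405(s, I3405_MOVZ, ...), so pairing an opcode with the
 * wrong format emitter fails to compile rather than silently mis-encoding.
 */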
/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for both 3.5.2 (Add/Subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount. */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503 tcg_out_insn_3502
#define tcg_out_insn_3508 tcg_out_insn_3502
#define tcg_out_insn_3510 tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function. */
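    /*
     * Clarification: general registers are numbered 0-31 and vector
     * registers 32-63 in this backend, so (~rn & 0x20) << 6 below sets
     * bit 11 exactly when rn is a general register.
     */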
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust. */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust. */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP). */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
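/*
 * Worked example for tcg_out_logicali below (illustrative): limm = 0xff0
 * has h = clz64 = 52 and l = ctz64 = 4, so r = 64 - 4 = 60 and
 * c = 60 - 52 - 1 = 7, i.e. a run of 8 ones rotated right by 60, which is
 * indeed 0xff0.
 */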
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates. */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first. */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
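    /*
     * Note (illustrative): LDR (literal) carries its 19-bit offset in the
     * same bit positions as a conditional branch, which is why the pool
     * entries below reuse the R_AARCH64_CONDBR19 relocation handled by
     * reloc_pc19.
     */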
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0. */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially. */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures. */
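    /*
     * Roughly: a target within +-1MiB is reached with a single ADR;
     * otherwise ADRP locates the 4KiB page within +-4GiB and an ADD
     * supplies the low 12 bits.
     */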
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN? */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool. */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/* Define something more legible for general use. */
#define tcg_out_ldst_r tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding. */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset. */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}
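/*
 * The shift helpers below rely on the standard UBFM/SBFM/EXTR aliases.
 * For example (illustrative), a 32-bit LSL #8 becomes UBFM Wd, Wn, #24, #23,
 * which is what tcg_out_shl computes from (bits - m) and (max - m).
 */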
static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (is_tst_cond(cond)) {
        if (!const_b) {
            tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
        }
    } else {
        if (!const_b) {
            tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
        } else if (b >= 0) {
            tcg_debug_assert(is_aimm(b));
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_debug_assert(is_aimm(-b));
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    int tbit = -1;
    bool need_cmp = true;

    switch (c) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* cmp xN,0; b.ne L -> cbnz xN,L */
        if (b_const && b == 0) {
            need_cmp = false;
        }
        break;
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
        if (b_const && b == 0) {
            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
            tbit = ext ? 63 : 31;
            need_cmp = false;
        }
        break;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */
        if (b_const && b == UINT32_MAX) {
            c = tcg_tst_eqne_cond(c);
            ext = TCG_TYPE_I32;
            need_cmp = false;
            break;
        }
        /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
        if (b_const && is_power_of_2(b)) {
            tbit = ctz64(b);
            need_cmp = false;
        }
        break;
    default:
        break;
    }

    if (need_cmp) {
        tcg_out_cmp(s, ext, c, a, b, b_const);
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        tcg_out_insn(s, 3202, B_C, c, 0);
        return;
    }

    if (tbit >= 0) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
        switch (c) {
        case TCG_COND_TSTEQ:
            tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
            break;
        case TCG_COND_TSTNE:
            tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        switch (c) {
        case TCG_COND_EQ:
            tcg_out_insn(s, 3201, CBZ, ext, a, 0);
            break;
        case TCG_COND_NE:
            tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero. */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP0;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, TCG_COND_NE, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
    }
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/* We expect to use a 7-bit scaled negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -512

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        TCGReg addr_adj;
        TCGType mask_type;
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

        /* Extract the TLB index from the address into TMP0. */
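        /*
         * This emits AND tmp0, tmp0, addr_reg, LSR #(page_bits - entry_bits),
         * i.e. tmp0 = mask & (addr >> shift): the pre-scaled byte offset of
         * the TLB entry within the table.
         */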
        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                     s->page_bits - CPU_TLB_ENTRY_BITS);

        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

        /* Load the tlb comparator into TMP0, and the fast path addend. */
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                         : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                   offsetof(CPUTLBEntry, addend));

        /*
         * For aligned accesses, we check the first byte and include
         * the alignment bits within the address.  For unaligned access,
         * we check that we don't cross pages using the address of the
         * last byte of the access.
         */
        if (a_mask >= s_mask) {
            addr_adj = addr_reg;
        } else {
            addr_adj = TCG_REG_TMP2;
            tcg_out_insn(s, 3401, ADDI, addr_type,
                         addr_adj, addr_reg, s_mask - a_mask);
        }
        compare_mask = (uint64_t)s->page_mask | a_mask;

        /* Store the page mask part of the address into TMP2. */
        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                         addr_adj, compare_mask);

        /* Perform the address comparison. */
        tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);

        /* If not equal, we jump to the slow path. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        h->base = TCG_REG_TMP1;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        if (a_mask) {
            ldst = new_ldst_label(s);

            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addrlo_reg = addr_reg;

            /* tst addr, #mask */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

            /* b.ne slow_path */
            ldst->label_ptr[0] = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
        }

        if (guest_base || addr_type == TCG_TYPE_I32) {
            h->base = TCG_REG_GUEST_BASE;
            h->index = addr_reg;
            h->index_ext = addr_type;
        } else {
            h->base = addr_reg;
            h->index = TCG_REG_XZR;
            h->index_ext = TCG_TYPE_I64;
        }
    }

    return ldst;
}

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    TCGReg base;
    bool use_pair;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing. */
    if (h.index == TCG_REG_XZR) {
        base = h.base;
    } else {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
        } else {
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
        }
    }

    use_pair = h.aa.atom < MO_128 || have_lse2;

    if (!use_pair) {
        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
1989 */ 1990 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15); 1991 branch = s->code_ptr; 1992 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1993 use_pair = true; 1994 } 1995 1996 if (is_ld) { 1997 /* 1998 * 16-byte atomicity without LSE2 requires LDXP+STXP loop: 1999 * ldxp lo, hi, [base] 2000 * stxp t0, lo, hi, [base] 2001 * cbnz t0, .-8 2002 * Require no overlap between data{lo,hi} and base. 2003 */ 2004 if (datalo == base || datahi == base) { 2005 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base); 2006 base = TCG_REG_TMP2; 2007 } 2008 ll = sl = datalo; 2009 lh = sh = datahi; 2010 } else { 2011 /* 2012 * 16-byte atomicity without LSE2 requires LDXP+STXP loop: 2013 * 1: ldxp t0, t1, [base] 2014 * stxp t0, lo, hi, [base] 2015 * cbnz t0, 1b 2016 */ 2017 tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1); 2018 ll = TCG_REG_TMP0; 2019 lh = TCG_REG_TMP1; 2020 sl = datalo; 2021 sh = datahi; 2022 } 2023 2024 tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base); 2025 tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base); 2026 tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2); 2027 2028 if (use_pair) { 2029 /* "b .+8", branching across the one insn of use_pair. */ 2030 tcg_out_insn(s, 3206, B, 2); 2031 reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr)); 2032 } 2033 } 2034 2035 if (use_pair) { 2036 if (is_ld) { 2037 tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0); 2038 } else { 2039 tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0); 2040 } 2041 } 2042 2043 if (ldst) { 2044 ldst->type = TCG_TYPE_I128; 2045 ldst->datalo_reg = datalo; 2046 ldst->datahi_reg = datahi; 2047 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2048 } 2049} 2050 2051static const tcg_insn_unit *tb_ret_addr; 2052 2053static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 2054{ 2055 const tcg_insn_unit *target; 2056 ptrdiff_t offset; 2057 2058 /* Reuse the zeroing that exists for goto_ptr. */ 2059 if (a0 == 0) { 2060 target = tcg_code_gen_epilogue; 2061 } else { 2062 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 2063 target = tb_ret_addr; 2064 } 2065 2066 offset = tcg_pcrel_diff(s, target) >> 2; 2067 if (offset == sextract64(offset, 0, 26)) { 2068 tcg_out_insn(s, 3206, B, offset); 2069 } else { 2070 /* 2071 * Only x16/x17 generate BTI type Jump (2), 2072 * other registers generate BTI type Jump|Call (3). 2073 */ 2074 QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16); 2075 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target); 2076 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0); 2077 } 2078} 2079 2080static void tcg_out_goto_tb(TCGContext *s, int which) 2081{ 2082 /* 2083 * Direct branch, or indirect address load, will be patched 2084 * by tb_target_set_jmp_target. Assert indirect load offset 2085 * in range early, regardless of direct branch distance. 2086 */ 2087 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); 2088 tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); 2089 2090 set_jmp_insn_offset(s, which); 2091 tcg_out32(s, I3206_B); 2092 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0); 2093 set_jmp_reset_offset(s, which); 2094 tcg_out_bti(s, BTI_J); 2095} 2096 2097void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2098 uintptr_t jmp_rx, uintptr_t jmp_rw) 2099{ 2100 uintptr_t d_addr = tb->jmp_target_addr[n]; 2101 ptrdiff_t d_offset = d_addr - jmp_rx; 2102 tcg_insn_unit insn; 2103 2104 /* Either directly branch, or indirect branch load. 
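When the displacement fits the 26-bit B immediate (+/-128MB), a direct branch is
patched in; otherwise the insn becomes an LDR (literal) that fetches the destination
address from the jmp_target_addr slot into TMP0, and the BR TMP0 emitted by
tcg_out_goto_tb completes the jump.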
*/ 2105 if (d_offset == sextract64(d_offset, 0, 28)) { 2106 insn = deposit32(I3206_B, 0, 26, d_offset >> 2); 2107 } else { 2108 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 2109 ptrdiff_t i_offset = i_addr - jmp_rx; 2110 2111 /* Note that we asserted this in range in tcg_out_goto_tb. */ 2112 insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2); 2113 } 2114 qatomic_set((uint32_t *)jmp_rw, insn); 2115 flush_idcache_range(jmp_rx, jmp_rw, 4); 2116} 2117 2118static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, 2119 const TCGArg args[TCG_MAX_OP_ARGS], 2120 const int const_args[TCG_MAX_OP_ARGS]) 2121{ 2122 /* Hoist the loads of the most common arguments. */ 2123 TCGArg a0 = args[0]; 2124 TCGArg a1 = args[1]; 2125 TCGArg a2 = args[2]; 2126 int c2 = const_args[2]; 2127 2128 /* Some operands are defined with "rZ" constraint, a register or 2129 the zero register. These need not actually test args[I] == 0. */ 2130#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 2131 2132 switch (opc) { 2133 case INDEX_op_goto_ptr: 2134 tcg_out_insn(s, 3207, BR, a0); 2135 break; 2136 2137 case INDEX_op_br: 2138 tcg_out_goto_label(s, arg_label(a0)); 2139 break; 2140 2141 case INDEX_op_ld8u_i32: 2142 case INDEX_op_ld8u_i64: 2143 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 2144 break; 2145 case INDEX_op_ld8s_i32: 2146 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 2147 break; 2148 case INDEX_op_ld8s_i64: 2149 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 2150 break; 2151 case INDEX_op_ld16u_i32: 2152 case INDEX_op_ld16u_i64: 2153 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 2154 break; 2155 case INDEX_op_ld16s_i32: 2156 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 2157 break; 2158 case INDEX_op_ld16s_i64: 2159 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 2160 break; 2161 case INDEX_op_ld_i32: 2162 case INDEX_op_ld32u_i64: 2163 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 2164 break; 2165 case INDEX_op_ld32s_i64: 2166 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 2167 break; 2168 case INDEX_op_ld_i64: 2169 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 2170 break; 2171 2172 case INDEX_op_st8_i32: 2173 case INDEX_op_st8_i64: 2174 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 2175 break; 2176 case INDEX_op_st16_i32: 2177 case INDEX_op_st16_i64: 2178 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 2179 break; 2180 case INDEX_op_st_i32: 2181 case INDEX_op_st32_i64: 2182 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 2183 break; 2184 case INDEX_op_st_i64: 2185 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 2186 break; 2187 2188 case INDEX_op_add_i32: 2189 a2 = (int32_t)a2; 2190 /* FALLTHRU */ 2191 case INDEX_op_add_i64: 2192 if (c2) { 2193 tcg_out_addsubi(s, ext, a0, a1, a2); 2194 } else { 2195 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 2196 } 2197 break; 2198 2199 case INDEX_op_sub_i32: 2200 a2 = (int32_t)a2; 2201 /* FALLTHRU */ 2202 case INDEX_op_sub_i64: 2203 if (c2) { 2204 tcg_out_addsubi(s, ext, a0, a1, -a2); 2205 } else { 2206 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 2207 } 2208 break; 2209 2210 case INDEX_op_neg_i64: 2211 case INDEX_op_neg_i32: 2212 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 2213 break; 2214 2215 case INDEX_op_and_i32: 2216 a2 = (int32_t)a2; 2217 /* FALLTHRU */ 2218 case INDEX_op_and_i64: 2219 if (c2) { 2220 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); 2221 } else { 2222 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 2223 } 2224 break; 2225 2226 case INDEX_op_andc_i32: 2227 a2 = (int32_t)a2; 2228 /* FALLTHRU */ 2229 case 
INDEX_op_andc_i64: 2230 if (c2) { 2231 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2232 } else { 2233 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2234 } 2235 break; 2236 2237 case INDEX_op_or_i32: 2238 a2 = (int32_t)a2; 2239 /* FALLTHRU */ 2240 case INDEX_op_or_i64: 2241 if (c2) { 2242 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2243 } else { 2244 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2245 } 2246 break; 2247 2248 case INDEX_op_orc_i32: 2249 a2 = (int32_t)a2; 2250 /* FALLTHRU */ 2251 case INDEX_op_orc_i64: 2252 if (c2) { 2253 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); 2254 } else { 2255 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2256 } 2257 break; 2258 2259 case INDEX_op_xor_i32: 2260 a2 = (int32_t)a2; 2261 /* FALLTHRU */ 2262 case INDEX_op_xor_i64: 2263 if (c2) { 2264 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2265 } else { 2266 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2267 } 2268 break; 2269 2270 case INDEX_op_eqv_i32: 2271 a2 = (int32_t)a2; 2272 /* FALLTHRU */ 2273 case INDEX_op_eqv_i64: 2274 if (c2) { 2275 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2276 } else { 2277 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2278 } 2279 break; 2280 2281 case INDEX_op_not_i64: 2282 case INDEX_op_not_i32: 2283 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2284 break; 2285 2286 case INDEX_op_mul_i64: 2287 case INDEX_op_mul_i32: 2288 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2289 break; 2290 2291 case INDEX_op_div_i64: 2292 case INDEX_op_div_i32: 2293 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2294 break; 2295 case INDEX_op_divu_i64: 2296 case INDEX_op_divu_i32: 2297 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2298 break; 2299 2300 case INDEX_op_rem_i64: 2301 case INDEX_op_rem_i32: 2302 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2); 2303 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); 2304 break; 2305 case INDEX_op_remu_i64: 2306 case INDEX_op_remu_i32: 2307 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2); 2308 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); 2309 break; 2310 2311 case INDEX_op_shl_i64: 2312 case INDEX_op_shl_i32: 2313 if (c2) { 2314 tcg_out_shl(s, ext, a0, a1, a2); 2315 } else { 2316 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2317 } 2318 break; 2319 2320 case INDEX_op_shr_i64: 2321 case INDEX_op_shr_i32: 2322 if (c2) { 2323 tcg_out_shr(s, ext, a0, a1, a2); 2324 } else { 2325 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2326 } 2327 break; 2328 2329 case INDEX_op_sar_i64: 2330 case INDEX_op_sar_i32: 2331 if (c2) { 2332 tcg_out_sar(s, ext, a0, a1, a2); 2333 } else { 2334 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2335 } 2336 break; 2337 2338 case INDEX_op_rotr_i64: 2339 case INDEX_op_rotr_i32: 2340 if (c2) { 2341 tcg_out_rotr(s, ext, a0, a1, a2); 2342 } else { 2343 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2344 } 2345 break; 2346 2347 case INDEX_op_rotl_i64: 2348 case INDEX_op_rotl_i32: 2349 if (c2) { 2350 tcg_out_rotl(s, ext, a0, a1, a2); 2351 } else { 2352 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2); 2353 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0); 2354 } 2355 break; 2356 2357 case INDEX_op_clz_i64: 2358 case INDEX_op_clz_i32: 2359 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2360 break; 2361 case INDEX_op_ctz_i64: 2362 case INDEX_op_ctz_i32: 2363 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2364 break; 2365 2366 case INDEX_op_brcond_i32: 2367 a1 = (int32_t)a1; 2368 /* FALLTHRU */ 2369 case INDEX_op_brcond_i64: 2370 
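/* brcond: comparands in args[0]/args[1], condition in args[2], target label
   in args[3]; the _i32 case has already sign-extended args[1] just above. */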
tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2371 break; 2372 2373 case INDEX_op_setcond_i32: 2374 a2 = (int32_t)a2; 2375 /* FALLTHRU */ 2376 case INDEX_op_setcond_i64: 2377 tcg_out_cmp(s, ext, args[3], a1, a2, c2); 2378 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2379 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2380 TCG_REG_XZR, tcg_invert_cond(args[3])); 2381 break; 2382 2383 case INDEX_op_negsetcond_i32: 2384 a2 = (int32_t)a2; 2385 /* FALLTHRU */ 2386 case INDEX_op_negsetcond_i64: 2387 tcg_out_cmp(s, ext, args[3], a1, a2, c2); 2388 /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */ 2389 tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR, 2390 TCG_REG_XZR, tcg_invert_cond(args[3])); 2391 break; 2392 2393 case INDEX_op_movcond_i32: 2394 a2 = (int32_t)a2; 2395 /* FALLTHRU */ 2396 case INDEX_op_movcond_i64: 2397 tcg_out_cmp(s, ext, args[5], a1, a2, c2); 2398 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2399 break; 2400 2401 case INDEX_op_qemu_ld_a32_i32: 2402 case INDEX_op_qemu_ld_a64_i32: 2403 case INDEX_op_qemu_ld_a32_i64: 2404 case INDEX_op_qemu_ld_a64_i64: 2405 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2406 break; 2407 case INDEX_op_qemu_st_a32_i32: 2408 case INDEX_op_qemu_st_a64_i32: 2409 case INDEX_op_qemu_st_a32_i64: 2410 case INDEX_op_qemu_st_a64_i64: 2411 tcg_out_qemu_st(s, REG0(0), a1, a2, ext); 2412 break; 2413 case INDEX_op_qemu_ld_a32_i128: 2414 case INDEX_op_qemu_ld_a64_i128: 2415 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); 2416 break; 2417 case INDEX_op_qemu_st_a32_i128: 2418 case INDEX_op_qemu_st_a64_i128: 2419 tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false); 2420 break; 2421 2422 case INDEX_op_bswap64_i64: 2423 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2424 break; 2425 case INDEX_op_bswap32_i64: 2426 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2427 if (a2 & TCG_BSWAP_OS) { 2428 tcg_out_ext32s(s, a0, a0); 2429 } 2430 break; 2431 case INDEX_op_bswap32_i32: 2432 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2433 break; 2434 case INDEX_op_bswap16_i64: 2435 case INDEX_op_bswap16_i32: 2436 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2437 if (a2 & TCG_BSWAP_OS) { 2438 /* Output must be sign-extended. */ 2439 tcg_out_ext16s(s, ext, a0, a0); 2440 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2441 /* Output must be zero-extended, but input isn't. 
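REV16 only swaps bytes within each 16-bit lane, so bits 31:16 of the result
are not cleared; zero them explicitly.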
*/ 2442 tcg_out_ext16u(s, a0, a0); 2443 } 2444 break; 2445 2446 case INDEX_op_deposit_i64: 2447 case INDEX_op_deposit_i32: 2448 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2449 break; 2450 2451 case INDEX_op_extract_i64: 2452 case INDEX_op_extract_i32: 2453 if (a2 == 0) { 2454 uint64_t mask = MAKE_64BIT_MASK(0, args[3]); 2455 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, mask); 2456 } else { 2457 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2458 } 2459 break; 2460 2461 case INDEX_op_sextract_i64: 2462 case INDEX_op_sextract_i32: 2463 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2464 break; 2465 2466 case INDEX_op_extract2_i64: 2467 case INDEX_op_extract2_i32: 2468 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2469 break; 2470 2471 case INDEX_op_add2_i32: 2472 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2473 (int32_t)args[4], args[5], const_args[4], 2474 const_args[5], false); 2475 break; 2476 case INDEX_op_add2_i64: 2477 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2478 args[5], const_args[4], const_args[5], false); 2479 break; 2480 case INDEX_op_sub2_i32: 2481 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2482 (int32_t)args[4], args[5], const_args[4], 2483 const_args[5], true); 2484 break; 2485 case INDEX_op_sub2_i64: 2486 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2487 args[5], const_args[4], const_args[5], true); 2488 break; 2489 2490 case INDEX_op_muluh_i64: 2491 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2492 break; 2493 case INDEX_op_mulsh_i64: 2494 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2495 break; 2496 2497 case INDEX_op_mb: 2498 tcg_out_mb(s, a0); 2499 break; 2500 2501 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2502 case INDEX_op_mov_i64: 2503 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2504 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2505 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 2506 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
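(tcg_reg_alloc_op expands these through the tcg_out_ext* callbacks, so they
never reach this switch.)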
*/ 2507 case INDEX_op_ext8s_i64: 2508 case INDEX_op_ext8u_i32: 2509 case INDEX_op_ext8u_i64: 2510 case INDEX_op_ext16s_i64: 2511 case INDEX_op_ext16s_i32: 2512 case INDEX_op_ext16u_i64: 2513 case INDEX_op_ext16u_i32: 2514 case INDEX_op_ext32s_i64: 2515 case INDEX_op_ext32u_i64: 2516 case INDEX_op_ext_i32_i64: 2517 case INDEX_op_extu_i32_i64: 2518 case INDEX_op_extrl_i64_i32: 2519 default: 2520 g_assert_not_reached(); 2521 } 2522 2523#undef REG0 2524} 2525 2526static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2527 unsigned vecl, unsigned vece, 2528 const TCGArg args[TCG_MAX_OP_ARGS], 2529 const int const_args[TCG_MAX_OP_ARGS]) 2530{ 2531 static const AArch64Insn cmp_vec_insn[16] = { 2532 [TCG_COND_EQ] = I3616_CMEQ, 2533 [TCG_COND_GT] = I3616_CMGT, 2534 [TCG_COND_GE] = I3616_CMGE, 2535 [TCG_COND_GTU] = I3616_CMHI, 2536 [TCG_COND_GEU] = I3616_CMHS, 2537 }; 2538 static const AArch64Insn cmp_scalar_insn[16] = { 2539 [TCG_COND_EQ] = I3611_CMEQ, 2540 [TCG_COND_GT] = I3611_CMGT, 2541 [TCG_COND_GE] = I3611_CMGE, 2542 [TCG_COND_GTU] = I3611_CMHI, 2543 [TCG_COND_GEU] = I3611_CMHS, 2544 }; 2545 static const AArch64Insn cmp0_vec_insn[16] = { 2546 [TCG_COND_EQ] = I3617_CMEQ0, 2547 [TCG_COND_GT] = I3617_CMGT0, 2548 [TCG_COND_GE] = I3617_CMGE0, 2549 [TCG_COND_LT] = I3617_CMLT0, 2550 [TCG_COND_LE] = I3617_CMLE0, 2551 }; 2552 static const AArch64Insn cmp0_scalar_insn[16] = { 2553 [TCG_COND_EQ] = I3612_CMEQ0, 2554 [TCG_COND_GT] = I3612_CMGT0, 2555 [TCG_COND_GE] = I3612_CMGE0, 2556 [TCG_COND_LT] = I3612_CMLT0, 2557 [TCG_COND_LE] = I3612_CMLE0, 2558 }; 2559 2560 TCGType type = vecl + TCG_TYPE_V64; 2561 unsigned is_q = vecl; 2562 bool is_scalar = !is_q && vece == MO_64; 2563 TCGArg a0, a1, a2, a3; 2564 int cmode, imm8; 2565 2566 a0 = args[0]; 2567 a1 = args[1]; 2568 a2 = args[2]; 2569 2570 switch (opc) { 2571 case INDEX_op_ld_vec: 2572 tcg_out_ld(s, type, a0, a1, a2); 2573 break; 2574 case INDEX_op_st_vec: 2575 tcg_out_st(s, type, a0, a1, a2); 2576 break; 2577 case INDEX_op_dupm_vec: 2578 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2579 break; 2580 case INDEX_op_add_vec: 2581 if (is_scalar) { 2582 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2583 } else { 2584 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2585 } 2586 break; 2587 case INDEX_op_sub_vec: 2588 if (is_scalar) { 2589 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2590 } else { 2591 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2592 } 2593 break; 2594 case INDEX_op_mul_vec: 2595 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2596 break; 2597 case INDEX_op_neg_vec: 2598 if (is_scalar) { 2599 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2600 } else { 2601 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2602 } 2603 break; 2604 case INDEX_op_abs_vec: 2605 if (is_scalar) { 2606 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2607 } else { 2608 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2609 } 2610 break; 2611 case INDEX_op_and_vec: 2612 if (const_args[2]) { 2613 is_shimm1632(~a2, &cmode, &imm8); 2614 if (a0 == a1) { 2615 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2616 return; 2617 } 2618 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2619 a2 = a0; 2620 } 2621 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2622 break; 2623 case INDEX_op_or_vec: 2624 if (const_args[2]) { 2625 is_shimm1632(a2, &cmode, &imm8); 2626 if (a0 == a1) { 2627 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2628 return; 2629 } 2630 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2631 a2 = a0; 2632 } 2633 tcg_out_insn(s, 3616, ORR, 
is_q, 0, a0, a1, a2); 2634 break; 2635 case INDEX_op_andc_vec: 2636 if (const_args[2]) { 2637 is_shimm1632(a2, &cmode, &imm8); 2638 if (a0 == a1) { 2639 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2640 return; 2641 } 2642 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2643 a2 = a0; 2644 } 2645 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2646 break; 2647 case INDEX_op_orc_vec: 2648 if (const_args[2]) { 2649 is_shimm1632(~a2, &cmode, &imm8); 2650 if (a0 == a1) { 2651 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2652 return; 2653 } 2654 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2655 a2 = a0; 2656 } 2657 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2658 break; 2659 case INDEX_op_xor_vec: 2660 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2661 break; 2662 case INDEX_op_ssadd_vec: 2663 if (is_scalar) { 2664 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2665 } else { 2666 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2667 } 2668 break; 2669 case INDEX_op_sssub_vec: 2670 if (is_scalar) { 2671 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2672 } else { 2673 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2674 } 2675 break; 2676 case INDEX_op_usadd_vec: 2677 if (is_scalar) { 2678 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2679 } else { 2680 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2681 } 2682 break; 2683 case INDEX_op_ussub_vec: 2684 if (is_scalar) { 2685 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2686 } else { 2687 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2688 } 2689 break; 2690 case INDEX_op_smax_vec: 2691 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2692 break; 2693 case INDEX_op_smin_vec: 2694 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2695 break; 2696 case INDEX_op_umax_vec: 2697 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2698 break; 2699 case INDEX_op_umin_vec: 2700 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2701 break; 2702 case INDEX_op_not_vec: 2703 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2704 break; 2705 case INDEX_op_shli_vec: 2706 if (is_scalar) { 2707 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2708 } else { 2709 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2710 } 2711 break; 2712 case INDEX_op_shri_vec: 2713 if (is_scalar) { 2714 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2715 } else { 2716 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2717 } 2718 break; 2719 case INDEX_op_sari_vec: 2720 if (is_scalar) { 2721 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2722 } else { 2723 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2724 } 2725 break; 2726 case INDEX_op_aa64_sli_vec: 2727 if (is_scalar) { 2728 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2729 } else { 2730 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2731 } 2732 break; 2733 case INDEX_op_shlv_vec: 2734 if (is_scalar) { 2735 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2736 } else { 2737 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2738 } 2739 break; 2740 case INDEX_op_aa64_sshl_vec: 2741 if (is_scalar) { 2742 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2743 } else { 2744 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2745 } 2746 break; 2747 case INDEX_op_cmp_vec: 2748 { 2749 TCGCond cond = args[3]; 2750 AArch64Insn insn; 2751 2752 switch (cond) { 2753 case TCG_COND_NE: 2754 if (const_args[2]) { 2755 if (is_scalar) { 2756 tcg_out_insn(s, 3611, CMTST, vece, 
a0, a1, a1); 2757 } else { 2758 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2759 } 2760 } else { 2761 if (is_scalar) { 2762 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2763 } else { 2764 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2765 } 2766 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2767 } 2768 break; 2769 2770 case TCG_COND_TSTNE: 2771 case TCG_COND_TSTEQ: 2772 if (const_args[2]) { 2773 /* (x & 0) == 0 */ 2774 tcg_out_dupi_vec(s, type, MO_8, a0, 2775 -(cond == TCG_COND_TSTEQ)); 2776 break; 2777 } 2778 if (is_scalar) { 2779 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a2); 2780 } else { 2781 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a2); 2782 } 2783 if (cond == TCG_COND_TSTEQ) { 2784 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2785 } 2786 break; 2787 2788 default: 2789 if (const_args[2]) { 2790 if (is_scalar) { 2791 insn = cmp0_scalar_insn[cond]; 2792 if (insn) { 2793 tcg_out_insn_3612(s, insn, vece, a0, a1); 2794 break; 2795 } 2796 } else { 2797 insn = cmp0_vec_insn[cond]; 2798 if (insn) { 2799 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2800 break; 2801 } 2802 } 2803 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0); 2804 a2 = TCG_VEC_TMP0; 2805 } 2806 if (is_scalar) { 2807 insn = cmp_scalar_insn[cond]; 2808 if (insn == 0) { 2809 TCGArg t; 2810 t = a1, a1 = a2, a2 = t; 2811 cond = tcg_swap_cond(cond); 2812 insn = cmp_scalar_insn[cond]; 2813 tcg_debug_assert(insn != 0); 2814 } 2815 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2816 } else { 2817 insn = cmp_vec_insn[cond]; 2818 if (insn == 0) { 2819 TCGArg t; 2820 t = a1, a1 = a2, a2 = t; 2821 cond = tcg_swap_cond(cond); 2822 insn = cmp_vec_insn[cond]; 2823 tcg_debug_assert(insn != 0); 2824 } 2825 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2826 } 2827 break; 2828 } 2829 } 2830 break; 2831 2832 case INDEX_op_bitsel_vec: 2833 a3 = args[3]; 2834 if (a0 == a3) { 2835 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2836 } else if (a0 == a2) { 2837 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2838 } else { 2839 if (a0 != a1) { 2840 tcg_out_mov(s, type, a0, a1); 2841 } 2842 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2843 } 2844 break; 2845 2846 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2847 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2848 default: 2849 g_assert_not_reached(); 2850 } 2851} 2852 2853int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2854{ 2855 switch (opc) { 2856 case INDEX_op_add_vec: 2857 case INDEX_op_sub_vec: 2858 case INDEX_op_and_vec: 2859 case INDEX_op_or_vec: 2860 case INDEX_op_xor_vec: 2861 case INDEX_op_andc_vec: 2862 case INDEX_op_orc_vec: 2863 case INDEX_op_neg_vec: 2864 case INDEX_op_abs_vec: 2865 case INDEX_op_not_vec: 2866 case INDEX_op_cmp_vec: 2867 case INDEX_op_shli_vec: 2868 case INDEX_op_shri_vec: 2869 case INDEX_op_sari_vec: 2870 case INDEX_op_ssadd_vec: 2871 case INDEX_op_sssub_vec: 2872 case INDEX_op_usadd_vec: 2873 case INDEX_op_ussub_vec: 2874 case INDEX_op_shlv_vec: 2875 case INDEX_op_bitsel_vec: 2876 return 1; 2877 case INDEX_op_rotli_vec: 2878 case INDEX_op_shrv_vec: 2879 case INDEX_op_sarv_vec: 2880 case INDEX_op_rotlv_vec: 2881 case INDEX_op_rotrv_vec: 2882 return -1; 2883 case INDEX_op_mul_vec: 2884 case INDEX_op_smax_vec: 2885 case INDEX_op_smin_vec: 2886 case INDEX_op_umax_vec: 2887 case INDEX_op_umin_vec: 2888 return vece < MO_64; 2889 2890 default: 2891 return 0; 2892 } 2893} 2894 2895void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2896 TCGArg a0, ...) 
2897{ 2898 va_list va; 2899 TCGv_vec v0, v1, v2, t1, t2, c1; 2900 TCGArg a2; 2901 2902 va_start(va, a0); 2903 v0 = temp_tcgv_vec(arg_temp(a0)); 2904 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2905 a2 = va_arg(va, TCGArg); 2906 va_end(va); 2907 2908 switch (opc) { 2909 case INDEX_op_rotli_vec: 2910 t1 = tcg_temp_new_vec(type); 2911 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2912 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2913 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2914 tcg_temp_free_vec(t1); 2915 break; 2916 2917 case INDEX_op_shrv_vec: 2918 case INDEX_op_sarv_vec: 2919 /* Right shifts are negative left shifts for AArch64. */ 2920 v2 = temp_tcgv_vec(arg_temp(a2)); 2921 t1 = tcg_temp_new_vec(type); 2922 tcg_gen_neg_vec(vece, t1, v2); 2923 opc = (opc == INDEX_op_shrv_vec 2924 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2925 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2926 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2927 tcg_temp_free_vec(t1); 2928 break; 2929 2930 case INDEX_op_rotlv_vec: 2931 v2 = temp_tcgv_vec(arg_temp(a2)); 2932 t1 = tcg_temp_new_vec(type); 2933 c1 = tcg_constant_vec(type, vece, 8 << vece); 2934 tcg_gen_sub_vec(vece, t1, v2, c1); 2935 /* Right shifts are negative left shifts for AArch64. */ 2936 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2937 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2938 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2939 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2940 tcg_gen_or_vec(vece, v0, v0, t1); 2941 tcg_temp_free_vec(t1); 2942 break; 2943 2944 case INDEX_op_rotrv_vec: 2945 v2 = temp_tcgv_vec(arg_temp(a2)); 2946 t1 = tcg_temp_new_vec(type); 2947 t2 = tcg_temp_new_vec(type); 2948 c1 = tcg_constant_vec(type, vece, 8 << vece); 2949 tcg_gen_neg_vec(vece, t1, v2); 2950 tcg_gen_sub_vec(vece, t2, c1, v2); 2951 /* Right shifts are negative left shifts for AArch64. 
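Below, t1 becomes v1 >> v2 (USHL by the negated count) and t2 becomes
v1 << (elem_bits - v2); OR-ing them yields the rotate right.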
*/ 2952 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2953 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2954 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2955 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2956 tcg_gen_or_vec(vece, v0, t1, t2); 2957 tcg_temp_free_vec(t1); 2958 tcg_temp_free_vec(t2); 2959 break; 2960 2961 default: 2962 g_assert_not_reached(); 2963 } 2964} 2965 2966static TCGConstraintSetIndex 2967tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) 2968{ 2969 switch (op) { 2970 case INDEX_op_goto_ptr: 2971 return C_O0_I1(r); 2972 2973 case INDEX_op_ld8u_i32: 2974 case INDEX_op_ld8s_i32: 2975 case INDEX_op_ld16u_i32: 2976 case INDEX_op_ld16s_i32: 2977 case INDEX_op_ld_i32: 2978 case INDEX_op_ld8u_i64: 2979 case INDEX_op_ld8s_i64: 2980 case INDEX_op_ld16u_i64: 2981 case INDEX_op_ld16s_i64: 2982 case INDEX_op_ld32u_i64: 2983 case INDEX_op_ld32s_i64: 2984 case INDEX_op_ld_i64: 2985 case INDEX_op_neg_i32: 2986 case INDEX_op_neg_i64: 2987 case INDEX_op_not_i32: 2988 case INDEX_op_not_i64: 2989 case INDEX_op_bswap16_i32: 2990 case INDEX_op_bswap32_i32: 2991 case INDEX_op_bswap16_i64: 2992 case INDEX_op_bswap32_i64: 2993 case INDEX_op_bswap64_i64: 2994 case INDEX_op_ext8s_i32: 2995 case INDEX_op_ext16s_i32: 2996 case INDEX_op_ext8u_i32: 2997 case INDEX_op_ext16u_i32: 2998 case INDEX_op_ext8s_i64: 2999 case INDEX_op_ext16s_i64: 3000 case INDEX_op_ext32s_i64: 3001 case INDEX_op_ext8u_i64: 3002 case INDEX_op_ext16u_i64: 3003 case INDEX_op_ext32u_i64: 3004 case INDEX_op_ext_i32_i64: 3005 case INDEX_op_extu_i32_i64: 3006 case INDEX_op_extract_i32: 3007 case INDEX_op_extract_i64: 3008 case INDEX_op_sextract_i32: 3009 case INDEX_op_sextract_i64: 3010 return C_O1_I1(r, r); 3011 3012 case INDEX_op_st8_i32: 3013 case INDEX_op_st16_i32: 3014 case INDEX_op_st_i32: 3015 case INDEX_op_st8_i64: 3016 case INDEX_op_st16_i64: 3017 case INDEX_op_st32_i64: 3018 case INDEX_op_st_i64: 3019 return C_O0_I2(rZ, r); 3020 3021 case INDEX_op_add_i32: 3022 case INDEX_op_add_i64: 3023 case INDEX_op_sub_i32: 3024 case INDEX_op_sub_i64: 3025 return C_O1_I2(r, r, rA); 3026 3027 case INDEX_op_setcond_i32: 3028 case INDEX_op_setcond_i64: 3029 case INDEX_op_negsetcond_i32: 3030 case INDEX_op_negsetcond_i64: 3031 return C_O1_I2(r, r, rC); 3032 3033 case INDEX_op_mul_i32: 3034 case INDEX_op_mul_i64: 3035 case INDEX_op_div_i32: 3036 case INDEX_op_div_i64: 3037 case INDEX_op_divu_i32: 3038 case INDEX_op_divu_i64: 3039 case INDEX_op_rem_i32: 3040 case INDEX_op_rem_i64: 3041 case INDEX_op_remu_i32: 3042 case INDEX_op_remu_i64: 3043 case INDEX_op_muluh_i64: 3044 case INDEX_op_mulsh_i64: 3045 return C_O1_I2(r, r, r); 3046 3047 case INDEX_op_and_i32: 3048 case INDEX_op_and_i64: 3049 case INDEX_op_or_i32: 3050 case INDEX_op_or_i64: 3051 case INDEX_op_xor_i32: 3052 case INDEX_op_xor_i64: 3053 case INDEX_op_andc_i32: 3054 case INDEX_op_andc_i64: 3055 case INDEX_op_orc_i32: 3056 case INDEX_op_orc_i64: 3057 case INDEX_op_eqv_i32: 3058 case INDEX_op_eqv_i64: 3059 return C_O1_I2(r, r, rL); 3060 3061 case INDEX_op_shl_i32: 3062 case INDEX_op_shr_i32: 3063 case INDEX_op_sar_i32: 3064 case INDEX_op_rotl_i32: 3065 case INDEX_op_rotr_i32: 3066 case INDEX_op_shl_i64: 3067 case INDEX_op_shr_i64: 3068 case INDEX_op_sar_i64: 3069 case INDEX_op_rotl_i64: 3070 case INDEX_op_rotr_i64: 3071 return C_O1_I2(r, r, ri); 3072 3073 case INDEX_op_clz_i32: 3074 case INDEX_op_ctz_i32: 3075 case INDEX_op_clz_i64: 3076 case INDEX_op_ctz_i64: 3077 return C_O1_I2(r, r, rAL); 3078 3079 case INDEX_op_brcond_i32: 3080 case INDEX_op_brcond_i64: 
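/* 'rC' accepts, roughly, any constant that tcg_out_cmp can encode inline:
   an add/sub-style immediate for ordinary comparisons, or a logical
   immediate for the TSTEQ/TSTNE forms. */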
3081 return C_O0_I2(r, rC); 3082 3083 case INDEX_op_movcond_i32: 3084 case INDEX_op_movcond_i64: 3085 return C_O1_I4(r, r, rC, rZ, rZ); 3086 3087 case INDEX_op_qemu_ld_a32_i32: 3088 case INDEX_op_qemu_ld_a64_i32: 3089 case INDEX_op_qemu_ld_a32_i64: 3090 case INDEX_op_qemu_ld_a64_i64: 3091 return C_O1_I1(r, r); 3092 case INDEX_op_qemu_ld_a32_i128: 3093 case INDEX_op_qemu_ld_a64_i128: 3094 return C_O2_I1(r, r, r); 3095 case INDEX_op_qemu_st_a32_i32: 3096 case INDEX_op_qemu_st_a64_i32: 3097 case INDEX_op_qemu_st_a32_i64: 3098 case INDEX_op_qemu_st_a64_i64: 3099 return C_O0_I2(rZ, r); 3100 case INDEX_op_qemu_st_a32_i128: 3101 case INDEX_op_qemu_st_a64_i128: 3102 return C_O0_I3(rZ, rZ, r); 3103 3104 case INDEX_op_deposit_i32: 3105 case INDEX_op_deposit_i64: 3106 return C_O1_I2(r, 0, rZ); 3107 3108 case INDEX_op_extract2_i32: 3109 case INDEX_op_extract2_i64: 3110 return C_O1_I2(r, rZ, rZ); 3111 3112 case INDEX_op_add2_i32: 3113 case INDEX_op_add2_i64: 3114 case INDEX_op_sub2_i32: 3115 case INDEX_op_sub2_i64: 3116 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 3117 3118 case INDEX_op_add_vec: 3119 case INDEX_op_sub_vec: 3120 case INDEX_op_mul_vec: 3121 case INDEX_op_xor_vec: 3122 case INDEX_op_ssadd_vec: 3123 case INDEX_op_sssub_vec: 3124 case INDEX_op_usadd_vec: 3125 case INDEX_op_ussub_vec: 3126 case INDEX_op_smax_vec: 3127 case INDEX_op_smin_vec: 3128 case INDEX_op_umax_vec: 3129 case INDEX_op_umin_vec: 3130 case INDEX_op_shlv_vec: 3131 case INDEX_op_shrv_vec: 3132 case INDEX_op_sarv_vec: 3133 case INDEX_op_aa64_sshl_vec: 3134 return C_O1_I2(w, w, w); 3135 case INDEX_op_not_vec: 3136 case INDEX_op_neg_vec: 3137 case INDEX_op_abs_vec: 3138 case INDEX_op_shli_vec: 3139 case INDEX_op_shri_vec: 3140 case INDEX_op_sari_vec: 3141 return C_O1_I1(w, w); 3142 case INDEX_op_ld_vec: 3143 case INDEX_op_dupm_vec: 3144 return C_O1_I1(w, r); 3145 case INDEX_op_st_vec: 3146 return C_O0_I2(w, r); 3147 case INDEX_op_dup_vec: 3148 return C_O1_I1(w, wr); 3149 case INDEX_op_or_vec: 3150 case INDEX_op_andc_vec: 3151 return C_O1_I2(w, w, wO); 3152 case INDEX_op_and_vec: 3153 case INDEX_op_orc_vec: 3154 return C_O1_I2(w, w, wN); 3155 case INDEX_op_cmp_vec: 3156 return C_O1_I2(w, w, wZ); 3157 case INDEX_op_bitsel_vec: 3158 return C_O1_I3(w, w, w, w); 3159 case INDEX_op_aa64_sli_vec: 3160 return C_O1_I2(w, 0, w); 3161 3162 default: 3163 return C_NotImplemented; 3164 } 3165} 3166 3167static void tcg_target_init(TCGContext *s) 3168{ 3169 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 3170 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 3171 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 3172 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 3173 3174 tcg_target_call_clobber_regs = -1ull; 3175 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 3176 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 3177 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 3178 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 3179 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 3180 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 3181 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 3182 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 3183 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 3184 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 3185 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 3186 
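/* Likewise the low 64 bits of v8..v15 are preserved by callees per AAPCS64,
   so treat those registers as call-saved as well. */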
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 3187 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 3188 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 3189 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 3190 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 3191 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 3192 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 3193 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 3194 3195 s->reserved_regs = 0; 3196 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 3197 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 3198 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 3199 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); 3200 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 3201 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 3202 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0); 3203} 3204 3205/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 3206#define PUSH_SIZE ((30 - 19 + 1) * 8) 3207 3208#define FRAME_SIZE \ 3209 ((PUSH_SIZE \ 3210 + TCG_STATIC_CALL_ARGS_SIZE \ 3211 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 3212 + TCG_TARGET_STACK_ALIGN - 1) \ 3213 & ~(TCG_TARGET_STACK_ALIGN - 1)) 3214 3215/* We're expecting a 2 byte uleb128 encoded value. */ 3216QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 3217 3218/* We're expecting to use a single ADDI insn. */ 3219QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 3220 3221static void tcg_target_qemu_prologue(TCGContext *s) 3222{ 3223 TCGReg r; 3224 3225 tcg_out_bti(s, BTI_C); 3226 3227 /* Push (FP, LR) and allocate space for all saved registers. */ 3228 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 3229 TCG_REG_SP, -PUSH_SIZE, 1, 1); 3230 3231 /* Set up frame pointer for canonical unwinding. */ 3232 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 3233 3234 /* Store callee-preserved regs x19..x28. */ 3235 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3236 int ofs = (r - TCG_REG_X19 + 2) * 8; 3237 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3238 } 3239 3240 /* Make stack space for TCG locals. */ 3241 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3242 FRAME_SIZE - PUSH_SIZE); 3243 3244 /* Inform TCG about how to find TCG locals with register, offset, size. */ 3245 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 3246 CPU_TEMP_BUF_NLONGS * sizeof(long)); 3247 3248 if (!tcg_use_softmmu) { 3249 /* 3250 * Note that XZR cannot be encoded in the address base register slot, 3251 * as that actually encodes SP. Depending on the guest, we may need 3252 * to zero-extend the guest address via the address index register slot, 3253 * therefore we need to load even a zero guest base into a register. 3254 */ 3255 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 3256 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 3257 } 3258 3259 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 3260 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 3261 3262 /* 3263 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 3264 * and fall through to the rest of the epilogue. 
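* (tcg_out_exit_tb branches straight here when its return value is zero,
* reusing this single clearing of x0.)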
3265 */ 3266 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 3267 tcg_out_bti(s, BTI_J); 3268 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 3269 3270 /* TB epilogue */ 3271 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 3272 tcg_out_bti(s, BTI_J); 3273 3274 /* Remove TCG locals stack space. */ 3275 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3276 FRAME_SIZE - PUSH_SIZE); 3277 3278 /* Restore registers x19..x28. */ 3279 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3280 int ofs = (r - TCG_REG_X19 + 2) * 8; 3281 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3282 } 3283 3284 /* Pop (FP, LR), restore SP to previous frame. */ 3285 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 3286 TCG_REG_SP, PUSH_SIZE, 0, 1); 3287 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 3288} 3289 3290static void tcg_out_tb_start(TCGContext *s) 3291{ 3292 tcg_out_bti(s, BTI_J); 3293} 3294 3295static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 3296{ 3297 int i; 3298 for (i = 0; i < count; ++i) { 3299 p[i] = NOP; 3300 } 3301} 3302 3303typedef struct { 3304 DebugFrameHeader h; 3305 uint8_t fde_def_cfa[4]; 3306 uint8_t fde_reg_ofs[24]; 3307} DebugFrame; 3308 3309#define ELF_HOST_MACHINE EM_AARCH64 3310 3311static const DebugFrame debug_frame = { 3312 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 3313 .h.cie.id = -1, 3314 .h.cie.version = 1, 3315 .h.cie.code_align = 1, 3316 .h.cie.data_align = 0x78, /* sleb128 -8 */ 3317 .h.cie.return_column = TCG_REG_LR, 3318 3319 /* Total FDE size does not include the "len" member. */ 3320 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 3321 3322 .fde_def_cfa = { 3323 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 3324 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 3325 (FRAME_SIZE >> 7) 3326 }, 3327 .fde_reg_ofs = { 3328 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 3329 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 3330 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 3331 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 3332 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 3333 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 3334 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 3335 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 3336 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 3337 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ 3338 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 3339 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 3340 } 3341}; 3342 3343void tcg_register_jit(const void *buf, size_t buf_size) 3344{ 3345 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 3346} 3347