1/* 2 * Initial TCG Implementation for aarch64 3 * 4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH 5 * Written by Claudio Fontana 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or 8 * (at your option) any later version. 9 * 10 * See the COPYING file in the top-level directory for details. 11 */ 12 13#include "../tcg-ldst.c.inc" 14#include "../tcg-pool.c.inc" 15#include "qemu/bitops.h" 16 17/* We're going to re-use TCGType in setting of the SF bit, which controls 18 the size of the operation performed. If we know the values match, it 19 makes things much cleaner. */ 20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); 21 22#ifdef CONFIG_DEBUG_TCG 23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 24 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 25 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 26 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 27 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp", 28 29 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 30 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 31 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 32 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31", 33}; 34#endif /* CONFIG_DEBUG_TCG */ 35 36static const int tcg_target_reg_alloc_order[] = { 37 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, 38 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, 39 TCG_REG_X28, /* we will reserve this for guest_base if configured */ 40 41 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, 42 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, 43 TCG_REG_X16, TCG_REG_X17, 44 45 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 46 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, 47 48 /* X18 reserved by system */ 49 /* X19 reserved for AREG0 */ 50 /* X29 reserved as fp */ 51 /* X30 reserved as temporary */ 52 53 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, 54 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, 55 /* V8 - V15 are call-saved, and skipped. */ 56 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, 57 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, 58 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, 59 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, 60}; 61 62static const int tcg_target_call_iarg_regs[8] = { 63 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 64 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 65}; 66static const int tcg_target_call_oarg_regs[1] = { 67 TCG_REG_X0 68}; 69 70#define TCG_REG_TMP TCG_REG_X30 71#define TCG_VEC_TMP TCG_REG_V31 72 73#ifndef CONFIG_SOFTMMU 74/* Note that XZR cannot be encoded in the address base register slot, 75 as that actaully encodes SP. So if we need to zero-extend the guest 76 address, via the address index register slot, we need to load even 77 a zero guest base into a register. */ 78#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) 79#define TCG_REG_GUEST_BASE TCG_REG_X28 80#endif 81 82static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 83{ 84 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 85 ptrdiff_t offset = target - src_rx; 86 87 if (offset == sextract64(offset, 0, 26)) { 88 /* read instruction, mask away previous PC_REL26 parameter contents, 89 set the proper offset, then write back the instruction. 
*/ 90 *src_rw = deposit32(*src_rw, 0, 26, offset); 91 return true; 92 } 93 return false; 94} 95 96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 97{ 98 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 99 ptrdiff_t offset = target - src_rx; 100 101 if (offset == sextract64(offset, 0, 19)) { 102 *src_rw = deposit32(*src_rw, 5, 19, offset); 103 return true; 104 } 105 return false; 106} 107 108static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 109 intptr_t value, intptr_t addend) 110{ 111 tcg_debug_assert(addend == 0); 112 switch (type) { 113 case R_AARCH64_JUMP26: 114 case R_AARCH64_CALL26: 115 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 116 case R_AARCH64_CONDBR19: 117 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 118 default: 119 g_assert_not_reached(); 120 } 121} 122 123#define TCG_CT_CONST_AIMM 0x100 124#define TCG_CT_CONST_LIMM 0x200 125#define TCG_CT_CONST_ZERO 0x400 126#define TCG_CT_CONST_MONE 0x800 127#define TCG_CT_CONST_ORRI 0x1000 128#define TCG_CT_CONST_ANDI 0x2000 129 130#define ALL_GENERAL_REGS 0xffffffffu 131#define ALL_VECTOR_REGS 0xffffffff00000000ull 132 133#ifdef CONFIG_SOFTMMU 134#define ALL_QLDST_REGS \ 135 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ 136 (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) 137#else 138#define ALL_QLDST_REGS ALL_GENERAL_REGS 139#endif 140 141/* Match a constant valid for addition (12-bit, optionally shifted). */ 142static inline bool is_aimm(uint64_t val) 143{ 144 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 145} 146 147/* Match a constant valid for logical operations. */ 148static inline bool is_limm(uint64_t val) 149{ 150 /* Taking a simplified view of the logical immediates for now, ignoring 151 the replication that can happen across the field. Match bit patterns 152 of the forms 153 0....01....1 154 0..01..10..0 155 and their inverses. */ 156 157 /* Make things easier below, by testing the form with msb clear. */ 158 if ((int64_t)val < 0) { 159 val = ~val; 160 } 161 if (val == 0) { 162 return false; 163 } 164 val += val & -val; 165 return (val & (val - 1)) == 0; 166} 167 168/* Return true if v16 is a valid 16-bit shifted immediate. */ 169static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 170{ 171 if (v16 == (v16 & 0xff)) { 172 *cmode = 0x8; 173 *imm8 = v16 & 0xff; 174 return true; 175 } else if (v16 == (v16 & 0xff00)) { 176 *cmode = 0xa; 177 *imm8 = v16 >> 8; 178 return true; 179 } 180 return false; 181} 182 183/* Return true if v32 is a valid 32-bit shifted immediate. */ 184static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 185{ 186 if (v32 == (v32 & 0xff)) { 187 *cmode = 0x0; 188 *imm8 = v32 & 0xff; 189 return true; 190 } else if (v32 == (v32 & 0xff00)) { 191 *cmode = 0x2; 192 *imm8 = (v32 >> 8) & 0xff; 193 return true; 194 } else if (v32 == (v32 & 0xff0000)) { 195 *cmode = 0x4; 196 *imm8 = (v32 >> 16) & 0xff; 197 return true; 198 } else if (v32 == (v32 & 0xff000000)) { 199 *cmode = 0x6; 200 *imm8 = v32 >> 24; 201 return true; 202 } 203 return false; 204} 205 206/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 207static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 208{ 209 if ((v32 & 0xffff00ff) == 0xff) { 210 *cmode = 0xc; 211 *imm8 = (v32 >> 8) & 0xff; 212 return true; 213 } else if ((v32 & 0xff00ffff) == 0xffff) { 214 *cmode = 0xd; 215 *imm8 = (v32 >> 16) & 0xff; 216 return true; 217 } 218 return false; 219} 220 221/* Return true if v32 is a valid float32 immediate. 
*/ 222static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 223{ 224 if (extract32(v32, 0, 19) == 0 225 && (extract32(v32, 25, 6) == 0x20 226 || extract32(v32, 25, 6) == 0x1f)) { 227 *cmode = 0xf; 228 *imm8 = (extract32(v32, 31, 1) << 7) 229 | (extract32(v32, 25, 1) << 6) 230 | extract32(v32, 19, 6); 231 return true; 232 } 233 return false; 234} 235 236/* Return true if v64 is a valid float64 immediate. */ 237static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 238{ 239 if (extract64(v64, 0, 48) == 0 240 && (extract64(v64, 54, 9) == 0x100 241 || extract64(v64, 54, 9) == 0x0ff)) { 242 *cmode = 0xf; 243 *imm8 = (extract64(v64, 63, 1) << 7) 244 | (extract64(v64, 54, 1) << 6) 245 | extract64(v64, 48, 6); 246 return true; 247 } 248 return false; 249} 250 251/* 252 * Return non-zero if v32 can be formed by MOVI+ORR. 253 * Place the parameters for MOVI in (cmode, imm8). 254 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 255 */ 256static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 257{ 258 int i; 259 260 for (i = 6; i > 0; i -= 2) { 261 /* Mask out one byte we can add with ORR. */ 262 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 263 if (is_shimm32(tmp, cmode, imm8) || 264 is_soimm32(tmp, cmode, imm8)) { 265 break; 266 } 267 } 268 return i; 269} 270 271/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 272static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 273{ 274 if (v32 == deposit32(v32, 16, 16, v32)) { 275 return is_shimm16(v32, cmode, imm8); 276 } else { 277 return is_shimm32(v32, cmode, imm8); 278 } 279} 280 281static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 282{ 283 if (ct & TCG_CT_CONST) { 284 return 1; 285 } 286 if (type == TCG_TYPE_I32) { 287 val = (int32_t)val; 288 } 289 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 290 return 1; 291 } 292 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 293 return 1; 294 } 295 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 296 return 1; 297 } 298 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 299 return 1; 300 } 301 302 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 303 case 0: 304 break; 305 case TCG_CT_CONST_ANDI: 306 val = ~val; 307 /* fallthru */ 308 case TCG_CT_CONST_ORRI: 309 if (val == deposit64(val, 32, 32, val)) { 310 int cmode, imm8; 311 return is_shimm1632(val, &cmode, &imm8); 312 } 313 break; 314 default: 315 /* Both bits should not be set for the same insn. 
*/ 316 g_assert_not_reached(); 317 } 318 319 return 0; 320} 321 322enum aarch64_cond_code { 323 COND_EQ = 0x0, 324 COND_NE = 0x1, 325 COND_CS = 0x2, /* Unsigned greater or equal */ 326 COND_HS = COND_CS, /* ALIAS greater or equal */ 327 COND_CC = 0x3, /* Unsigned less than */ 328 COND_LO = COND_CC, /* ALIAS Lower */ 329 COND_MI = 0x4, /* Negative */ 330 COND_PL = 0x5, /* Zero or greater */ 331 COND_VS = 0x6, /* Overflow */ 332 COND_VC = 0x7, /* No overflow */ 333 COND_HI = 0x8, /* Unsigned greater than */ 334 COND_LS = 0x9, /* Unsigned less or equal */ 335 COND_GE = 0xa, 336 COND_LT = 0xb, 337 COND_GT = 0xc, 338 COND_LE = 0xd, 339 COND_AL = 0xe, 340 COND_NV = 0xf, /* behaves like COND_AL here */ 341}; 342 343static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 344 [TCG_COND_EQ] = COND_EQ, 345 [TCG_COND_NE] = COND_NE, 346 [TCG_COND_LT] = COND_LT, 347 [TCG_COND_GE] = COND_GE, 348 [TCG_COND_LE] = COND_LE, 349 [TCG_COND_GT] = COND_GT, 350 /* unsigned */ 351 [TCG_COND_LTU] = COND_LO, 352 [TCG_COND_GTU] = COND_HI, 353 [TCG_COND_GEU] = COND_HS, 354 [TCG_COND_LEU] = COND_LS, 355}; 356 357typedef enum { 358 LDST_ST = 0, /* store */ 359 LDST_LD = 1, /* load */ 360 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 361 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 362} AArch64LdstType; 363 364/* We encode the format of the insn into the beginning of the name, so that 365 we can have the preprocessor help "typecheck" the insn vs the output 366 function. Arm didn't provide us with nice names for the formats, so we 367 use the section number of the architecture reference manual in which the 368 instruction group is described. */ 369typedef enum { 370 /* Compare and branch (immediate). */ 371 I3201_CBZ = 0x34000000, 372 I3201_CBNZ = 0x35000000, 373 374 /* Conditional branch (immediate). */ 375 I3202_B_C = 0x54000000, 376 377 /* Unconditional branch (immediate). */ 378 I3206_B = 0x14000000, 379 I3206_BL = 0x94000000, 380 381 /* Unconditional branch (register). */ 382 I3207_BR = 0xd61f0000, 383 I3207_BLR = 0xd63f0000, 384 I3207_RET = 0xd65f0000, 385 386 /* AdvSIMD load/store single structure. */ 387 I3303_LD1R = 0x0d40c000, 388 389 /* Load literal for loading the address at pc-relative offset */ 390 I3305_LDR = 0x58000000, 391 I3305_LDR_v64 = 0x5c000000, 392 I3305_LDR_v128 = 0x9c000000, 393 394 /* Load/store register. Described here as 3.3.12, but the helper 395 that emits them can transform to 3.3.10 or 3.3.13. 
*/ 396 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 397 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 398 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 399 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 400 401 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 402 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 403 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 404 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 405 406 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 407 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 408 409 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 410 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 411 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 412 413 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 414 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 415 416 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 417 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 418 419 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 420 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 421 422 I3312_TO_I3310 = 0x00200800, 423 I3312_TO_I3313 = 0x01000000, 424 425 /* Load/store register pair instructions. */ 426 I3314_LDP = 0x28400000, 427 I3314_STP = 0x28000000, 428 429 /* Add/subtract immediate instructions. */ 430 I3401_ADDI = 0x11000000, 431 I3401_ADDSI = 0x31000000, 432 I3401_SUBI = 0x51000000, 433 I3401_SUBSI = 0x71000000, 434 435 /* Bitfield instructions. */ 436 I3402_BFM = 0x33000000, 437 I3402_SBFM = 0x13000000, 438 I3402_UBFM = 0x53000000, 439 440 /* Extract instruction. */ 441 I3403_EXTR = 0x13800000, 442 443 /* Logical immediate instructions. */ 444 I3404_ANDI = 0x12000000, 445 I3404_ORRI = 0x32000000, 446 I3404_EORI = 0x52000000, 447 I3404_ANDSI = 0x72000000, 448 449 /* Move wide immediate instructions. */ 450 I3405_MOVN = 0x12800000, 451 I3405_MOVZ = 0x52800000, 452 I3405_MOVK = 0x72800000, 453 454 /* PC relative addressing instructions. */ 455 I3406_ADR = 0x10000000, 456 I3406_ADRP = 0x90000000, 457 458 /* Add/subtract shifted register instructions (without a shift). */ 459 I3502_ADD = 0x0b000000, 460 I3502_ADDS = 0x2b000000, 461 I3502_SUB = 0x4b000000, 462 I3502_SUBS = 0x6b000000, 463 464 /* Add/subtract shifted register instructions (with a shift). */ 465 I3502S_ADD_LSL = I3502_ADD, 466 467 /* Add/subtract with carry instructions. */ 468 I3503_ADC = 0x1a000000, 469 I3503_SBC = 0x5a000000, 470 471 /* Conditional select instructions. */ 472 I3506_CSEL = 0x1a800000, 473 I3506_CSINC = 0x1a800400, 474 I3506_CSINV = 0x5a800000, 475 I3506_CSNEG = 0x5a800400, 476 477 /* Data-processing (1 source) instructions. */ 478 I3507_CLZ = 0x5ac01000, 479 I3507_RBIT = 0x5ac00000, 480 I3507_REV = 0x5ac00000, /* + size << 10 */ 481 482 /* Data-processing (2 source) instructions. */ 483 I3508_LSLV = 0x1ac02000, 484 I3508_LSRV = 0x1ac02400, 485 I3508_ASRV = 0x1ac02800, 486 I3508_RORV = 0x1ac02c00, 487 I3508_SMULH = 0x9b407c00, 488 I3508_UMULH = 0x9bc07c00, 489 I3508_UDIV = 0x1ac00800, 490 I3508_SDIV = 0x1ac00c00, 491 492 /* Data-processing (3 source) instructions. */ 493 I3509_MADD = 0x1b000000, 494 I3509_MSUB = 0x1b008000, 495 496 /* Logical shifted register instructions (without a shift). 
*/ 497 I3510_AND = 0x0a000000, 498 I3510_BIC = 0x0a200000, 499 I3510_ORR = 0x2a000000, 500 I3510_ORN = 0x2a200000, 501 I3510_EOR = 0x4a000000, 502 I3510_EON = 0x4a200000, 503 I3510_ANDS = 0x6a000000, 504 505 /* Logical shifted register instructions (with a shift). */ 506 I3502S_AND_LSR = I3510_AND | (1 << 22), 507 508 /* AdvSIMD copy */ 509 I3605_DUP = 0x0e000400, 510 I3605_INS = 0x4e001c00, 511 I3605_UMOV = 0x0e003c00, 512 513 /* AdvSIMD modified immediate */ 514 I3606_MOVI = 0x0f000400, 515 I3606_MVNI = 0x2f000400, 516 I3606_BIC = 0x2f001400, 517 I3606_ORR = 0x0f001400, 518 519 /* AdvSIMD scalar shift by immediate */ 520 I3609_SSHR = 0x5f000400, 521 I3609_SSRA = 0x5f001400, 522 I3609_SHL = 0x5f005400, 523 I3609_USHR = 0x7f000400, 524 I3609_USRA = 0x7f001400, 525 I3609_SLI = 0x7f005400, 526 527 /* AdvSIMD scalar three same */ 528 I3611_SQADD = 0x5e200c00, 529 I3611_SQSUB = 0x5e202c00, 530 I3611_CMGT = 0x5e203400, 531 I3611_CMGE = 0x5e203c00, 532 I3611_SSHL = 0x5e204400, 533 I3611_ADD = 0x5e208400, 534 I3611_CMTST = 0x5e208c00, 535 I3611_UQADD = 0x7e200c00, 536 I3611_UQSUB = 0x7e202c00, 537 I3611_CMHI = 0x7e203400, 538 I3611_CMHS = 0x7e203c00, 539 I3611_USHL = 0x7e204400, 540 I3611_SUB = 0x7e208400, 541 I3611_CMEQ = 0x7e208c00, 542 543 /* AdvSIMD scalar two-reg misc */ 544 I3612_CMGT0 = 0x5e208800, 545 I3612_CMEQ0 = 0x5e209800, 546 I3612_CMLT0 = 0x5e20a800, 547 I3612_ABS = 0x5e20b800, 548 I3612_CMGE0 = 0x7e208800, 549 I3612_CMLE0 = 0x7e209800, 550 I3612_NEG = 0x7e20b800, 551 552 /* AdvSIMD shift by immediate */ 553 I3614_SSHR = 0x0f000400, 554 I3614_SSRA = 0x0f001400, 555 I3614_SHL = 0x0f005400, 556 I3614_SLI = 0x2f005400, 557 I3614_USHR = 0x2f000400, 558 I3614_USRA = 0x2f001400, 559 560 /* AdvSIMD three same. */ 561 I3616_ADD = 0x0e208400, 562 I3616_AND = 0x0e201c00, 563 I3616_BIC = 0x0e601c00, 564 I3616_BIF = 0x2ee01c00, 565 I3616_BIT = 0x2ea01c00, 566 I3616_BSL = 0x2e601c00, 567 I3616_EOR = 0x2e201c00, 568 I3616_MUL = 0x0e209c00, 569 I3616_ORR = 0x0ea01c00, 570 I3616_ORN = 0x0ee01c00, 571 I3616_SUB = 0x2e208400, 572 I3616_CMGT = 0x0e203400, 573 I3616_CMGE = 0x0e203c00, 574 I3616_CMTST = 0x0e208c00, 575 I3616_CMHI = 0x2e203400, 576 I3616_CMHS = 0x2e203c00, 577 I3616_CMEQ = 0x2e208c00, 578 I3616_SMAX = 0x0e206400, 579 I3616_SMIN = 0x0e206c00, 580 I3616_SSHL = 0x0e204400, 581 I3616_SQADD = 0x0e200c00, 582 I3616_SQSUB = 0x0e202c00, 583 I3616_UMAX = 0x2e206400, 584 I3616_UMIN = 0x2e206c00, 585 I3616_UQADD = 0x2e200c00, 586 I3616_UQSUB = 0x2e202c00, 587 I3616_USHL = 0x2e204400, 588 589 /* AdvSIMD two-reg misc. */ 590 I3617_CMGT0 = 0x0e208800, 591 I3617_CMEQ0 = 0x0e209800, 592 I3617_CMLT0 = 0x0e20a800, 593 I3617_CMGE0 = 0x2e208800, 594 I3617_CMLE0 = 0x2e209800, 595 I3617_NOT = 0x2e205800, 596 I3617_ABS = 0x0e20b800, 597 I3617_NEG = 0x2e20b800, 598 599 /* System instructions. */ 600 NOP = 0xd503201f, 601 DMB_ISH = 0xd50338bf, 602 DMB_LD = 0x00000100, 603 DMB_ST = 0x00000200, 604} AArch64Insn; 605 606static inline uint32_t tcg_in32(TCGContext *s) 607{ 608 uint32_t v = *(uint32_t *)s->code_ptr; 609 return v; 610} 611 612/* Emit an opcode with "type-checking" of the format. */ 613#define tcg_out_insn(S, FMT, OP, ...) 
\ 614 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 615 616static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 617 TCGReg rt, TCGReg rn, unsigned size) 618{ 619 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 620} 621 622static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 623 int imm19, TCGReg rt) 624{ 625 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 626} 627 628static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 629 TCGReg rt, int imm19) 630{ 631 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 632} 633 634static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 635 TCGCond c, int imm19) 636{ 637 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 638} 639 640static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 641{ 642 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 643} 644 645static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 646{ 647 tcg_out32(s, insn | rn << 5); 648} 649 650static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 651 TCGReg r1, TCGReg r2, TCGReg rn, 652 tcg_target_long ofs, bool pre, bool w) 653{ 654 insn |= 1u << 31; /* ext */ 655 insn |= pre << 24; 656 insn |= w << 23; 657 658 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 659 insn |= (ofs & (0x7f << 3)) << (15 - 3); 660 661 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 662} 663 664static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 665 TCGReg rd, TCGReg rn, uint64_t aimm) 666{ 667 if (aimm > 0xfff) { 668 tcg_debug_assert((aimm & 0xfff) == 0); 669 aimm >>= 12; 670 tcg_debug_assert(aimm <= 0xfff); 671 aimm |= 1 << 12; /* apply LSL 12 */ 672 } 673 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 674} 675 676/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 677 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 678 that feed the DecodeBitMasks pseudo function. */ 679static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 680 TCGReg rd, TCGReg rn, int n, int immr, int imms) 681{ 682 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 683 | rn << 5 | rd); 684} 685 686#define tcg_out_insn_3404 tcg_out_insn_3402 687 688static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 689 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 690{ 691 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 692 | rn << 5 | rd); 693} 694 695/* This function is used for the Move (wide immediate) instruction group. 696 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 697static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 698 TCGReg rd, uint16_t half, unsigned shift) 699{ 700 tcg_debug_assert((shift & ~0x30) == 0); 701 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 702} 703 704static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 705 TCGReg rd, int64_t disp) 706{ 707 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 708} 709 710/* This function is for both 3.5.2 (Add/Subtract shifted register), for 711 the rare occasion when we actually want to supply a shift amount. 
*/ 712static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, 713 TCGType ext, TCGReg rd, TCGReg rn, 714 TCGReg rm, int imm6) 715{ 716 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); 717} 718 719/* This function is for 3.5.2 (Add/subtract shifted register), 720 and 3.5.10 (Logical shifted register), for the vast majorty of cases 721 when we don't want to apply a shift. Thus it can also be used for 722 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */ 723static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, 724 TCGReg rd, TCGReg rn, TCGReg rm) 725{ 726 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); 727} 728 729#define tcg_out_insn_3503 tcg_out_insn_3502 730#define tcg_out_insn_3508 tcg_out_insn_3502 731#define tcg_out_insn_3510 tcg_out_insn_3502 732 733static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, 734 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) 735{ 736 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd 737 | tcg_cond_to_aarch64[c] << 12); 738} 739 740static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, 741 TCGReg rd, TCGReg rn) 742{ 743 tcg_out32(s, insn | ext << 31 | rn << 5 | rd); 744} 745 746static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, 747 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) 748{ 749 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); 750} 751 752static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q, 753 TCGReg rd, TCGReg rn, int dst_idx, int src_idx) 754{ 755 /* Note that bit 11 set means general register input. Therefore 756 we can handle both register sets with one function. */ 757 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11) 758 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5); 759} 760 761static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, 762 TCGReg rd, bool op, int cmode, uint8_t imm8) 763{ 764 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f) 765 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); 766} 767 768static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn, 769 TCGReg rd, TCGReg rn, unsigned immhb) 770{ 771 tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f)); 772} 773 774static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn, 775 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 776{ 777 tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16 778 | (rn & 0x1f) << 5 | (rd & 0x1f)); 779} 780 781static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn, 782 unsigned size, TCGReg rd, TCGReg rn) 783{ 784 tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f)); 785} 786 787static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, 788 TCGReg rd, TCGReg rn, unsigned immhb) 789{ 790 tcg_out32(s, insn | q << 30 | immhb << 16 791 | (rn & 0x1f) << 5 | (rd & 0x1f)); 792} 793 794static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q, 795 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 796{ 797 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16 798 | (rn & 0x1f) << 5 | (rd & 0x1f)); 799} 800 801static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q, 802 unsigned size, TCGReg rd, TCGReg rn) 803{ 804 tcg_out32(s, insn | q << 30 | (size << 22) 805 | (rn & 0x1f) << 5 | (rd & 0x1f)); 806} 807 808static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, 809 TCGReg rd, TCGReg base, TCGType ext, 810 TCGReg regoff) 811{ 
812 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 813 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 814 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 815} 816 817static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 818 TCGReg rd, TCGReg rn, intptr_t offset) 819{ 820 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 821} 822 823static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 824 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 825{ 826 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 827 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 828 | rn << 5 | (rd & 0x1f)); 829} 830 831/* Register to register move using ORR (shifted register with no shift). */ 832static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 833{ 834 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 835} 836 837/* Register to register move using ADDI (move to/from SP). */ 838static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 839{ 840 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 841} 842 843/* This function is used for the Logical (immediate) instruction group. 844 The value of LIMM must satisfy IS_LIMM. See the comment above about 845 only supporting simplified logical immediates. */ 846static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 847 TCGReg rd, TCGReg rn, uint64_t limm) 848{ 849 unsigned h, l, r, c; 850 851 tcg_debug_assert(is_limm(limm)); 852 853 h = clz64(limm); 854 l = ctz64(limm); 855 if (l == 0) { 856 r = 0; /* form 0....01....1 */ 857 c = ctz64(~limm) - 1; 858 if (h == 0) { 859 r = clz64(~limm); /* form 1..10..01..1 */ 860 c += r; 861 } 862 } else { 863 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 864 c = r - h - 1; 865 } 866 if (ext == TCG_TYPE_I32) { 867 r &= 31; 868 c &= 31; 869 } 870 871 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 872} 873 874static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 875 TCGReg rd, int64_t v64) 876{ 877 bool q = type == TCG_TYPE_V128; 878 int cmode, imm8, i; 879 880 /* Test all bytes equal first. */ 881 if (vece == MO_8) { 882 imm8 = (uint8_t)v64; 883 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 884 return; 885 } 886 887 /* 888 * Test all bytes 0x00 or 0xff second. This can match cases that 889 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 890 */ 891 for (i = imm8 = 0; i < 8; i++) { 892 uint8_t byte = v64 >> (i * 8); 893 if (byte == 0xff) { 894 imm8 |= 1 << i; 895 } else if (byte != 0) { 896 goto fail_bytes; 897 } 898 } 899 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 900 return; 901 fail_bytes: 902 903 /* 904 * Tests for various replications. For each element width, if we 905 * cannot find an expansion there's no point checking a larger 906 * width because we already know by replication it cannot match. 907 */ 908 if (vece == MO_16) { 909 uint16_t v16 = v64; 910 911 if (is_shimm16(v16, &cmode, &imm8)) { 912 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 913 return; 914 } 915 if (is_shimm16(~v16, &cmode, &imm8)) { 916 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 917 return; 918 } 919 920 /* 921 * Otherwise, all remaining constants can be loaded in two insns: 922 * rd = v16 & 0xff, rd |= v16 & 0xff00. 
923 */ 924 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 925 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 926 return; 927 } else if (vece == MO_32) { 928 uint32_t v32 = v64; 929 uint32_t n32 = ~v32; 930 931 if (is_shimm32(v32, &cmode, &imm8) || 932 is_soimm32(v32, &cmode, &imm8) || 933 is_fimm32(v32, &cmode, &imm8)) { 934 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 935 return; 936 } 937 if (is_shimm32(n32, &cmode, &imm8) || 938 is_soimm32(n32, &cmode, &imm8)) { 939 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 940 return; 941 } 942 943 /* 944 * Restrict the set of constants to those we can load with 945 * two instructions. Others we load from the pool. 946 */ 947 i = is_shimm32_pair(v32, &cmode, &imm8); 948 if (i) { 949 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 950 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 951 return; 952 } 953 i = is_shimm32_pair(n32, &cmode, &imm8); 954 if (i) { 955 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 956 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 957 return; 958 } 959 } else if (is_fimm64(v64, &cmode, &imm8)) { 960 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 961 return; 962 } 963 964 /* 965 * As a last resort, load from the constant pool. Sadly there 966 * is no LD1R (literal), so store the full 16-byte vector. 967 */ 968 if (type == TCG_TYPE_V128) { 969 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 970 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 971 } else { 972 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 973 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 974 } 975} 976 977static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 978 TCGReg rd, TCGReg rs) 979{ 980 int is_q = type - TCG_TYPE_V64; 981 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 982 return true; 983} 984 985static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 986 TCGReg r, TCGReg base, intptr_t offset) 987{ 988 TCGReg temp = TCG_REG_TMP; 989 990 if (offset < -0xffffff || offset > 0xffffff) { 991 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 992 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 993 base = temp; 994 } else { 995 AArch64Insn add_insn = I3401_ADDI; 996 997 if (offset < 0) { 998 add_insn = I3401_SUBI; 999 offset = -offset; 1000 } 1001 if (offset & 0xfff000) { 1002 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1003 base = temp; 1004 } 1005 if (offset & 0xfff) { 1006 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1007 base = temp; 1008 } 1009 } 1010 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1011 return true; 1012} 1013 1014static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1015 tcg_target_long value) 1016{ 1017 tcg_target_long svalue = value; 1018 tcg_target_long ivalue = ~value; 1019 tcg_target_long t0, t1, t2; 1020 int s0, s1; 1021 AArch64Insn opc; 1022 1023 switch (type) { 1024 case TCG_TYPE_I32: 1025 case TCG_TYPE_I64: 1026 tcg_debug_assert(rd < 32); 1027 break; 1028 default: 1029 g_assert_not_reached(); 1030 } 1031 1032 /* For 32-bit values, discard potential garbage in value. For 64-bit 1033 values within [2**31, 2**32-1], we can create smaller sequences by 1034 interpreting this as a negative 32-bit number, while ensuring that 1035 the high 32 bits are cleared by setting SF=0. 
*/ 1036 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1037 svalue = (int32_t)value; 1038 value = (uint32_t)value; 1039 ivalue = (uint32_t)ivalue; 1040 type = TCG_TYPE_I32; 1041 } 1042 1043 /* Speed things up by handling the common case of small positive 1044 and negative values specially. */ 1045 if ((value & ~0xffffull) == 0) { 1046 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1047 return; 1048 } else if ((ivalue & ~0xffffull) == 0) { 1049 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1050 return; 1051 } 1052 1053 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1054 use the sign-extended value. That lets us match rotated values such 1055 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1056 if (is_limm(svalue)) { 1057 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1058 return; 1059 } 1060 1061 /* Look for host pointer values within 4G of the PC. This happens 1062 often when loading pointers to QEMU's own data structures. */ 1063 if (type == TCG_TYPE_I64) { 1064 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1065 tcg_target_long disp = value - src_rx; 1066 if (disp == sextract64(disp, 0, 21)) { 1067 tcg_out_insn(s, 3406, ADR, rd, disp); 1068 return; 1069 } 1070 disp = (value >> 12) - (src_rx >> 12); 1071 if (disp == sextract64(disp, 0, 21)) { 1072 tcg_out_insn(s, 3406, ADRP, rd, disp); 1073 if (value & 0xfff) { 1074 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1075 } 1076 return; 1077 } 1078 } 1079 1080 /* Would it take fewer insns to begin with MOVN? */ 1081 if (ctpop64(value) >= 32) { 1082 t0 = ivalue; 1083 opc = I3405_MOVN; 1084 } else { 1085 t0 = value; 1086 opc = I3405_MOVZ; 1087 } 1088 s0 = ctz64(t0) & (63 & -16); 1089 t1 = t0 & ~(0xffffull << s0); 1090 s1 = ctz64(t1) & (63 & -16); 1091 t2 = t1 & ~(0xffffull << s1); 1092 if (t2 == 0) { 1093 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1094 if (t1 != 0) { 1095 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1096 } 1097 return; 1098 } 1099 1100 /* For more than 2 insns, dump it into the constant pool. */ 1101 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1102 tcg_out_insn(s, 3305, LDR, 0, rd); 1103} 1104 1105/* Define something more legible for general use. */ 1106#define tcg_out_ldst_r tcg_out_insn_3310 1107 1108static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1109 TCGReg rn, intptr_t offset, int lgsize) 1110{ 1111 /* If the offset is naturally aligned and in range, then we can 1112 use the scaled uimm12 encoding */ 1113 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1114 uintptr_t scaled_uimm = offset >> lgsize; 1115 if (scaled_uimm <= 0xfff) { 1116 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1117 return; 1118 } 1119 } 1120 1121 /* Small signed offsets can use the unscaled encoding. */ 1122 if (offset >= -256 && offset < 256) { 1123 tcg_out_insn_3312(s, insn, rd, rn, offset); 1124 return; 1125 } 1126 1127 /* Worst-case scenario, move offset to temp register, use reg offset. 
*/ 1128 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1129 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1130} 1131 1132static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1133{ 1134 if (ret == arg) { 1135 return true; 1136 } 1137 switch (type) { 1138 case TCG_TYPE_I32: 1139 case TCG_TYPE_I64: 1140 if (ret < 32 && arg < 32) { 1141 tcg_out_movr(s, type, ret, arg); 1142 break; 1143 } else if (ret < 32) { 1144 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1145 break; 1146 } else if (arg < 32) { 1147 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1148 break; 1149 } 1150 /* FALLTHRU */ 1151 1152 case TCG_TYPE_V64: 1153 tcg_debug_assert(ret >= 32 && arg >= 32); 1154 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1155 break; 1156 case TCG_TYPE_V128: 1157 tcg_debug_assert(ret >= 32 && arg >= 32); 1158 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1159 break; 1160 1161 default: 1162 g_assert_not_reached(); 1163 } 1164 return true; 1165} 1166 1167static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1168 TCGReg base, intptr_t ofs) 1169{ 1170 AArch64Insn insn; 1171 int lgsz; 1172 1173 switch (type) { 1174 case TCG_TYPE_I32: 1175 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1176 lgsz = 2; 1177 break; 1178 case TCG_TYPE_I64: 1179 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1180 lgsz = 3; 1181 break; 1182 case TCG_TYPE_V64: 1183 insn = I3312_LDRVD; 1184 lgsz = 3; 1185 break; 1186 case TCG_TYPE_V128: 1187 insn = I3312_LDRVQ; 1188 lgsz = 4; 1189 break; 1190 default: 1191 g_assert_not_reached(); 1192 } 1193 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1194} 1195 1196static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1197 TCGReg base, intptr_t ofs) 1198{ 1199 AArch64Insn insn; 1200 int lgsz; 1201 1202 switch (type) { 1203 case TCG_TYPE_I32: 1204 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1205 lgsz = 2; 1206 break; 1207 case TCG_TYPE_I64: 1208 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1209 lgsz = 3; 1210 break; 1211 case TCG_TYPE_V64: 1212 insn = I3312_STRVD; 1213 lgsz = 3; 1214 break; 1215 case TCG_TYPE_V128: 1216 insn = I3312_STRVQ; 1217 lgsz = 4; 1218 break; 1219 default: 1220 g_assert_not_reached(); 1221 } 1222 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1223} 1224 1225static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1226 TCGReg base, intptr_t ofs) 1227{ 1228 if (type <= TCG_TYPE_I64 && val == 0) { 1229 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1230 return true; 1231 } 1232 return false; 1233} 1234 1235static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1236 TCGReg rn, unsigned int a, unsigned int b) 1237{ 1238 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1239} 1240 1241static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1242 TCGReg rn, unsigned int a, unsigned int b) 1243{ 1244 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1245} 1246 1247static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1248 TCGReg rn, unsigned int a, unsigned int b) 1249{ 1250 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1251} 1252 1253static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1254 TCGReg rn, TCGReg rm, unsigned int a) 1255{ 1256 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1257} 1258 1259static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1260 TCGReg rd, TCGReg rn, unsigned int m) 1261{ 1262 int bits = ext ? 
64 : 32; 1263 int max = bits - 1; 1264 tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max); 1265} 1266 1267static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1268 TCGReg rd, TCGReg rn, unsigned int m) 1269{ 1270 int max = ext ? 63 : 31; 1271 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1272} 1273 1274static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1275 TCGReg rd, TCGReg rn, unsigned int m) 1276{ 1277 int max = ext ? 63 : 31; 1278 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1279} 1280 1281static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1282 TCGReg rd, TCGReg rn, unsigned int m) 1283{ 1284 int max = ext ? 63 : 31; 1285 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1286} 1287 1288static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1289 TCGReg rd, TCGReg rn, unsigned int m) 1290{ 1291 int max = ext ? 63 : 31; 1292 tcg_out_extr(s, ext, rd, rn, rn, -m & max); 1293} 1294 1295static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1296 TCGReg rn, unsigned lsb, unsigned width) 1297{ 1298 unsigned size = ext ? 64 : 32; 1299 unsigned a = (size - lsb) & (size - 1); 1300 unsigned b = width - 1; 1301 tcg_out_bfm(s, ext, rd, rn, a, b); 1302} 1303 1304static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1305 tcg_target_long b, bool const_b) 1306{ 1307 if (const_b) { 1308 /* Using CMP or CMN aliases. */ 1309 if (b >= 0) { 1310 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1311 } else { 1312 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1313 } 1314 } else { 1315 /* Using CMP alias SUBS wzr, Wn, Wm */ 1316 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1317 } 1318} 1319 1320static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1321{ 1322 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1323 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1324 tcg_out_insn(s, 3206, B, offset); 1325} 1326 1327static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1328{ 1329 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1330 if (offset == sextract64(offset, 0, 26)) { 1331 tcg_out_insn(s, 3206, B, offset); 1332 } else { 1333 /* Choose X9 as a call-clobbered non-LR temporary. 
*/ 1334 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target); 1335 tcg_out_insn(s, 3207, BR, TCG_REG_X9); 1336 } 1337} 1338 1339static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) 1340{ 1341 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1342 if (offset == sextract64(offset, 0, 26)) { 1343 tcg_out_insn(s, 3206, BL, offset); 1344 } else { 1345 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1346 tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); 1347 } 1348} 1349 1350static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 1351 const TCGHelperInfo *info) 1352{ 1353 tcg_out_call_int(s, target); 1354} 1355 1356void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, 1357 uintptr_t jmp_rw, uintptr_t addr) 1358{ 1359 tcg_insn_unit i1, i2; 1360 TCGType rt = TCG_TYPE_I64; 1361 TCGReg rd = TCG_REG_TMP; 1362 uint64_t pair; 1363 1364 ptrdiff_t offset = addr - jmp_rx; 1365 1366 if (offset == sextract64(offset, 0, 26)) { 1367 i1 = I3206_B | ((offset >> 2) & 0x3ffffff); 1368 i2 = NOP; 1369 } else { 1370 offset = (addr >> 12) - (jmp_rx >> 12); 1371 1372 /* patch ADRP */ 1373 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; 1374 /* patch ADDI */ 1375 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; 1376 } 1377 pair = (uint64_t)i2 << 32 | i1; 1378 qatomic_set((uint64_t *)jmp_rw, pair); 1379 flush_idcache_range(jmp_rx, jmp_rw, 8); 1380} 1381 1382static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1383{ 1384 if (!l->has_value) { 1385 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1386 tcg_out_insn(s, 3206, B, 0); 1387 } else { 1388 tcg_out_goto(s, l->u.value_ptr); 1389 } 1390} 1391 1392static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1393 TCGArg b, bool b_const, TCGLabel *l) 1394{ 1395 intptr_t offset; 1396 bool need_cmp; 1397 1398 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1399 need_cmp = false; 1400 } else { 1401 need_cmp = true; 1402 tcg_out_cmp(s, ext, a, b, b_const); 1403 } 1404 1405 if (!l->has_value) { 1406 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1407 offset = tcg_in32(s) >> 5; 1408 } else { 1409 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1410 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1411 } 1412 1413 if (need_cmp) { 1414 tcg_out_insn(s, 3202, B_C, c, offset); 1415 } else if (c == TCG_COND_EQ) { 1416 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1417 } else { 1418 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1419 } 1420} 1421 1422static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, 1423 TCGReg rd, TCGReg rn) 1424{ 1425 /* REV, REV16, REV32 */ 1426 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); 1427} 1428 1429static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1430 TCGReg rd, TCGReg rn) 1431{ 1432 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1433 int bits = (8 << s_bits) - 1; 1434 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1435} 1436 1437static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1438 TCGReg rd, TCGReg rn) 1439{ 1440 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1441 int bits = (8 << s_bits) - 1; 1442 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1443} 1444 1445static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1446 TCGReg rn, int64_t aimm) 1447{ 1448 if (aimm >= 0) { 1449 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1450 } else { 1451 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1452 } 
1453} 1454 1455static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1456 TCGReg rh, TCGReg al, TCGReg ah, 1457 tcg_target_long bl, tcg_target_long bh, 1458 bool const_bl, bool const_bh, bool sub) 1459{ 1460 TCGReg orig_rl = rl; 1461 AArch64Insn insn; 1462 1463 if (rl == ah || (!const_bh && rl == bh)) { 1464 rl = TCG_REG_TMP; 1465 } 1466 1467 if (const_bl) { 1468 if (bl < 0) { 1469 bl = -bl; 1470 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1471 } else { 1472 insn = sub ? I3401_SUBSI : I3401_ADDSI; 1473 } 1474 1475 if (unlikely(al == TCG_REG_XZR)) { 1476 /* ??? We want to allow al to be zero for the benefit of 1477 negation via subtraction. However, that leaves open the 1478 possibility of adding 0+const in the low part, and the 1479 immediate add instructions encode XSP not XZR. Don't try 1480 anything more elaborate here than loading another zero. */ 1481 al = TCG_REG_TMP; 1482 tcg_out_movi(s, ext, al, 0); 1483 } 1484 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1485 } else { 1486 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1487 } 1488 1489 insn = I3503_ADC; 1490 if (const_bh) { 1491 /* Note that the only two constants we support are 0 and -1, and 1492 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1493 if ((bh != 0) ^ sub) { 1494 insn = I3503_SBC; 1495 } 1496 bh = TCG_REG_XZR; 1497 } else if (sub) { 1498 insn = I3503_SBC; 1499 } 1500 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1501 1502 tcg_out_mov(s, ext, orig_rl, rl); 1503} 1504 1505static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1506{ 1507 static const uint32_t sync[] = { 1508 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1509 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1510 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1511 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1512 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1513 }; 1514 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1515} 1516 1517static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1518 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1519{ 1520 TCGReg a1 = a0; 1521 if (is_ctz) { 1522 a1 = TCG_REG_TMP; 1523 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1524 } 1525 if (const_b && b == (ext ? 
64 : 32)) { 1526 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1527 } else { 1528 AArch64Insn sel = I3506_CSEL; 1529 1530 tcg_out_cmp(s, ext, a0, 0, 1); 1531 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1532 1533 if (const_b) { 1534 if (b == -1) { 1535 b = TCG_REG_XZR; 1536 sel = I3506_CSINV; 1537 } else if (b == 0) { 1538 b = TCG_REG_XZR; 1539 } else { 1540 tcg_out_movi(s, ext, d, b); 1541 b = d; 1542 } 1543 } 1544 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1545 } 1546} 1547 1548static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) 1549{ 1550 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1551 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1552 tcg_out_insn(s, 3406, ADR, rd, offset); 1553} 1554 1555#ifdef CONFIG_SOFTMMU 1556/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1557 * MemOpIdx oi, uintptr_t ra) 1558 */ 1559static void * const qemu_ld_helpers[MO_SIZE + 1] = { 1560 [MO_8] = helper_ret_ldub_mmu, 1561#if HOST_BIG_ENDIAN 1562 [MO_16] = helper_be_lduw_mmu, 1563 [MO_32] = helper_be_ldul_mmu, 1564 [MO_64] = helper_be_ldq_mmu, 1565#else 1566 [MO_16] = helper_le_lduw_mmu, 1567 [MO_32] = helper_le_ldul_mmu, 1568 [MO_64] = helper_le_ldq_mmu, 1569#endif 1570}; 1571 1572/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1573 * uintxx_t val, MemOpIdx oi, 1574 * uintptr_t ra) 1575 */ 1576static void * const qemu_st_helpers[MO_SIZE + 1] = { 1577 [MO_8] = helper_ret_stb_mmu, 1578#if HOST_BIG_ENDIAN 1579 [MO_16] = helper_be_stw_mmu, 1580 [MO_32] = helper_be_stl_mmu, 1581 [MO_64] = helper_be_stq_mmu, 1582#else 1583 [MO_16] = helper_le_stw_mmu, 1584 [MO_32] = helper_le_stl_mmu, 1585 [MO_64] = helper_le_stq_mmu, 1586#endif 1587}; 1588 1589static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1590{ 1591 MemOpIdx oi = lb->oi; 1592 MemOp opc = get_memop(oi); 1593 MemOp size = opc & MO_SIZE; 1594 1595 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1596 return false; 1597 } 1598 1599 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1600 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1601 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1602 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1603 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); 1604 if (opc & MO_SIGN) { 1605 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1606 } else { 1607 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1608 } 1609 1610 tcg_out_goto(s, lb->raddr); 1611 return true; 1612} 1613 1614static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1615{ 1616 MemOpIdx oi = lb->oi; 1617 MemOp opc = get_memop(oi); 1618 MemOp size = opc & MO_SIZE; 1619 1620 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1621 return false; 1622 } 1623 1624 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1625 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1626 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1627 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1628 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1629 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); 1630 tcg_out_goto(s, lb->raddr); 1631 return true; 1632} 1633 1634static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, 1635 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1636 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1637{ 1638 TCGLabelQemuLdst *label = new_ldst_label(s); 1639 1640 label->is_ld = is_ld; 1641 label->oi = oi; 1642 
label->type = ext; 1643 label->datalo_reg = data_reg; 1644 label->addrlo_reg = addr_reg; 1645 label->raddr = tcg_splitwx_to_rx(raddr); 1646 label->label_ptr[0] = label_ptr; 1647} 1648 1649/* We expect to use a 7-bit scaled negative offset from ENV. */ 1650QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1651QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1652 1653/* These offsets are built into the LDP below. */ 1654QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1655QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1656 1657/* Load and compare a TLB entry, emitting the conditional jump to the 1658 slow path for the failure case, which will be patched later when finalizing 1659 the slow path. Generated code returns the host addend in X1, 1660 clobbers X0,X2,X3,TMP. */ 1661static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, 1662 tcg_insn_unit **label_ptr, int mem_index, 1663 bool is_read) 1664{ 1665 unsigned a_bits = get_alignment_bits(opc); 1666 unsigned s_bits = opc & MO_SIZE; 1667 unsigned a_mask = (1u << a_bits) - 1; 1668 unsigned s_mask = (1u << s_bits) - 1; 1669 TCGReg x3; 1670 TCGType mask_type; 1671 uint64_t compare_mask; 1672 1673 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1674 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1675 1676 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1677 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1678 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1679 1680 /* Extract the TLB index from the address into X0. */ 1681 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1682 TCG_REG_X0, TCG_REG_X0, addr_reg, 1683 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1684 1685 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ 1686 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1687 1688 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1689 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read 1690 ? offsetof(CPUTLBEntry, addr_read) 1691 : offsetof(CPUTLBEntry, addr_write)); 1692 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1693 offsetof(CPUTLBEntry, addend)); 1694 1695 /* For aligned accesses, we check the first byte and include the alignment 1696 bits within the address. For unaligned access, we check that we don't 1697 cross pages using the address of the last byte of the access. */ 1698 if (a_bits >= s_bits) { 1699 x3 = addr_reg; 1700 } else { 1701 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1702 TCG_REG_X3, addr_reg, s_mask - a_mask); 1703 x3 = TCG_REG_X3; 1704 } 1705 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1706 1707 /* Store the page mask part of the address into X3. */ 1708 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1709 TCG_REG_X3, x3, compare_mask); 1710 1711 /* Perform the address comparison. */ 1712 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1713 1714 /* If not equal, we jump to the slow path. 
*/ 1715 *label_ptr = s->code_ptr; 1716 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1717} 1718 1719#else 1720static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, 1721 unsigned a_bits) 1722{ 1723 unsigned a_mask = (1 << a_bits) - 1; 1724 TCGLabelQemuLdst *label = new_ldst_label(s); 1725 1726 label->is_ld = is_ld; 1727 label->addrlo_reg = addr_reg; 1728 1729 /* tst addr, #mask */ 1730 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); 1731 1732 label->label_ptr[0] = s->code_ptr; 1733 1734 /* b.ne slow_path */ 1735 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1736 1737 label->raddr = tcg_splitwx_to_rx(s->code_ptr); 1738} 1739 1740static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) 1741{ 1742 if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1743 return false; 1744 } 1745 1746 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg); 1747 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1748 1749 /* "Tail call" to the helper, with the return address back inline. */ 1750 tcg_out_adr(s, TCG_REG_LR, l->raddr); 1751 tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld 1752 : helper_unaligned_st)); 1753 return true; 1754} 1755 1756static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1757{ 1758 return tcg_out_fail_alignment(s, l); 1759} 1760 1761static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 1762{ 1763 return tcg_out_fail_alignment(s, l); 1764} 1765#endif /* CONFIG_SOFTMMU */ 1766 1767static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1768 TCGReg data_r, TCGReg addr_r, 1769 TCGType otype, TCGReg off_r) 1770{ 1771 switch (memop & MO_SSIZE) { 1772 case MO_UB: 1773 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); 1774 break; 1775 case MO_SB: 1776 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1777 data_r, addr_r, otype, off_r); 1778 break; 1779 case MO_UW: 1780 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1781 break; 1782 case MO_SW: 1783 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), 1784 data_r, addr_r, otype, off_r); 1785 break; 1786 case MO_UL: 1787 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1788 break; 1789 case MO_SL: 1790 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); 1791 break; 1792 case MO_UQ: 1793 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); 1794 break; 1795 default: 1796 tcg_abort(); 1797 } 1798} 1799 1800static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1801 TCGReg data_r, TCGReg addr_r, 1802 TCGType otype, TCGReg off_r) 1803{ 1804 switch (memop & MO_SIZE) { 1805 case MO_8: 1806 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); 1807 break; 1808 case MO_16: 1809 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); 1810 break; 1811 case MO_32: 1812 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); 1813 break; 1814 case MO_64: 1815 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); 1816 break; 1817 default: 1818 tcg_abort(); 1819 } 1820} 1821 1822static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1823 MemOpIdx oi, TCGType ext) 1824{ 1825 MemOp memop = get_memop(oi); 1826 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1827 1828 /* Byte swapping is left to middle-end expansion. 
*/ 1829 tcg_debug_assert((memop & MO_BSWAP) == 0); 1830 1831#ifdef CONFIG_SOFTMMU 1832 unsigned mem_index = get_mmuidx(oi); 1833 tcg_insn_unit *label_ptr; 1834 1835 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); 1836 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1837 TCG_REG_X1, otype, addr_reg); 1838 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, 1839 s->code_ptr, label_ptr); 1840#else /* !CONFIG_SOFTMMU */ 1841 unsigned a_bits = get_alignment_bits(memop); 1842 if (a_bits) { 1843 tcg_out_test_alignment(s, true, addr_reg, a_bits); 1844 } 1845 if (USE_GUEST_BASE) { 1846 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1847 TCG_REG_GUEST_BASE, otype, addr_reg); 1848 } else { 1849 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1850 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1851 } 1852#endif /* CONFIG_SOFTMMU */ 1853} 1854 1855static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1856 MemOpIdx oi) 1857{ 1858 MemOp memop = get_memop(oi); 1859 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1860 1861 /* Byte swapping is left to middle-end expansion. */ 1862 tcg_debug_assert((memop & MO_BSWAP) == 0); 1863 1864#ifdef CONFIG_SOFTMMU 1865 unsigned mem_index = get_mmuidx(oi); 1866 tcg_insn_unit *label_ptr; 1867 1868 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); 1869 tcg_out_qemu_st_direct(s, memop, data_reg, 1870 TCG_REG_X1, otype, addr_reg); 1871 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, 1872 data_reg, addr_reg, s->code_ptr, label_ptr); 1873#else /* !CONFIG_SOFTMMU */ 1874 unsigned a_bits = get_alignment_bits(memop); 1875 if (a_bits) { 1876 tcg_out_test_alignment(s, false, addr_reg, a_bits); 1877 } 1878 if (USE_GUEST_BASE) { 1879 tcg_out_qemu_st_direct(s, memop, data_reg, 1880 TCG_REG_GUEST_BASE, otype, addr_reg); 1881 } else { 1882 tcg_out_qemu_st_direct(s, memop, data_reg, 1883 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1884 } 1885#endif /* CONFIG_SOFTMMU */ 1886} 1887 1888static const tcg_insn_unit *tb_ret_addr; 1889 1890static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1891 const TCGArg args[TCG_MAX_OP_ARGS], 1892 const int const_args[TCG_MAX_OP_ARGS]) 1893{ 1894 /* 99% of the time, we can signal the use of extension registers 1895 by looking to see if the opcode handles 64-bit data. */ 1896 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 1897 1898 /* Hoist the loads of the most common arguments. */ 1899 TCGArg a0 = args[0]; 1900 TCGArg a1 = args[1]; 1901 TCGArg a2 = args[2]; 1902 int c2 = const_args[2]; 1903 1904 /* Some operands are defined with "rZ" constraint, a register or 1905 the zero register. These need not actually test args[I] == 0. */ 1906#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 1907 1908 switch (opc) { 1909 case INDEX_op_exit_tb: 1910 /* Reuse the zeroing that exists for goto_ptr. */ 1911 if (a0 == 0) { 1912 tcg_out_goto_long(s, tcg_code_gen_epilogue); 1913 } else { 1914 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 1915 tcg_out_goto_long(s, tb_ret_addr); 1916 } 1917 break; 1918 1919 case INDEX_op_goto_tb: 1920 tcg_debug_assert(s->tb_jmp_insn_offset != NULL); 1921 /* 1922 * Ensure that ADRP+ADD are 8-byte aligned so that an atomic 1923 * write can be used to patch the target address. 
1924 */ 1925 if ((uintptr_t)s->code_ptr & 7) { 1926 tcg_out32(s, NOP); 1927 } 1928 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); 1929 /* 1930 * actual branch destination will be patched by 1931 * tb_target_set_jmp_target later 1932 */ 1933 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); 1934 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); 1935 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1936 set_jmp_reset_offset(s, a0); 1937 break; 1938 1939 case INDEX_op_goto_ptr: 1940 tcg_out_insn(s, 3207, BR, a0); 1941 break; 1942 1943 case INDEX_op_br: 1944 tcg_out_goto_label(s, arg_label(a0)); 1945 break; 1946 1947 case INDEX_op_ld8u_i32: 1948 case INDEX_op_ld8u_i64: 1949 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 1950 break; 1951 case INDEX_op_ld8s_i32: 1952 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 1953 break; 1954 case INDEX_op_ld8s_i64: 1955 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 1956 break; 1957 case INDEX_op_ld16u_i32: 1958 case INDEX_op_ld16u_i64: 1959 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 1960 break; 1961 case INDEX_op_ld16s_i32: 1962 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 1963 break; 1964 case INDEX_op_ld16s_i64: 1965 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 1966 break; 1967 case INDEX_op_ld_i32: 1968 case INDEX_op_ld32u_i64: 1969 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 1970 break; 1971 case INDEX_op_ld32s_i64: 1972 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 1973 break; 1974 case INDEX_op_ld_i64: 1975 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 1976 break; 1977 1978 case INDEX_op_st8_i32: 1979 case INDEX_op_st8_i64: 1980 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 1981 break; 1982 case INDEX_op_st16_i32: 1983 case INDEX_op_st16_i64: 1984 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 1985 break; 1986 case INDEX_op_st_i32: 1987 case INDEX_op_st32_i64: 1988 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 1989 break; 1990 case INDEX_op_st_i64: 1991 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 1992 break; 1993 1994 case INDEX_op_add_i32: 1995 a2 = (int32_t)a2; 1996 /* FALLTHRU */ 1997 case INDEX_op_add_i64: 1998 if (c2) { 1999 tcg_out_addsubi(s, ext, a0, a1, a2); 2000 } else { 2001 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 2002 } 2003 break; 2004 2005 case INDEX_op_sub_i32: 2006 a2 = (int32_t)a2; 2007 /* FALLTHRU */ 2008 case INDEX_op_sub_i64: 2009 if (c2) { 2010 tcg_out_addsubi(s, ext, a0, a1, -a2); 2011 } else { 2012 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 2013 } 2014 break; 2015 2016 case INDEX_op_neg_i64: 2017 case INDEX_op_neg_i32: 2018 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 2019 break; 2020 2021 case INDEX_op_and_i32: 2022 a2 = (int32_t)a2; 2023 /* FALLTHRU */ 2024 case INDEX_op_and_i64: 2025 if (c2) { 2026 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); 2027 } else { 2028 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 2029 } 2030 break; 2031 2032 case INDEX_op_andc_i32: 2033 a2 = (int32_t)a2; 2034 /* FALLTHRU */ 2035 case INDEX_op_andc_i64: 2036 if (c2) { 2037 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2038 } else { 2039 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2040 } 2041 break; 2042 2043 case INDEX_op_or_i32: 2044 a2 = (int32_t)a2; 2045 /* FALLTHRU */ 2046 case INDEX_op_or_i64: 2047 if (c2) { 2048 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2049 } else { 2050 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2051 } 2052 break; 2053 2054 case INDEX_op_orc_i32: 2055 a2 = (int32_t)a2; 2056 /* FALLTHRU */ 2057 case INDEX_op_orc_i64: 2058 if (c2) { 2059 tcg_out_logicali(s, I3404_ORRI, 
ext, a0, a1, ~a2); 2060 } else { 2061 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2062 } 2063 break; 2064 2065 case INDEX_op_xor_i32: 2066 a2 = (int32_t)a2; 2067 /* FALLTHRU */ 2068 case INDEX_op_xor_i64: 2069 if (c2) { 2070 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2071 } else { 2072 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2073 } 2074 break; 2075 2076 case INDEX_op_eqv_i32: 2077 a2 = (int32_t)a2; 2078 /* FALLTHRU */ 2079 case INDEX_op_eqv_i64: 2080 if (c2) { 2081 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2082 } else { 2083 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2084 } 2085 break; 2086 2087 case INDEX_op_not_i64: 2088 case INDEX_op_not_i32: 2089 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2090 break; 2091 2092 case INDEX_op_mul_i64: 2093 case INDEX_op_mul_i32: 2094 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2095 break; 2096 2097 case INDEX_op_div_i64: 2098 case INDEX_op_div_i32: 2099 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2100 break; 2101 case INDEX_op_divu_i64: 2102 case INDEX_op_divu_i32: 2103 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2104 break; 2105 2106 case INDEX_op_rem_i64: 2107 case INDEX_op_rem_i32: 2108 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); 2109 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2110 break; 2111 case INDEX_op_remu_i64: 2112 case INDEX_op_remu_i32: 2113 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); 2114 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2115 break; 2116 2117 case INDEX_op_shl_i64: 2118 case INDEX_op_shl_i32: 2119 if (c2) { 2120 tcg_out_shl(s, ext, a0, a1, a2); 2121 } else { 2122 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2123 } 2124 break; 2125 2126 case INDEX_op_shr_i64: 2127 case INDEX_op_shr_i32: 2128 if (c2) { 2129 tcg_out_shr(s, ext, a0, a1, a2); 2130 } else { 2131 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2132 } 2133 break; 2134 2135 case INDEX_op_sar_i64: 2136 case INDEX_op_sar_i32: 2137 if (c2) { 2138 tcg_out_sar(s, ext, a0, a1, a2); 2139 } else { 2140 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2141 } 2142 break; 2143 2144 case INDEX_op_rotr_i64: 2145 case INDEX_op_rotr_i32: 2146 if (c2) { 2147 tcg_out_rotr(s, ext, a0, a1, a2); 2148 } else { 2149 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2150 } 2151 break; 2152 2153 case INDEX_op_rotl_i64: 2154 case INDEX_op_rotl_i32: 2155 if (c2) { 2156 tcg_out_rotl(s, ext, a0, a1, a2); 2157 } else { 2158 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); 2159 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); 2160 } 2161 break; 2162 2163 case INDEX_op_clz_i64: 2164 case INDEX_op_clz_i32: 2165 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2166 break; 2167 case INDEX_op_ctz_i64: 2168 case INDEX_op_ctz_i32: 2169 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2170 break; 2171 2172 case INDEX_op_brcond_i32: 2173 a1 = (int32_t)a1; 2174 /* FALLTHRU */ 2175 case INDEX_op_brcond_i64: 2176 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2177 break; 2178 2179 case INDEX_op_setcond_i32: 2180 a2 = (int32_t)a2; 2181 /* FALLTHRU */ 2182 case INDEX_op_setcond_i64: 2183 tcg_out_cmp(s, ext, a1, a2, c2); 2184 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). 
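           For example, setcond_i32 d, a, b, lt expands to
               cmp   a, b
               csinc d, wzr, wzr, ge        (i.e. cset d, lt)
           The result is always 0/1, so the CSINC is emitted at 32 bits
           regardless of ext.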
*/ 2185 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2186 TCG_REG_XZR, tcg_invert_cond(args[3])); 2187 break; 2188 2189 case INDEX_op_movcond_i32: 2190 a2 = (int32_t)a2; 2191 /* FALLTHRU */ 2192 case INDEX_op_movcond_i64: 2193 tcg_out_cmp(s, ext, a1, a2, c2); 2194 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2195 break; 2196 2197 case INDEX_op_qemu_ld_i32: 2198 case INDEX_op_qemu_ld_i64: 2199 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2200 break; 2201 case INDEX_op_qemu_st_i32: 2202 case INDEX_op_qemu_st_i64: 2203 tcg_out_qemu_st(s, REG0(0), a1, a2); 2204 break; 2205 2206 case INDEX_op_bswap64_i64: 2207 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2208 break; 2209 case INDEX_op_bswap32_i64: 2210 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2211 if (a2 & TCG_BSWAP_OS) { 2212 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0); 2213 } 2214 break; 2215 case INDEX_op_bswap32_i32: 2216 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2217 break; 2218 case INDEX_op_bswap16_i64: 2219 case INDEX_op_bswap16_i32: 2220 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2221 if (a2 & TCG_BSWAP_OS) { 2222 /* Output must be sign-extended. */ 2223 tcg_out_sxt(s, ext, MO_16, a0, a0); 2224 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2225 /* Output must be zero-extended, but input isn't. */ 2226 tcg_out_uxt(s, MO_16, a0, a0); 2227 } 2228 break; 2229 2230 case INDEX_op_ext8s_i64: 2231 case INDEX_op_ext8s_i32: 2232 tcg_out_sxt(s, ext, MO_8, a0, a1); 2233 break; 2234 case INDEX_op_ext16s_i64: 2235 case INDEX_op_ext16s_i32: 2236 tcg_out_sxt(s, ext, MO_16, a0, a1); 2237 break; 2238 case INDEX_op_ext_i32_i64: 2239 case INDEX_op_ext32s_i64: 2240 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); 2241 break; 2242 case INDEX_op_ext8u_i64: 2243 case INDEX_op_ext8u_i32: 2244 tcg_out_uxt(s, MO_8, a0, a1); 2245 break; 2246 case INDEX_op_ext16u_i64: 2247 case INDEX_op_ext16u_i32: 2248 tcg_out_uxt(s, MO_16, a0, a1); 2249 break; 2250 case INDEX_op_extu_i32_i64: 2251 case INDEX_op_ext32u_i64: 2252 tcg_out_movr(s, TCG_TYPE_I32, a0, a1); 2253 break; 2254 2255 case INDEX_op_deposit_i64: 2256 case INDEX_op_deposit_i32: 2257 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2258 break; 2259 2260 case INDEX_op_extract_i64: 2261 case INDEX_op_extract_i32: 2262 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2263 break; 2264 2265 case INDEX_op_sextract_i64: 2266 case INDEX_op_sextract_i32: 2267 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2268 break; 2269 2270 case INDEX_op_extract2_i64: 2271 case INDEX_op_extract2_i32: 2272 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2273 break; 2274 2275 case INDEX_op_add2_i32: 2276 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2277 (int32_t)args[4], args[5], const_args[4], 2278 const_args[5], false); 2279 break; 2280 case INDEX_op_add2_i64: 2281 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2282 args[5], const_args[4], const_args[5], false); 2283 break; 2284 case INDEX_op_sub2_i32: 2285 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2286 (int32_t)args[4], args[5], const_args[4], 2287 const_args[5], true); 2288 break; 2289 case INDEX_op_sub2_i64: 2290 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2291 args[5], const_args[4], const_args[5], true); 2292 break; 2293 2294 case INDEX_op_muluh_i64: 2295 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2296 break; 2297 case INDEX_op_mulsh_i64: 2298 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2299 break; 2300 2301 case 
INDEX_op_mb: 2302 tcg_out_mb(s, a0); 2303 break; 2304 2305 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2306 case INDEX_op_mov_i64: 2307 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2308 default: 2309 g_assert_not_reached(); 2310 } 2311 2312#undef REG0 2313} 2314 2315static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2316 unsigned vecl, unsigned vece, 2317 const TCGArg args[TCG_MAX_OP_ARGS], 2318 const int const_args[TCG_MAX_OP_ARGS]) 2319{ 2320 static const AArch64Insn cmp_vec_insn[16] = { 2321 [TCG_COND_EQ] = I3616_CMEQ, 2322 [TCG_COND_GT] = I3616_CMGT, 2323 [TCG_COND_GE] = I3616_CMGE, 2324 [TCG_COND_GTU] = I3616_CMHI, 2325 [TCG_COND_GEU] = I3616_CMHS, 2326 }; 2327 static const AArch64Insn cmp_scalar_insn[16] = { 2328 [TCG_COND_EQ] = I3611_CMEQ, 2329 [TCG_COND_GT] = I3611_CMGT, 2330 [TCG_COND_GE] = I3611_CMGE, 2331 [TCG_COND_GTU] = I3611_CMHI, 2332 [TCG_COND_GEU] = I3611_CMHS, 2333 }; 2334 static const AArch64Insn cmp0_vec_insn[16] = { 2335 [TCG_COND_EQ] = I3617_CMEQ0, 2336 [TCG_COND_GT] = I3617_CMGT0, 2337 [TCG_COND_GE] = I3617_CMGE0, 2338 [TCG_COND_LT] = I3617_CMLT0, 2339 [TCG_COND_LE] = I3617_CMLE0, 2340 }; 2341 static const AArch64Insn cmp0_scalar_insn[16] = { 2342 [TCG_COND_EQ] = I3612_CMEQ0, 2343 [TCG_COND_GT] = I3612_CMGT0, 2344 [TCG_COND_GE] = I3612_CMGE0, 2345 [TCG_COND_LT] = I3612_CMLT0, 2346 [TCG_COND_LE] = I3612_CMLE0, 2347 }; 2348 2349 TCGType type = vecl + TCG_TYPE_V64; 2350 unsigned is_q = vecl; 2351 bool is_scalar = !is_q && vece == MO_64; 2352 TCGArg a0, a1, a2, a3; 2353 int cmode, imm8; 2354 2355 a0 = args[0]; 2356 a1 = args[1]; 2357 a2 = args[2]; 2358 2359 switch (opc) { 2360 case INDEX_op_ld_vec: 2361 tcg_out_ld(s, type, a0, a1, a2); 2362 break; 2363 case INDEX_op_st_vec: 2364 tcg_out_st(s, type, a0, a1, a2); 2365 break; 2366 case INDEX_op_dupm_vec: 2367 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2368 break; 2369 case INDEX_op_add_vec: 2370 if (is_scalar) { 2371 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2372 } else { 2373 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2374 } 2375 break; 2376 case INDEX_op_sub_vec: 2377 if (is_scalar) { 2378 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2379 } else { 2380 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2381 } 2382 break; 2383 case INDEX_op_mul_vec: 2384 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2385 break; 2386 case INDEX_op_neg_vec: 2387 if (is_scalar) { 2388 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2389 } else { 2390 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2391 } 2392 break; 2393 case INDEX_op_abs_vec: 2394 if (is_scalar) { 2395 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2396 } else { 2397 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2398 } 2399 break; 2400 case INDEX_op_and_vec: 2401 if (const_args[2]) { 2402 is_shimm1632(~a2, &cmode, &imm8); 2403 if (a0 == a1) { 2404 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2405 return; 2406 } 2407 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2408 a2 = a0; 2409 } 2410 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2411 break; 2412 case INDEX_op_or_vec: 2413 if (const_args[2]) { 2414 is_shimm1632(a2, &cmode, &imm8); 2415 if (a0 == a1) { 2416 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2417 return; 2418 } 2419 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2420 a2 = a0; 2421 } 2422 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2423 break; 2424 case INDEX_op_andc_vec: 2425 if (const_args[2]) { 2426 is_shimm1632(a2, &cmode, &imm8); 2427 if (a0 == a1) { 2428 
tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2429 return; 2430 } 2431 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2432 a2 = a0; 2433 } 2434 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2435 break; 2436 case INDEX_op_orc_vec: 2437 if (const_args[2]) { 2438 is_shimm1632(~a2, &cmode, &imm8); 2439 if (a0 == a1) { 2440 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2441 return; 2442 } 2443 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2444 a2 = a0; 2445 } 2446 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2447 break; 2448 case INDEX_op_xor_vec: 2449 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2450 break; 2451 case INDEX_op_ssadd_vec: 2452 if (is_scalar) { 2453 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2454 } else { 2455 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2456 } 2457 break; 2458 case INDEX_op_sssub_vec: 2459 if (is_scalar) { 2460 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2461 } else { 2462 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2463 } 2464 break; 2465 case INDEX_op_usadd_vec: 2466 if (is_scalar) { 2467 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2468 } else { 2469 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2470 } 2471 break; 2472 case INDEX_op_ussub_vec: 2473 if (is_scalar) { 2474 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2475 } else { 2476 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2477 } 2478 break; 2479 case INDEX_op_smax_vec: 2480 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2481 break; 2482 case INDEX_op_smin_vec: 2483 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2484 break; 2485 case INDEX_op_umax_vec: 2486 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2487 break; 2488 case INDEX_op_umin_vec: 2489 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2490 break; 2491 case INDEX_op_not_vec: 2492 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2493 break; 2494 case INDEX_op_shli_vec: 2495 if (is_scalar) { 2496 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2497 } else { 2498 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2499 } 2500 break; 2501 case INDEX_op_shri_vec: 2502 if (is_scalar) { 2503 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2504 } else { 2505 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2506 } 2507 break; 2508 case INDEX_op_sari_vec: 2509 if (is_scalar) { 2510 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2511 } else { 2512 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2513 } 2514 break; 2515 case INDEX_op_aa64_sli_vec: 2516 if (is_scalar) { 2517 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2518 } else { 2519 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2520 } 2521 break; 2522 case INDEX_op_shlv_vec: 2523 if (is_scalar) { 2524 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2525 } else { 2526 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2527 } 2528 break; 2529 case INDEX_op_aa64_sshl_vec: 2530 if (is_scalar) { 2531 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2532 } else { 2533 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2534 } 2535 break; 2536 case INDEX_op_cmp_vec: 2537 { 2538 TCGCond cond = args[3]; 2539 AArch64Insn insn; 2540 2541 if (cond == TCG_COND_NE) { 2542 if (const_args[2]) { 2543 if (is_scalar) { 2544 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 2545 } else { 2546 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2547 } 2548 } else { 2549 if (is_scalar) { 2550 tcg_out_insn(s, 3611, CMEQ, 
vece, a0, a1, a2); 2551 } else { 2552 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2553 } 2554 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2555 } 2556 } else { 2557 if (const_args[2]) { 2558 if (is_scalar) { 2559 insn = cmp0_scalar_insn[cond]; 2560 if (insn) { 2561 tcg_out_insn_3612(s, insn, vece, a0, a1); 2562 break; 2563 } 2564 } else { 2565 insn = cmp0_vec_insn[cond]; 2566 if (insn) { 2567 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2568 break; 2569 } 2570 } 2571 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2572 a2 = TCG_VEC_TMP; 2573 } 2574 if (is_scalar) { 2575 insn = cmp_scalar_insn[cond]; 2576 if (insn == 0) { 2577 TCGArg t; 2578 t = a1, a1 = a2, a2 = t; 2579 cond = tcg_swap_cond(cond); 2580 insn = cmp_scalar_insn[cond]; 2581 tcg_debug_assert(insn != 0); 2582 } 2583 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2584 } else { 2585 insn = cmp_vec_insn[cond]; 2586 if (insn == 0) { 2587 TCGArg t; 2588 t = a1, a1 = a2, a2 = t; 2589 cond = tcg_swap_cond(cond); 2590 insn = cmp_vec_insn[cond]; 2591 tcg_debug_assert(insn != 0); 2592 } 2593 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2594 } 2595 } 2596 } 2597 break; 2598 2599 case INDEX_op_bitsel_vec: 2600 a3 = args[3]; 2601 if (a0 == a3) { 2602 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2603 } else if (a0 == a2) { 2604 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2605 } else { 2606 if (a0 != a1) { 2607 tcg_out_mov(s, type, a0, a1); 2608 } 2609 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2610 } 2611 break; 2612 2613 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2614 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2615 default: 2616 g_assert_not_reached(); 2617 } 2618} 2619 2620int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2621{ 2622 switch (opc) { 2623 case INDEX_op_add_vec: 2624 case INDEX_op_sub_vec: 2625 case INDEX_op_and_vec: 2626 case INDEX_op_or_vec: 2627 case INDEX_op_xor_vec: 2628 case INDEX_op_andc_vec: 2629 case INDEX_op_orc_vec: 2630 case INDEX_op_neg_vec: 2631 case INDEX_op_abs_vec: 2632 case INDEX_op_not_vec: 2633 case INDEX_op_cmp_vec: 2634 case INDEX_op_shli_vec: 2635 case INDEX_op_shri_vec: 2636 case INDEX_op_sari_vec: 2637 case INDEX_op_ssadd_vec: 2638 case INDEX_op_sssub_vec: 2639 case INDEX_op_usadd_vec: 2640 case INDEX_op_ussub_vec: 2641 case INDEX_op_shlv_vec: 2642 case INDEX_op_bitsel_vec: 2643 return 1; 2644 case INDEX_op_rotli_vec: 2645 case INDEX_op_shrv_vec: 2646 case INDEX_op_sarv_vec: 2647 case INDEX_op_rotlv_vec: 2648 case INDEX_op_rotrv_vec: 2649 return -1; 2650 case INDEX_op_mul_vec: 2651 case INDEX_op_smax_vec: 2652 case INDEX_op_smin_vec: 2653 case INDEX_op_umax_vec: 2654 case INDEX_op_umin_vec: 2655 return vece < MO_64; 2656 2657 default: 2658 return 0; 2659 } 2660} 2661 2662void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2663 TCGArg a0, ...) 
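/*
 * Expansion hook for the vector ops that tcg_can_emit_vec_op() above reports
 * as -1 (rotli, shrv, sarv, rotlv, rotrv): rewrite them in terms of ops the
 * backend emits directly, using the fact that USHL/SSHL shift right when
 * given a negated shift count and that SLI can merge the two halves of a
 * rotate by immediate.
 */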
2664{ 2665 va_list va; 2666 TCGv_vec v0, v1, v2, t1, t2, c1; 2667 TCGArg a2; 2668 2669 va_start(va, a0); 2670 v0 = temp_tcgv_vec(arg_temp(a0)); 2671 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2672 a2 = va_arg(va, TCGArg); 2673 va_end(va); 2674 2675 switch (opc) { 2676 case INDEX_op_rotli_vec: 2677 t1 = tcg_temp_new_vec(type); 2678 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2679 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2680 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2681 tcg_temp_free_vec(t1); 2682 break; 2683 2684 case INDEX_op_shrv_vec: 2685 case INDEX_op_sarv_vec: 2686 /* Right shifts are negative left shifts for AArch64. */ 2687 v2 = temp_tcgv_vec(arg_temp(a2)); 2688 t1 = tcg_temp_new_vec(type); 2689 tcg_gen_neg_vec(vece, t1, v2); 2690 opc = (opc == INDEX_op_shrv_vec 2691 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2692 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2693 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2694 tcg_temp_free_vec(t1); 2695 break; 2696 2697 case INDEX_op_rotlv_vec: 2698 v2 = temp_tcgv_vec(arg_temp(a2)); 2699 t1 = tcg_temp_new_vec(type); 2700 c1 = tcg_constant_vec(type, vece, 8 << vece); 2701 tcg_gen_sub_vec(vece, t1, v2, c1); 2702 /* Right shifts are negative left shifts for AArch64. */ 2703 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2704 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2705 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2706 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2707 tcg_gen_or_vec(vece, v0, v0, t1); 2708 tcg_temp_free_vec(t1); 2709 break; 2710 2711 case INDEX_op_rotrv_vec: 2712 v2 = temp_tcgv_vec(arg_temp(a2)); 2713 t1 = tcg_temp_new_vec(type); 2714 t2 = tcg_temp_new_vec(type); 2715 c1 = tcg_constant_vec(type, vece, 8 << vece); 2716 tcg_gen_neg_vec(vece, t1, v2); 2717 tcg_gen_sub_vec(vece, t2, c1, v2); 2718 /* Right shifts are negative left shifts for AArch64. 
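           Below, t1 becomes shl(v1, -v2), i.e. the logical right shift
           v1 >> v2, and t2 becomes shl(v1, width - v2); ORing them yields
           rotr(v1, v2) = (v1 >> v2) | (v1 << (width - v2)).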
*/ 2719 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2720 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2721 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2722 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2723 tcg_gen_or_vec(vece, v0, t1, t2); 2724 tcg_temp_free_vec(t1); 2725 tcg_temp_free_vec(t2); 2726 break; 2727 2728 default: 2729 g_assert_not_reached(); 2730 } 2731} 2732 2733static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2734{ 2735 switch (op) { 2736 case INDEX_op_goto_ptr: 2737 return C_O0_I1(r); 2738 2739 case INDEX_op_ld8u_i32: 2740 case INDEX_op_ld8s_i32: 2741 case INDEX_op_ld16u_i32: 2742 case INDEX_op_ld16s_i32: 2743 case INDEX_op_ld_i32: 2744 case INDEX_op_ld8u_i64: 2745 case INDEX_op_ld8s_i64: 2746 case INDEX_op_ld16u_i64: 2747 case INDEX_op_ld16s_i64: 2748 case INDEX_op_ld32u_i64: 2749 case INDEX_op_ld32s_i64: 2750 case INDEX_op_ld_i64: 2751 case INDEX_op_neg_i32: 2752 case INDEX_op_neg_i64: 2753 case INDEX_op_not_i32: 2754 case INDEX_op_not_i64: 2755 case INDEX_op_bswap16_i32: 2756 case INDEX_op_bswap32_i32: 2757 case INDEX_op_bswap16_i64: 2758 case INDEX_op_bswap32_i64: 2759 case INDEX_op_bswap64_i64: 2760 case INDEX_op_ext8s_i32: 2761 case INDEX_op_ext16s_i32: 2762 case INDEX_op_ext8u_i32: 2763 case INDEX_op_ext16u_i32: 2764 case INDEX_op_ext8s_i64: 2765 case INDEX_op_ext16s_i64: 2766 case INDEX_op_ext32s_i64: 2767 case INDEX_op_ext8u_i64: 2768 case INDEX_op_ext16u_i64: 2769 case INDEX_op_ext32u_i64: 2770 case INDEX_op_ext_i32_i64: 2771 case INDEX_op_extu_i32_i64: 2772 case INDEX_op_extract_i32: 2773 case INDEX_op_extract_i64: 2774 case INDEX_op_sextract_i32: 2775 case INDEX_op_sextract_i64: 2776 return C_O1_I1(r, r); 2777 2778 case INDEX_op_st8_i32: 2779 case INDEX_op_st16_i32: 2780 case INDEX_op_st_i32: 2781 case INDEX_op_st8_i64: 2782 case INDEX_op_st16_i64: 2783 case INDEX_op_st32_i64: 2784 case INDEX_op_st_i64: 2785 return C_O0_I2(rZ, r); 2786 2787 case INDEX_op_add_i32: 2788 case INDEX_op_add_i64: 2789 case INDEX_op_sub_i32: 2790 case INDEX_op_sub_i64: 2791 case INDEX_op_setcond_i32: 2792 case INDEX_op_setcond_i64: 2793 return C_O1_I2(r, r, rA); 2794 2795 case INDEX_op_mul_i32: 2796 case INDEX_op_mul_i64: 2797 case INDEX_op_div_i32: 2798 case INDEX_op_div_i64: 2799 case INDEX_op_divu_i32: 2800 case INDEX_op_divu_i64: 2801 case INDEX_op_rem_i32: 2802 case INDEX_op_rem_i64: 2803 case INDEX_op_remu_i32: 2804 case INDEX_op_remu_i64: 2805 case INDEX_op_muluh_i64: 2806 case INDEX_op_mulsh_i64: 2807 return C_O1_I2(r, r, r); 2808 2809 case INDEX_op_and_i32: 2810 case INDEX_op_and_i64: 2811 case INDEX_op_or_i32: 2812 case INDEX_op_or_i64: 2813 case INDEX_op_xor_i32: 2814 case INDEX_op_xor_i64: 2815 case INDEX_op_andc_i32: 2816 case INDEX_op_andc_i64: 2817 case INDEX_op_orc_i32: 2818 case INDEX_op_orc_i64: 2819 case INDEX_op_eqv_i32: 2820 case INDEX_op_eqv_i64: 2821 return C_O1_I2(r, r, rL); 2822 2823 case INDEX_op_shl_i32: 2824 case INDEX_op_shr_i32: 2825 case INDEX_op_sar_i32: 2826 case INDEX_op_rotl_i32: 2827 case INDEX_op_rotr_i32: 2828 case INDEX_op_shl_i64: 2829 case INDEX_op_shr_i64: 2830 case INDEX_op_sar_i64: 2831 case INDEX_op_rotl_i64: 2832 case INDEX_op_rotr_i64: 2833 return C_O1_I2(r, r, ri); 2834 2835 case INDEX_op_clz_i32: 2836 case INDEX_op_ctz_i32: 2837 case INDEX_op_clz_i64: 2838 case INDEX_op_ctz_i64: 2839 return C_O1_I2(r, r, rAL); 2840 2841 case INDEX_op_brcond_i32: 2842 case INDEX_op_brcond_i64: 2843 return C_O0_I2(r, rA); 2844 2845 case INDEX_op_movcond_i32: 2846 case INDEX_op_movcond_i64: 2847 return C_O1_I4(r, r, rA, rZ, rZ); 
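    /*
     * Throughout this function, rZ operands pair with the REG0() handling in
     * tcg_out_op(): a constant zero is encoded directly as XZR instead of
     * being loaded into a scratch register.
     */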
2848 2849 case INDEX_op_qemu_ld_i32: 2850 case INDEX_op_qemu_ld_i64: 2851 return C_O1_I1(r, l); 2852 case INDEX_op_qemu_st_i32: 2853 case INDEX_op_qemu_st_i64: 2854 return C_O0_I2(lZ, l); 2855 2856 case INDEX_op_deposit_i32: 2857 case INDEX_op_deposit_i64: 2858 return C_O1_I2(r, 0, rZ); 2859 2860 case INDEX_op_extract2_i32: 2861 case INDEX_op_extract2_i64: 2862 return C_O1_I2(r, rZ, rZ); 2863 2864 case INDEX_op_add2_i32: 2865 case INDEX_op_add2_i64: 2866 case INDEX_op_sub2_i32: 2867 case INDEX_op_sub2_i64: 2868 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 2869 2870 case INDEX_op_add_vec: 2871 case INDEX_op_sub_vec: 2872 case INDEX_op_mul_vec: 2873 case INDEX_op_xor_vec: 2874 case INDEX_op_ssadd_vec: 2875 case INDEX_op_sssub_vec: 2876 case INDEX_op_usadd_vec: 2877 case INDEX_op_ussub_vec: 2878 case INDEX_op_smax_vec: 2879 case INDEX_op_smin_vec: 2880 case INDEX_op_umax_vec: 2881 case INDEX_op_umin_vec: 2882 case INDEX_op_shlv_vec: 2883 case INDEX_op_shrv_vec: 2884 case INDEX_op_sarv_vec: 2885 case INDEX_op_aa64_sshl_vec: 2886 return C_O1_I2(w, w, w); 2887 case INDEX_op_not_vec: 2888 case INDEX_op_neg_vec: 2889 case INDEX_op_abs_vec: 2890 case INDEX_op_shli_vec: 2891 case INDEX_op_shri_vec: 2892 case INDEX_op_sari_vec: 2893 return C_O1_I1(w, w); 2894 case INDEX_op_ld_vec: 2895 case INDEX_op_dupm_vec: 2896 return C_O1_I1(w, r); 2897 case INDEX_op_st_vec: 2898 return C_O0_I2(w, r); 2899 case INDEX_op_dup_vec: 2900 return C_O1_I1(w, wr); 2901 case INDEX_op_or_vec: 2902 case INDEX_op_andc_vec: 2903 return C_O1_I2(w, w, wO); 2904 case INDEX_op_and_vec: 2905 case INDEX_op_orc_vec: 2906 return C_O1_I2(w, w, wN); 2907 case INDEX_op_cmp_vec: 2908 return C_O1_I2(w, w, wZ); 2909 case INDEX_op_bitsel_vec: 2910 return C_O1_I3(w, w, w, w); 2911 case INDEX_op_aa64_sli_vec: 2912 return C_O1_I2(w, 0, w); 2913 2914 default: 2915 g_assert_not_reached(); 2916 } 2917} 2918 2919static void tcg_target_init(TCGContext *s) 2920{ 2921 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2922 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2923 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2924 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2925 2926 tcg_target_call_clobber_regs = -1ull; 2927 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2928 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2929 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 2930 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2931 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2932 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2933 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2934 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2935 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2936 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2937 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2938 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2939 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2940 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2941 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2942 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2943 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 2944 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 2945 tcg_regset_reset_reg(tcg_target_call_clobber_regs, 
TCG_REG_V15); 2946 2947 s->reserved_regs = 0; 2948 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2949 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 2950 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 2951 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 2952 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 2953} 2954 2955/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 2956#define PUSH_SIZE ((30 - 19 + 1) * 8) 2957 2958#define FRAME_SIZE \ 2959 ((PUSH_SIZE \ 2960 + TCG_STATIC_CALL_ARGS_SIZE \ 2961 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 2962 + TCG_TARGET_STACK_ALIGN - 1) \ 2963 & ~(TCG_TARGET_STACK_ALIGN - 1)) 2964 2965/* We're expecting a 2 byte uleb128 encoded value. */ 2966QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 2967 2968/* We're expecting to use a single ADDI insn. */ 2969QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 2970 2971static void tcg_target_qemu_prologue(TCGContext *s) 2972{ 2973 TCGReg r; 2974 2975 /* Push (FP, LR) and allocate space for all saved registers. */ 2976 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 2977 TCG_REG_SP, -PUSH_SIZE, 1, 1); 2978 2979 /* Set up frame pointer for canonical unwinding. */ 2980 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 2981 2982 /* Store callee-preserved regs x19..x28. */ 2983 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2984 int ofs = (r - TCG_REG_X19 + 2) * 8; 2985 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2986 } 2987 2988 /* Make stack space for TCG locals. */ 2989 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2990 FRAME_SIZE - PUSH_SIZE); 2991 2992 /* Inform TCG about how to find TCG locals with register, offset, size. */ 2993 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 2994 CPU_TEMP_BUF_NLONGS * sizeof(long)); 2995 2996#if !defined(CONFIG_SOFTMMU) 2997 if (USE_GUEST_BASE) { 2998 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 2999 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 3000 } 3001#endif 3002 3003 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 3004 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 3005 3006 /* 3007 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 3008 * and fall through to the rest of the epilogue. 3009 */ 3010 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 3011 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 3012 3013 /* TB epilogue */ 3014 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 3015 3016 /* Remove TCG locals stack space. */ 3017 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 3018 FRAME_SIZE - PUSH_SIZE); 3019 3020 /* Restore registers x19..x28. */ 3021 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 3022 int ofs = (r - TCG_REG_X19 + 2) * 8; 3023 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 3024 } 3025 3026 /* Pop (FP, LR), restore SP to previous frame. 
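       The post-index writeback form adds PUSH_SIZE back onto SP as FP/LR are
       reloaded, mirroring the pre-index STP that allocated the save area in
       the prologue.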
*/ 3027 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 3028 TCG_REG_SP, PUSH_SIZE, 0, 1); 3029 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 3030} 3031 3032static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 3033{ 3034 int i; 3035 for (i = 0; i < count; ++i) { 3036 p[i] = NOP; 3037 } 3038} 3039 3040typedef struct { 3041 DebugFrameHeader h; 3042 uint8_t fde_def_cfa[4]; 3043 uint8_t fde_reg_ofs[24]; 3044} DebugFrame; 3045 3046#define ELF_HOST_MACHINE EM_AARCH64 3047 3048static const DebugFrame debug_frame = { 3049 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 3050 .h.cie.id = -1, 3051 .h.cie.version = 1, 3052 .h.cie.code_align = 1, 3053 .h.cie.data_align = 0x78, /* sleb128 -8 */ 3054 .h.cie.return_column = TCG_REG_LR, 3055 3056 /* Total FDE size does not include the "len" member. */ 3057 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 3058 3059 .fde_def_cfa = { 3060 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 3061 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 3062 (FRAME_SIZE >> 7) 3063 }, 3064 .fde_reg_ofs = { 3065 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 3066 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 3067 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 3068 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 3069 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 3070 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 3071 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 3072 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 3073 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 3074 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */ 3075 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 3076 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 3077 } 3078}; 3079 3080void tcg_register_jit(const void *buf, size_t buf_size) 3081{ 3082 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 3083} 3084
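
/*
 * Illustrative only: fde_def_cfa above encodes FRAME_SIZE as a two-byte
 * uleb128.  The actual value depends on TCG_STATIC_CALL_ARGS_SIZE and
 * CPU_TEMP_BUF_NLONGS, but taking FRAME_SIZE == 1248 (0x4e0) as an example,
 * the bytes emitted are (0x4e0 & 0x7f) | 0x80 == 0xe0 and 0x4e0 >> 7 == 0x09,
 * and 9 * 128 + 96 == 1248 recovers the value.  The
 * QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)) above guarantees the two-byte
 * form always suffices.
 */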