1/* 2 * Initial TCG Implementation for aarch64 3 * 4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH 5 * Written by Claudio Fontana 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or 8 * (at your option) any later version. 9 * 10 * See the COPYING file in the top-level directory for details. 11 */ 12 13#include "../tcg-pool.c.inc" 14#include "qemu/bitops.h" 15 16/* We're going to re-use TCGType in setting of the SF bit, which controls 17 the size of the operation performed. If we know the values match, it 18 makes things much cleaner. */ 19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); 20 21#ifdef CONFIG_DEBUG_TCG 22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp", 27 28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31", 32}; 33#endif /* CONFIG_DEBUG_TCG */ 34 35static const int tcg_target_reg_alloc_order[] = { 36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, 37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, 38 TCG_REG_X28, /* we will reserve this for guest_base if configured */ 39 40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, 41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, 42 TCG_REG_X16, TCG_REG_X17, 43 44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, 46 47 /* X18 reserved by system */ 48 /* X19 reserved for AREG0 */ 49 /* X29 reserved as fp */ 50 /* X30 reserved as temporary */ 51 52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, 53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, 54 /* V8 - V15 are call-saved, and skipped. 
*/ 55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, 56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, 57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, 58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, 59}; 60 61static const int tcg_target_call_iarg_regs[8] = { 62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 64}; 65static const int tcg_target_call_oarg_regs[1] = { 66 TCG_REG_X0 67}; 68 69#define TCG_REG_TMP TCG_REG_X30 70#define TCG_VEC_TMP TCG_REG_V31 71 72#ifndef CONFIG_SOFTMMU 73/* Note that XZR cannot be encoded in the address base register slot, 74 as that actaully encodes SP. So if we need to zero-extend the guest 75 address, via the address index register slot, we need to load even 76 a zero guest base into a register. */ 77#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) 78#define TCG_REG_GUEST_BASE TCG_REG_X28 79#endif 80 81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 82{ 83 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 84 ptrdiff_t offset = target - src_rx; 85 86 if (offset == sextract64(offset, 0, 26)) { 87 /* read instruction, mask away previous PC_REL26 parameter contents, 88 set the proper offset, then write back the instruction. 
*/ 89 *src_rw = deposit32(*src_rw, 0, 26, offset); 90 return true; 91 } 92 return false; 93} 94 95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 96{ 97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 98 ptrdiff_t offset = target - src_rx; 99 100 if (offset == sextract64(offset, 0, 19)) { 101 *src_rw = deposit32(*src_rw, 5, 19, offset); 102 return true; 103 } 104 return false; 105} 106 107static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 108 intptr_t value, intptr_t addend) 109{ 110 tcg_debug_assert(addend == 0); 111 switch (type) { 112 case R_AARCH64_JUMP26: 113 case R_AARCH64_CALL26: 114 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 115 case R_AARCH64_CONDBR19: 116 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 117 default: 118 g_assert_not_reached(); 119 } 120} 121 122#define TCG_CT_CONST_AIMM 0x100 123#define TCG_CT_CONST_LIMM 0x200 124#define TCG_CT_CONST_ZERO 0x400 125#define TCG_CT_CONST_MONE 0x800 126#define TCG_CT_CONST_ORRI 0x1000 127#define TCG_CT_CONST_ANDI 0x2000 128 129#define ALL_GENERAL_REGS 0xffffffffu 130#define ALL_VECTOR_REGS 0xffffffff00000000ull 131 132#ifdef CONFIG_SOFTMMU 133#define ALL_QLDST_REGS \ 134 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ 135 (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) 136#else 137#define ALL_QLDST_REGS ALL_GENERAL_REGS 138#endif 139 140/* Match a constant valid for addition (12-bit, optionally shifted). */ 141static inline bool is_aimm(uint64_t val) 142{ 143 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 144} 145 146/* Match a constant valid for logical operations. */ 147static inline bool is_limm(uint64_t val) 148{ 149 /* Taking a simplified view of the logical immediates for now, ignoring 150 the replication that can happen across the field. Match bit patterns 151 of the forms 152 0....01....1 153 0..01..10..0 154 and their inverses. */ 155 156 /* Make things easier below, by testing the form with msb clear. 
*/ 157 if ((int64_t)val < 0) { 158 val = ~val; 159 } 160 if (val == 0) { 161 return false; 162 } 163 val += val & -val; 164 return (val & (val - 1)) == 0; 165} 166 167/* Return true if v16 is a valid 16-bit shifted immediate. */ 168static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 169{ 170 if (v16 == (v16 & 0xff)) { 171 *cmode = 0x8; 172 *imm8 = v16 & 0xff; 173 return true; 174 } else if (v16 == (v16 & 0xff00)) { 175 *cmode = 0xa; 176 *imm8 = v16 >> 8; 177 return true; 178 } 179 return false; 180} 181 182/* Return true if v32 is a valid 32-bit shifted immediate. */ 183static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 184{ 185 if (v32 == (v32 & 0xff)) { 186 *cmode = 0x0; 187 *imm8 = v32 & 0xff; 188 return true; 189 } else if (v32 == (v32 & 0xff00)) { 190 *cmode = 0x2; 191 *imm8 = (v32 >> 8) & 0xff; 192 return true; 193 } else if (v32 == (v32 & 0xff0000)) { 194 *cmode = 0x4; 195 *imm8 = (v32 >> 16) & 0xff; 196 return true; 197 } else if (v32 == (v32 & 0xff000000)) { 198 *cmode = 0x6; 199 *imm8 = v32 >> 24; 200 return true; 201 } 202 return false; 203} 204 205/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 206static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 207{ 208 if ((v32 & 0xffff00ff) == 0xff) { 209 *cmode = 0xc; 210 *imm8 = (v32 >> 8) & 0xff; 211 return true; 212 } else if ((v32 & 0xff00ffff) == 0xffff) { 213 *cmode = 0xd; 214 *imm8 = (v32 >> 16) & 0xff; 215 return true; 216 } 217 return false; 218} 219 220/* Return true if v32 is a valid float32 immediate. */ 221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 222{ 223 if (extract32(v32, 0, 19) == 0 224 && (extract32(v32, 25, 6) == 0x20 225 || extract32(v32, 25, 6) == 0x1f)) { 226 *cmode = 0xf; 227 *imm8 = (extract32(v32, 31, 1) << 7) 228 | (extract32(v32, 25, 1) << 6) 229 | extract32(v32, 19, 6); 230 return true; 231 } 232 return false; 233} 234 235/* Return true if v64 is a valid float64 immediate. 
*/ 236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 237{ 238 if (extract64(v64, 0, 48) == 0 239 && (extract64(v64, 54, 9) == 0x100 240 || extract64(v64, 54, 9) == 0x0ff)) { 241 *cmode = 0xf; 242 *imm8 = (extract64(v64, 63, 1) << 7) 243 | (extract64(v64, 54, 1) << 6) 244 | extract64(v64, 48, 6); 245 return true; 246 } 247 return false; 248} 249 250/* 251 * Return non-zero if v32 can be formed by MOVI+ORR. 252 * Place the parameters for MOVI in (cmode, imm8). 253 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 254 */ 255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 256{ 257 int i; 258 259 for (i = 6; i > 0; i -= 2) { 260 /* Mask out one byte we can add with ORR. */ 261 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 262 if (is_shimm32(tmp, cmode, imm8) || 263 is_soimm32(tmp, cmode, imm8)) { 264 break; 265 } 266 } 267 return i; 268} 269 270/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 272{ 273 if (v32 == deposit32(v32, 16, 16, v32)) { 274 return is_shimm16(v32, cmode, imm8); 275 } else { 276 return is_shimm32(v32, cmode, imm8); 277 } 278} 279 280static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 281{ 282 if (ct & TCG_CT_CONST) { 283 return 1; 284 } 285 if (type == TCG_TYPE_I32) { 286 val = (int32_t)val; 287 } 288 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 289 return 1; 290 } 291 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 292 return 1; 293 } 294 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 295 return 1; 296 } 297 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 298 return 1; 299 } 300 301 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 302 case 0: 303 break; 304 case TCG_CT_CONST_ANDI: 305 val = ~val; 306 /* fallthru */ 307 case TCG_CT_CONST_ORRI: 308 if (val == deposit64(val, 32, 32, val)) { 309 int cmode, imm8; 310 return is_shimm1632(val, &cmode, &imm8); 311 } 312 break; 313 
default: 314 /* Both bits should not be set for the same insn. */ 315 g_assert_not_reached(); 316 } 317 318 return 0; 319} 320 321enum aarch64_cond_code { 322 COND_EQ = 0x0, 323 COND_NE = 0x1, 324 COND_CS = 0x2, /* Unsigned greater or equal */ 325 COND_HS = COND_CS, /* ALIAS greater or equal */ 326 COND_CC = 0x3, /* Unsigned less than */ 327 COND_LO = COND_CC, /* ALIAS Lower */ 328 COND_MI = 0x4, /* Negative */ 329 COND_PL = 0x5, /* Zero or greater */ 330 COND_VS = 0x6, /* Overflow */ 331 COND_VC = 0x7, /* No overflow */ 332 COND_HI = 0x8, /* Unsigned greater than */ 333 COND_LS = 0x9, /* Unsigned less or equal */ 334 COND_GE = 0xa, 335 COND_LT = 0xb, 336 COND_GT = 0xc, 337 COND_LE = 0xd, 338 COND_AL = 0xe, 339 COND_NV = 0xf, /* behaves like COND_AL here */ 340}; 341 342static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 343 [TCG_COND_EQ] = COND_EQ, 344 [TCG_COND_NE] = COND_NE, 345 [TCG_COND_LT] = COND_LT, 346 [TCG_COND_GE] = COND_GE, 347 [TCG_COND_LE] = COND_LE, 348 [TCG_COND_GT] = COND_GT, 349 /* unsigned */ 350 [TCG_COND_LTU] = COND_LO, 351 [TCG_COND_GTU] = COND_HI, 352 [TCG_COND_GEU] = COND_HS, 353 [TCG_COND_LEU] = COND_LS, 354}; 355 356typedef enum { 357 LDST_ST = 0, /* store */ 358 LDST_LD = 1, /* load */ 359 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 360 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 361} AArch64LdstType; 362 363/* We encode the format of the insn into the beginning of the name, so that 364 we can have the preprocessor help "typecheck" the insn vs the output 365 function. Arm didn't provide us with nice names for the formats, so we 366 use the section number of the architecture reference manual in which the 367 instruction group is described. */ 368typedef enum { 369 /* Compare and branch (immediate). */ 370 I3201_CBZ = 0x34000000, 371 I3201_CBNZ = 0x35000000, 372 373 /* Conditional branch (immediate). */ 374 I3202_B_C = 0x54000000, 375 376 /* Unconditional branch (immediate). 
*/ 377 I3206_B = 0x14000000, 378 I3206_BL = 0x94000000, 379 380 /* Unconditional branch (register). */ 381 I3207_BR = 0xd61f0000, 382 I3207_BLR = 0xd63f0000, 383 I3207_RET = 0xd65f0000, 384 385 /* AdvSIMD load/store single structure. */ 386 I3303_LD1R = 0x0d40c000, 387 388 /* Load literal for loading the address at pc-relative offset */ 389 I3305_LDR = 0x58000000, 390 I3305_LDR_v64 = 0x5c000000, 391 I3305_LDR_v128 = 0x9c000000, 392 393 /* Load/store register. Described here as 3.3.12, but the helper 394 that emits them can transform to 3.3.10 or 3.3.13. */ 395 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 396 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 397 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 398 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 399 400 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 401 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 402 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 403 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 404 405 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 406 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 407 408 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 409 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 410 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 411 412 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 413 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 414 415 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 416 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 417 418 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 419 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 420 421 I3312_TO_I3310 = 0x00200800, 422 I3312_TO_I3313 = 0x01000000, 423 424 /* Load/store register pair instructions. */ 425 I3314_LDP = 0x28400000, 426 I3314_STP = 0x28000000, 427 428 /* Add/subtract immediate instructions. 
*/ 429 I3401_ADDI = 0x11000000, 430 I3401_ADDSI = 0x31000000, 431 I3401_SUBI = 0x51000000, 432 I3401_SUBSI = 0x71000000, 433 434 /* Bitfield instructions. */ 435 I3402_BFM = 0x33000000, 436 I3402_SBFM = 0x13000000, 437 I3402_UBFM = 0x53000000, 438 439 /* Extract instruction. */ 440 I3403_EXTR = 0x13800000, 441 442 /* Logical immediate instructions. */ 443 I3404_ANDI = 0x12000000, 444 I3404_ORRI = 0x32000000, 445 I3404_EORI = 0x52000000, 446 447 /* Move wide immediate instructions. */ 448 I3405_MOVN = 0x12800000, 449 I3405_MOVZ = 0x52800000, 450 I3405_MOVK = 0x72800000, 451 452 /* PC relative addressing instructions. */ 453 I3406_ADR = 0x10000000, 454 I3406_ADRP = 0x90000000, 455 456 /* Add/subtract shifted register instructions (without a shift). */ 457 I3502_ADD = 0x0b000000, 458 I3502_ADDS = 0x2b000000, 459 I3502_SUB = 0x4b000000, 460 I3502_SUBS = 0x6b000000, 461 462 /* Add/subtract shifted register instructions (with a shift). */ 463 I3502S_ADD_LSL = I3502_ADD, 464 465 /* Add/subtract with carry instructions. */ 466 I3503_ADC = 0x1a000000, 467 I3503_SBC = 0x5a000000, 468 469 /* Conditional select instructions. */ 470 I3506_CSEL = 0x1a800000, 471 I3506_CSINC = 0x1a800400, 472 I3506_CSINV = 0x5a800000, 473 I3506_CSNEG = 0x5a800400, 474 475 /* Data-processing (1 source) instructions. */ 476 I3507_CLZ = 0x5ac01000, 477 I3507_RBIT = 0x5ac00000, 478 I3507_REV = 0x5ac00000, /* + size << 10 */ 479 480 /* Data-processing (2 source) instructions. */ 481 I3508_LSLV = 0x1ac02000, 482 I3508_LSRV = 0x1ac02400, 483 I3508_ASRV = 0x1ac02800, 484 I3508_RORV = 0x1ac02c00, 485 I3508_SMULH = 0x9b407c00, 486 I3508_UMULH = 0x9bc07c00, 487 I3508_UDIV = 0x1ac00800, 488 I3508_SDIV = 0x1ac00c00, 489 490 /* Data-processing (3 source) instructions. */ 491 I3509_MADD = 0x1b000000, 492 I3509_MSUB = 0x1b008000, 493 494 /* Logical shifted register instructions (without a shift). 
*/ 495 I3510_AND = 0x0a000000, 496 I3510_BIC = 0x0a200000, 497 I3510_ORR = 0x2a000000, 498 I3510_ORN = 0x2a200000, 499 I3510_EOR = 0x4a000000, 500 I3510_EON = 0x4a200000, 501 I3510_ANDS = 0x6a000000, 502 503 /* Logical shifted register instructions (with a shift). */ 504 I3502S_AND_LSR = I3510_AND | (1 << 22), 505 506 /* AdvSIMD copy */ 507 I3605_DUP = 0x0e000400, 508 I3605_INS = 0x4e001c00, 509 I3605_UMOV = 0x0e003c00, 510 511 /* AdvSIMD modified immediate */ 512 I3606_MOVI = 0x0f000400, 513 I3606_MVNI = 0x2f000400, 514 I3606_BIC = 0x2f001400, 515 I3606_ORR = 0x0f001400, 516 517 /* AdvSIMD scalar shift by immediate */ 518 I3609_SSHR = 0x5f000400, 519 I3609_SSRA = 0x5f001400, 520 I3609_SHL = 0x5f005400, 521 I3609_USHR = 0x7f000400, 522 I3609_USRA = 0x7f001400, 523 I3609_SLI = 0x7f005400, 524 525 /* AdvSIMD scalar three same */ 526 I3611_SQADD = 0x5e200c00, 527 I3611_SQSUB = 0x5e202c00, 528 I3611_CMGT = 0x5e203400, 529 I3611_CMGE = 0x5e203c00, 530 I3611_SSHL = 0x5e204400, 531 I3611_ADD = 0x5e208400, 532 I3611_CMTST = 0x5e208c00, 533 I3611_UQADD = 0x7e200c00, 534 I3611_UQSUB = 0x7e202c00, 535 I3611_CMHI = 0x7e203400, 536 I3611_CMHS = 0x7e203c00, 537 I3611_USHL = 0x7e204400, 538 I3611_SUB = 0x7e208400, 539 I3611_CMEQ = 0x7e208c00, 540 541 /* AdvSIMD scalar two-reg misc */ 542 I3612_CMGT0 = 0x5e208800, 543 I3612_CMEQ0 = 0x5e209800, 544 I3612_CMLT0 = 0x5e20a800, 545 I3612_ABS = 0x5e20b800, 546 I3612_CMGE0 = 0x7e208800, 547 I3612_CMLE0 = 0x7e209800, 548 I3612_NEG = 0x7e20b800, 549 550 /* AdvSIMD shift by immediate */ 551 I3614_SSHR = 0x0f000400, 552 I3614_SSRA = 0x0f001400, 553 I3614_SHL = 0x0f005400, 554 I3614_SLI = 0x2f005400, 555 I3614_USHR = 0x2f000400, 556 I3614_USRA = 0x2f001400, 557 558 /* AdvSIMD three same. 
*/ 559 I3616_ADD = 0x0e208400, 560 I3616_AND = 0x0e201c00, 561 I3616_BIC = 0x0e601c00, 562 I3616_BIF = 0x2ee01c00, 563 I3616_BIT = 0x2ea01c00, 564 I3616_BSL = 0x2e601c00, 565 I3616_EOR = 0x2e201c00, 566 I3616_MUL = 0x0e209c00, 567 I3616_ORR = 0x0ea01c00, 568 I3616_ORN = 0x0ee01c00, 569 I3616_SUB = 0x2e208400, 570 I3616_CMGT = 0x0e203400, 571 I3616_CMGE = 0x0e203c00, 572 I3616_CMTST = 0x0e208c00, 573 I3616_CMHI = 0x2e203400, 574 I3616_CMHS = 0x2e203c00, 575 I3616_CMEQ = 0x2e208c00, 576 I3616_SMAX = 0x0e206400, 577 I3616_SMIN = 0x0e206c00, 578 I3616_SSHL = 0x0e204400, 579 I3616_SQADD = 0x0e200c00, 580 I3616_SQSUB = 0x0e202c00, 581 I3616_UMAX = 0x2e206400, 582 I3616_UMIN = 0x2e206c00, 583 I3616_UQADD = 0x2e200c00, 584 I3616_UQSUB = 0x2e202c00, 585 I3616_USHL = 0x2e204400, 586 587 /* AdvSIMD two-reg misc. */ 588 I3617_CMGT0 = 0x0e208800, 589 I3617_CMEQ0 = 0x0e209800, 590 I3617_CMLT0 = 0x0e20a800, 591 I3617_CMGE0 = 0x2e208800, 592 I3617_CMLE0 = 0x2e209800, 593 I3617_NOT = 0x2e205800, 594 I3617_ABS = 0x0e20b800, 595 I3617_NEG = 0x2e20b800, 596 597 /* System instructions. */ 598 NOP = 0xd503201f, 599 DMB_ISH = 0xd50338bf, 600 DMB_LD = 0x00000100, 601 DMB_ST = 0x00000200, 602} AArch64Insn; 603 604static inline uint32_t tcg_in32(TCGContext *s) 605{ 606 uint32_t v = *(uint32_t *)s->code_ptr; 607 return v; 608} 609 610/* Emit an opcode with "type-checking" of the format. */ 611#define tcg_out_insn(S, FMT, OP, ...) 
\ 612 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 613 614static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 615 TCGReg rt, TCGReg rn, unsigned size) 616{ 617 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 618} 619 620static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 621 int imm19, TCGReg rt) 622{ 623 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 624} 625 626static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 627 TCGReg rt, int imm19) 628{ 629 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 630} 631 632static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 633 TCGCond c, int imm19) 634{ 635 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 636} 637 638static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 639{ 640 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 641} 642 643static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 644{ 645 tcg_out32(s, insn | rn << 5); 646} 647 648static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 649 TCGReg r1, TCGReg r2, TCGReg rn, 650 tcg_target_long ofs, bool pre, bool w) 651{ 652 insn |= 1u << 31; /* ext */ 653 insn |= pre << 24; 654 insn |= w << 23; 655 656 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 657 insn |= (ofs & (0x7f << 3)) << (15 - 3); 658 659 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 660} 661 662static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 663 TCGReg rd, TCGReg rn, uint64_t aimm) 664{ 665 if (aimm > 0xfff) { 666 tcg_debug_assert((aimm & 0xfff) == 0); 667 aimm >>= 12; 668 tcg_debug_assert(aimm <= 0xfff); 669 aimm |= 1 << 12; /* apply LSL 12 */ 670 } 671 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 672} 673 674/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 675 (Logical immediate). 
Both insn groups have N, IMMR and IMMS fields 676 that feed the DecodeBitMasks pseudo function. */ 677static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 678 TCGReg rd, TCGReg rn, int n, int immr, int imms) 679{ 680 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 681 | rn << 5 | rd); 682} 683 684#define tcg_out_insn_3404 tcg_out_insn_3402 685 686static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 687 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 688{ 689 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 690 | rn << 5 | rd); 691} 692 693/* This function is used for the Move (wide immediate) instruction group. 694 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 695static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 696 TCGReg rd, uint16_t half, unsigned shift) 697{ 698 tcg_debug_assert((shift & ~0x30) == 0); 699 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 700} 701 702static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 703 TCGReg rd, int64_t disp) 704{ 705 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 706} 707 708/* This function is for both 3.5.2 (Add/Subtract shifted register), for 709 the rare occasion when we actually want to supply a shift amount. */ 710static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, 711 TCGType ext, TCGReg rd, TCGReg rn, 712 TCGReg rm, int imm6) 713{ 714 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); 715} 716 717/* This function is for 3.5.2 (Add/subtract shifted register), 718 and 3.5.10 (Logical shifted register), for the vast majorty of cases 719 when we don't want to apply a shift. Thus it can also be used for 720 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). 
*/ 721static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, 722 TCGReg rd, TCGReg rn, TCGReg rm) 723{ 724 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); 725} 726 727#define tcg_out_insn_3503 tcg_out_insn_3502 728#define tcg_out_insn_3508 tcg_out_insn_3502 729#define tcg_out_insn_3510 tcg_out_insn_3502 730 731static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, 732 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) 733{ 734 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd 735 | tcg_cond_to_aarch64[c] << 12); 736} 737 738static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, 739 TCGReg rd, TCGReg rn) 740{ 741 tcg_out32(s, insn | ext << 31 | rn << 5 | rd); 742} 743 744static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, 745 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) 746{ 747 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); 748} 749 750static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q, 751 TCGReg rd, TCGReg rn, int dst_idx, int src_idx) 752{ 753 /* Note that bit 11 set means general register input. Therefore 754 we can handle both register sets with one function. 
*/ 755 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11) 756 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5); 757} 758 759static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, 760 TCGReg rd, bool op, int cmode, uint8_t imm8) 761{ 762 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f) 763 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); 764} 765 766static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn, 767 TCGReg rd, TCGReg rn, unsigned immhb) 768{ 769 tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f)); 770} 771 772static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn, 773 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 774{ 775 tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16 776 | (rn & 0x1f) << 5 | (rd & 0x1f)); 777} 778 779static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn, 780 unsigned size, TCGReg rd, TCGReg rn) 781{ 782 tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f)); 783} 784 785static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, 786 TCGReg rd, TCGReg rn, unsigned immhb) 787{ 788 tcg_out32(s, insn | q << 30 | immhb << 16 789 | (rn & 0x1f) << 5 | (rd & 0x1f)); 790} 791 792static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q, 793 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 794{ 795 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16 796 | (rn & 0x1f) << 5 | (rd & 0x1f)); 797} 798 799static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q, 800 unsigned size, TCGReg rd, TCGReg rn) 801{ 802 tcg_out32(s, insn | q << 30 | (size << 22) 803 | (rn & 0x1f) << 5 | (rd & 0x1f)); 804} 805 806static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, 807 TCGReg rd, TCGReg base, TCGType ext, 808 TCGReg regoff) 809{ 810 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. 
*/ 811 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 812 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 813} 814 815static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 816 TCGReg rd, TCGReg rn, intptr_t offset) 817{ 818 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 819} 820 821static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 822 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 823{ 824 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 825 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 826 | rn << 5 | (rd & 0x1f)); 827} 828 829/* Register to register move using ORR (shifted register with no shift). */ 830static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 831{ 832 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 833} 834 835/* Register to register move using ADDI (move to/from SP). */ 836static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 837{ 838 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 839} 840 841/* This function is used for the Logical (immediate) instruction group. 842 The value of LIMM must satisfy IS_LIMM. See the comment above about 843 only supporting simplified logical immediates. 
*/ 844static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 845 TCGReg rd, TCGReg rn, uint64_t limm) 846{ 847 unsigned h, l, r, c; 848 849 tcg_debug_assert(is_limm(limm)); 850 851 h = clz64(limm); 852 l = ctz64(limm); 853 if (l == 0) { 854 r = 0; /* form 0....01....1 */ 855 c = ctz64(~limm) - 1; 856 if (h == 0) { 857 r = clz64(~limm); /* form 1..10..01..1 */ 858 c += r; 859 } 860 } else { 861 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 862 c = r - h - 1; 863 } 864 if (ext == TCG_TYPE_I32) { 865 r &= 31; 866 c &= 31; 867 } 868 869 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 870} 871 872static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 873 TCGReg rd, int64_t v64) 874{ 875 bool q = type == TCG_TYPE_V128; 876 int cmode, imm8, i; 877 878 /* Test all bytes equal first. */ 879 if (vece == MO_8) { 880 imm8 = (uint8_t)v64; 881 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 882 return; 883 } 884 885 /* 886 * Test all bytes 0x00 or 0xff second. This can match cases that 887 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 888 */ 889 for (i = imm8 = 0; i < 8; i++) { 890 uint8_t byte = v64 >> (i * 8); 891 if (byte == 0xff) { 892 imm8 |= 1 << i; 893 } else if (byte != 0) { 894 goto fail_bytes; 895 } 896 } 897 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 898 return; 899 fail_bytes: 900 901 /* 902 * Tests for various replications. For each element width, if we 903 * cannot find an expansion there's no point checking a larger 904 * width because we already know by replication it cannot match. 905 */ 906 if (vece == MO_16) { 907 uint16_t v16 = v64; 908 909 if (is_shimm16(v16, &cmode, &imm8)) { 910 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 911 return; 912 } 913 if (is_shimm16(~v16, &cmode, &imm8)) { 914 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 915 return; 916 } 917 918 /* 919 * Otherwise, all remaining constants can be loaded in two insns: 920 * rd = v16 & 0xff, rd |= v16 & 0xff00. 
921 */ 922 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 923 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 924 return; 925 } else if (vece == MO_32) { 926 uint32_t v32 = v64; 927 uint32_t n32 = ~v32; 928 929 if (is_shimm32(v32, &cmode, &imm8) || 930 is_soimm32(v32, &cmode, &imm8) || 931 is_fimm32(v32, &cmode, &imm8)) { 932 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 933 return; 934 } 935 if (is_shimm32(n32, &cmode, &imm8) || 936 is_soimm32(n32, &cmode, &imm8)) { 937 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 938 return; 939 } 940 941 /* 942 * Restrict the set of constants to those we can load with 943 * two instructions. Others we load from the pool. 944 */ 945 i = is_shimm32_pair(v32, &cmode, &imm8); 946 if (i) { 947 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 948 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 949 return; 950 } 951 i = is_shimm32_pair(n32, &cmode, &imm8); 952 if (i) { 953 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 954 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 955 return; 956 } 957 } else if (is_fimm64(v64, &cmode, &imm8)) { 958 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 959 return; 960 } 961 962 /* 963 * As a last resort, load from the constant pool. Sadly there 964 * is no LD1R (literal), so store the full 16-byte vector. 
965 */ 966 if (type == TCG_TYPE_V128) { 967 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 968 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 969 } else { 970 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 971 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 972 } 973} 974 975static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 976 TCGReg rd, TCGReg rs) 977{ 978 int is_q = type - TCG_TYPE_V64; 979 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 980 return true; 981} 982 983static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 984 TCGReg r, TCGReg base, intptr_t offset) 985{ 986 TCGReg temp = TCG_REG_TMP; 987 988 if (offset < -0xffffff || offset > 0xffffff) { 989 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 990 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 991 base = temp; 992 } else { 993 AArch64Insn add_insn = I3401_ADDI; 994 995 if (offset < 0) { 996 add_insn = I3401_SUBI; 997 offset = -offset; 998 } 999 if (offset & 0xfff000) { 1000 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1001 base = temp; 1002 } 1003 if (offset & 0xfff) { 1004 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1005 base = temp; 1006 } 1007 } 1008 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1009 return true; 1010} 1011 1012static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1013 tcg_target_long value) 1014{ 1015 tcg_target_long svalue = value; 1016 tcg_target_long ivalue = ~value; 1017 tcg_target_long t0, t1, t2; 1018 int s0, s1; 1019 AArch64Insn opc; 1020 1021 switch (type) { 1022 case TCG_TYPE_I32: 1023 case TCG_TYPE_I64: 1024 tcg_debug_assert(rd < 32); 1025 break; 1026 default: 1027 g_assert_not_reached(); 1028 } 1029 1030 /* For 32-bit values, discard potential garbage in value. 
For 64-bit 1031 values within [2**31, 2**32-1], we can create smaller sequences by 1032 interpreting this as a negative 32-bit number, while ensuring that 1033 the high 32 bits are cleared by setting SF=0. */ 1034 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1035 svalue = (int32_t)value; 1036 value = (uint32_t)value; 1037 ivalue = (uint32_t)ivalue; 1038 type = TCG_TYPE_I32; 1039 } 1040 1041 /* Speed things up by handling the common case of small positive 1042 and negative values specially. */ 1043 if ((value & ~0xffffull) == 0) { 1044 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1045 return; 1046 } else if ((ivalue & ~0xffffull) == 0) { 1047 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1048 return; 1049 } 1050 1051 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1052 use the sign-extended value. That lets us match rotated values such 1053 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1054 if (is_limm(svalue)) { 1055 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1056 return; 1057 } 1058 1059 /* Look for host pointer values within 4G of the PC. This happens 1060 often when loading pointers to QEMU's own data structures. */ 1061 if (type == TCG_TYPE_I64) { 1062 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1063 tcg_target_long disp = value - src_rx; 1064 if (disp == sextract64(disp, 0, 21)) { 1065 tcg_out_insn(s, 3406, ADR, rd, disp); 1066 return; 1067 } 1068 disp = (value >> 12) - (src_rx >> 12); 1069 if (disp == sextract64(disp, 0, 21)) { 1070 tcg_out_insn(s, 3406, ADRP, rd, disp); 1071 if (value & 0xfff) { 1072 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1073 } 1074 return; 1075 } 1076 } 1077 1078 /* Would it take fewer insns to begin with MOVN? 
*/ 1079 if (ctpop64(value) >= 32) { 1080 t0 = ivalue; 1081 opc = I3405_MOVN; 1082 } else { 1083 t0 = value; 1084 opc = I3405_MOVZ; 1085 } 1086 s0 = ctz64(t0) & (63 & -16); 1087 t1 = t0 & ~(0xffffUL << s0); 1088 s1 = ctz64(t1) & (63 & -16); 1089 t2 = t1 & ~(0xffffUL << s1); 1090 if (t2 == 0) { 1091 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1092 if (t1 != 0) { 1093 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1094 } 1095 return; 1096 } 1097 1098 /* For more than 2 insns, dump it into the constant pool. */ 1099 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1100 tcg_out_insn(s, 3305, LDR, 0, rd); 1101} 1102 1103/* Define something more legible for general use. */ 1104#define tcg_out_ldst_r tcg_out_insn_3310 1105 1106static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1107 TCGReg rn, intptr_t offset, int lgsize) 1108{ 1109 /* If the offset is naturally aligned and in range, then we can 1110 use the scaled uimm12 encoding */ 1111 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1112 uintptr_t scaled_uimm = offset >> lgsize; 1113 if (scaled_uimm <= 0xfff) { 1114 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1115 return; 1116 } 1117 } 1118 1119 /* Small signed offsets can use the unscaled encoding. */ 1120 if (offset >= -256 && offset < 256) { 1121 tcg_out_insn_3312(s, insn, rd, rn, offset); 1122 return; 1123 } 1124 1125 /* Worst-case scenario, move offset to temp register, use reg offset. 
*/ 1126 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1127 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1128} 1129 1130static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1131{ 1132 if (ret == arg) { 1133 return true; 1134 } 1135 switch (type) { 1136 case TCG_TYPE_I32: 1137 case TCG_TYPE_I64: 1138 if (ret < 32 && arg < 32) { 1139 tcg_out_movr(s, type, ret, arg); 1140 break; 1141 } else if (ret < 32) { 1142 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1143 break; 1144 } else if (arg < 32) { 1145 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1146 break; 1147 } 1148 /* FALLTHRU */ 1149 1150 case TCG_TYPE_V64: 1151 tcg_debug_assert(ret >= 32 && arg >= 32); 1152 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1153 break; 1154 case TCG_TYPE_V128: 1155 tcg_debug_assert(ret >= 32 && arg >= 32); 1156 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1157 break; 1158 1159 default: 1160 g_assert_not_reached(); 1161 } 1162 return true; 1163} 1164 1165static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1166 TCGReg base, intptr_t ofs) 1167{ 1168 AArch64Insn insn; 1169 int lgsz; 1170 1171 switch (type) { 1172 case TCG_TYPE_I32: 1173 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1174 lgsz = 2; 1175 break; 1176 case TCG_TYPE_I64: 1177 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1178 lgsz = 3; 1179 break; 1180 case TCG_TYPE_V64: 1181 insn = I3312_LDRVD; 1182 lgsz = 3; 1183 break; 1184 case TCG_TYPE_V128: 1185 insn = I3312_LDRVQ; 1186 lgsz = 4; 1187 break; 1188 default: 1189 g_assert_not_reached(); 1190 } 1191 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1192} 1193 1194static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1195 TCGReg base, intptr_t ofs) 1196{ 1197 AArch64Insn insn; 1198 int lgsz; 1199 1200 switch (type) { 1201 case TCG_TYPE_I32: 1202 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1203 lgsz = 2; 1204 break; 1205 case TCG_TYPE_I64: 1206 insn = (src < 32 ? 
I3312_STRX : I3312_STRVD); 1207 lgsz = 3; 1208 break; 1209 case TCG_TYPE_V64: 1210 insn = I3312_STRVD; 1211 lgsz = 3; 1212 break; 1213 case TCG_TYPE_V128: 1214 insn = I3312_STRVQ; 1215 lgsz = 4; 1216 break; 1217 default: 1218 g_assert_not_reached(); 1219 } 1220 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1221} 1222 1223static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1224 TCGReg base, intptr_t ofs) 1225{ 1226 if (type <= TCG_TYPE_I64 && val == 0) { 1227 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1228 return true; 1229 } 1230 return false; 1231} 1232 1233static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1234 TCGReg rn, unsigned int a, unsigned int b) 1235{ 1236 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1237} 1238 1239static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1240 TCGReg rn, unsigned int a, unsigned int b) 1241{ 1242 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1243} 1244 1245static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1246 TCGReg rn, unsigned int a, unsigned int b) 1247{ 1248 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1249} 1250 1251static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1252 TCGReg rn, TCGReg rm, unsigned int a) 1253{ 1254 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1255} 1256 1257static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1258 TCGReg rd, TCGReg rn, unsigned int m) 1259{ 1260 int bits = ext ? 64 : 32; 1261 int max = bits - 1; 1262 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); 1263} 1264 1265static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1266 TCGReg rd, TCGReg rn, unsigned int m) 1267{ 1268 int max = ext ? 63 : 31; 1269 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1270} 1271 1272static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1273 TCGReg rd, TCGReg rn, unsigned int m) 1274{ 1275 int max = ext ? 
63 : 31; 1276 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1277} 1278 1279static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1280 TCGReg rd, TCGReg rn, unsigned int m) 1281{ 1282 int max = ext ? 63 : 31; 1283 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1284} 1285 1286static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1287 TCGReg rd, TCGReg rn, unsigned int m) 1288{ 1289 int max = ext ? 63 : 31; 1290 tcg_out_extr(s, ext, rd, rn, rn, -m & max); 1291} 1292 1293static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1294 TCGReg rn, unsigned lsb, unsigned width) 1295{ 1296 unsigned size = ext ? 64 : 32; 1297 unsigned a = (size - lsb) & (size - 1); 1298 unsigned b = width - 1; 1299 tcg_out_bfm(s, ext, rd, rn, a, b); 1300} 1301 1302static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1303 tcg_target_long b, bool const_b) 1304{ 1305 if (const_b) { 1306 /* Using CMP or CMN aliases. */ 1307 if (b >= 0) { 1308 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1309 } else { 1310 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1311 } 1312 } else { 1313 /* Using CMP alias SUBS wzr, Wn, Wm */ 1314 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1315 } 1316} 1317 1318static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1319{ 1320 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1321 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1322 tcg_out_insn(s, 3206, B, offset); 1323} 1324 1325static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1326{ 1327 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1328 if (offset == sextract64(offset, 0, 26)) { 1329 tcg_out_insn(s, 3206, B, offset); 1330 } else { 1331 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1332 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1333 } 1334} 1335 1336static inline void tcg_out_callr(TCGContext *s, TCGReg reg) 1337{ 1338 tcg_out_insn(s, 3207, BLR, reg); 1339} 1340 1341static void tcg_out_call(TCGContext *s, 
const tcg_insn_unit *target) 1342{ 1343 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1344 if (offset == sextract64(offset, 0, 26)) { 1345 tcg_out_insn(s, 3206, BL, offset); 1346 } else { 1347 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1348 tcg_out_callr(s, TCG_REG_TMP); 1349 } 1350} 1351 1352void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, 1353 uintptr_t jmp_rw, uintptr_t addr) 1354{ 1355 tcg_insn_unit i1, i2; 1356 TCGType rt = TCG_TYPE_I64; 1357 TCGReg rd = TCG_REG_TMP; 1358 uint64_t pair; 1359 1360 ptrdiff_t offset = addr - jmp_rx; 1361 1362 if (offset == sextract64(offset, 0, 26)) { 1363 i1 = I3206_B | ((offset >> 2) & 0x3ffffff); 1364 i2 = NOP; 1365 } else { 1366 offset = (addr >> 12) - (jmp_rx >> 12); 1367 1368 /* patch ADRP */ 1369 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; 1370 /* patch ADDI */ 1371 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; 1372 } 1373 pair = (uint64_t)i2 << 32 | i1; 1374 qatomic_set((uint64_t *)jmp_rw, pair); 1375 flush_idcache_range(jmp_rx, jmp_rw, 8); 1376} 1377 1378static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1379{ 1380 if (!l->has_value) { 1381 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1382 tcg_out_insn(s, 3206, B, 0); 1383 } else { 1384 tcg_out_goto(s, l->u.value_ptr); 1385 } 1386} 1387 1388static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1389 TCGArg b, bool b_const, TCGLabel *l) 1390{ 1391 intptr_t offset; 1392 bool need_cmp; 1393 1394 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1395 need_cmp = false; 1396 } else { 1397 need_cmp = true; 1398 tcg_out_cmp(s, ext, a, b, b_const); 1399 } 1400 1401 if (!l->has_value) { 1402 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1403 offset = tcg_in32(s) >> 5; 1404 } else { 1405 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1406 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1407 } 1408 1409 if 
(need_cmp) { 1410 tcg_out_insn(s, 3202, B_C, c, offset); 1411 } else if (c == TCG_COND_EQ) { 1412 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1413 } else { 1414 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1415 } 1416} 1417 1418static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, 1419 TCGReg rd, TCGReg rn) 1420{ 1421 /* REV, REV16, REV32 */ 1422 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); 1423} 1424 1425static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1426 TCGReg rd, TCGReg rn) 1427{ 1428 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1429 int bits = (8 << s_bits) - 1; 1430 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1431} 1432 1433static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1434 TCGReg rd, TCGReg rn) 1435{ 1436 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1437 int bits = (8 << s_bits) - 1; 1438 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1439} 1440 1441static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1442 TCGReg rn, int64_t aimm) 1443{ 1444 if (aimm >= 0) { 1445 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1446 } else { 1447 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1448 } 1449} 1450 1451static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1452 TCGReg rh, TCGReg al, TCGReg ah, 1453 tcg_target_long bl, tcg_target_long bh, 1454 bool const_bl, bool const_bh, bool sub) 1455{ 1456 TCGReg orig_rl = rl; 1457 AArch64Insn insn; 1458 1459 if (rl == ah || (!const_bh && rl == bh)) { 1460 rl = TCG_REG_TMP; 1461 } 1462 1463 if (const_bl) { 1464 if (bl < 0) { 1465 bl = -bl; 1466 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1467 } else { 1468 insn = sub ? I3401_SUBSI : I3401_ADDSI; 1469 } 1470 1471 if (unlikely(al == TCG_REG_XZR)) { 1472 /* ??? We want to allow al to be zero for the benefit of 1473 negation via subtraction. 
However, that leaves open the 1474 possibility of adding 0+const in the low part, and the 1475 immediate add instructions encode XSP not XZR. Don't try 1476 anything more elaborate here than loading another zero. */ 1477 al = TCG_REG_TMP; 1478 tcg_out_movi(s, ext, al, 0); 1479 } 1480 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1481 } else { 1482 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1483 } 1484 1485 insn = I3503_ADC; 1486 if (const_bh) { 1487 /* Note that the only two constants we support are 0 and -1, and 1488 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1489 if ((bh != 0) ^ sub) { 1490 insn = I3503_SBC; 1491 } 1492 bh = TCG_REG_XZR; 1493 } else if (sub) { 1494 insn = I3503_SBC; 1495 } 1496 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1497 1498 tcg_out_mov(s, ext, orig_rl, rl); 1499} 1500 1501static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1502{ 1503 static const uint32_t sync[] = { 1504 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1505 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1506 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1507 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1508 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1509 }; 1510 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1511} 1512 1513static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1514 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1515{ 1516 TCGReg a1 = a0; 1517 if (is_ctz) { 1518 a1 = TCG_REG_TMP; 1519 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1520 } 1521 if (const_b && b == (ext ? 
64 : 32)) { 1522 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1523 } else { 1524 AArch64Insn sel = I3506_CSEL; 1525 1526 tcg_out_cmp(s, ext, a0, 0, 1); 1527 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1528 1529 if (const_b) { 1530 if (b == -1) { 1531 b = TCG_REG_XZR; 1532 sel = I3506_CSINV; 1533 } else if (b == 0) { 1534 b = TCG_REG_XZR; 1535 } else { 1536 tcg_out_movi(s, ext, d, b); 1537 b = d; 1538 } 1539 } 1540 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1541 } 1542} 1543 1544#ifdef CONFIG_SOFTMMU 1545#include "../tcg-ldst.c.inc" 1546 1547/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1548 * MemOpIdx oi, uintptr_t ra) 1549 */ 1550static void * const qemu_ld_helpers[MO_SIZE + 1] = { 1551 [MO_8] = helper_ret_ldub_mmu, 1552#ifdef HOST_WORDS_BIGENDIAN 1553 [MO_16] = helper_be_lduw_mmu, 1554 [MO_32] = helper_be_ldul_mmu, 1555 [MO_64] = helper_be_ldq_mmu, 1556#else 1557 [MO_16] = helper_le_lduw_mmu, 1558 [MO_32] = helper_le_ldul_mmu, 1559 [MO_64] = helper_le_ldq_mmu, 1560#endif 1561}; 1562 1563/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1564 * uintxx_t val, MemOpIdx oi, 1565 * uintptr_t ra) 1566 */ 1567static void * const qemu_st_helpers[MO_SIZE + 1] = { 1568 [MO_8] = helper_ret_stb_mmu, 1569#ifdef HOST_WORDS_BIGENDIAN 1570 [MO_16] = helper_be_stw_mmu, 1571 [MO_32] = helper_be_stl_mmu, 1572 [MO_64] = helper_be_stq_mmu, 1573#else 1574 [MO_16] = helper_le_stw_mmu, 1575 [MO_32] = helper_le_stl_mmu, 1576 [MO_64] = helper_le_stq_mmu, 1577#endif 1578}; 1579 1580static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) 1581{ 1582 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1583 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1584 tcg_out_insn(s, 3406, ADR, rd, offset); 1585} 1586 1587static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1588{ 1589 MemOpIdx oi = lb->oi; 1590 MemOp opc = get_memop(oi); 1591 MemOp size = opc & MO_SIZE; 1592 1593 if 
(!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1594 return false; 1595 } 1596 1597 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1598 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1599 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1600 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1601 tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); 1602 if (opc & MO_SIGN) { 1603 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1604 } else { 1605 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1606 } 1607 1608 tcg_out_goto(s, lb->raddr); 1609 return true; 1610} 1611 1612static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1613{ 1614 MemOpIdx oi = lb->oi; 1615 MemOp opc = get_memop(oi); 1616 MemOp size = opc & MO_SIZE; 1617 1618 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1619 return false; 1620 } 1621 1622 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1623 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1624 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1625 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1626 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1627 tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); 1628 tcg_out_goto(s, lb->raddr); 1629 return true; 1630} 1631 1632static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, 1633 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1634 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1635{ 1636 TCGLabelQemuLdst *label = new_ldst_label(s); 1637 1638 label->is_ld = is_ld; 1639 label->oi = oi; 1640 label->type = ext; 1641 label->datalo_reg = data_reg; 1642 label->addrlo_reg = addr_reg; 1643 label->raddr = tcg_splitwx_to_rx(raddr); 1644 label->label_ptr[0] = label_ptr; 1645} 1646 1647/* We expect to use a 7-bit scaled negative offset from ENV. 
*/ 1648QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1649QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1650 1651/* These offsets are built into the LDP below. */ 1652QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1653QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1654 1655/* Load and compare a TLB entry, emitting the conditional jump to the 1656 slow path for the failure case, which will be patched later when finalizing 1657 the slow path. Generated code returns the host addend in X1, 1658 clobbers X0,X2,X3,TMP. */ 1659static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, 1660 tcg_insn_unit **label_ptr, int mem_index, 1661 bool is_read) 1662{ 1663 unsigned a_bits = get_alignment_bits(opc); 1664 unsigned s_bits = opc & MO_SIZE; 1665 unsigned a_mask = (1u << a_bits) - 1; 1666 unsigned s_mask = (1u << s_bits) - 1; 1667 TCGReg x3; 1668 TCGType mask_type; 1669 uint64_t compare_mask; 1670 1671 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1672 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1673 1674 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1675 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1676 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1677 1678 /* Extract the TLB index from the address into X0. */ 1679 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1680 TCG_REG_X0, TCG_REG_X0, addr_reg, 1681 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1682 1683 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ 1684 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1685 1686 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1687 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read 1688 ? 
offsetof(CPUTLBEntry, addr_read) 1689 : offsetof(CPUTLBEntry, addr_write)); 1690 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1691 offsetof(CPUTLBEntry, addend)); 1692 1693 /* For aligned accesses, we check the first byte and include the alignment 1694 bits within the address. For unaligned access, we check that we don't 1695 cross pages using the address of the last byte of the access. */ 1696 if (a_bits >= s_bits) { 1697 x3 = addr_reg; 1698 } else { 1699 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1700 TCG_REG_X3, addr_reg, s_mask - a_mask); 1701 x3 = TCG_REG_X3; 1702 } 1703 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1704 1705 /* Store the page mask part of the address into X3. */ 1706 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1707 TCG_REG_X3, x3, compare_mask); 1708 1709 /* Perform the address comparison. */ 1710 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1711 1712 /* If not equal, we jump to the slow path. */ 1713 *label_ptr = s->code_ptr; 1714 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1715} 1716 1717#endif /* CONFIG_SOFTMMU */ 1718 1719static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1720 TCGReg data_r, TCGReg addr_r, 1721 TCGType otype, TCGReg off_r) 1722{ 1723 /* Byte swapping is left to middle-end expansion. */ 1724 tcg_debug_assert((memop & MO_BSWAP) == 0); 1725 1726 switch (memop & MO_SSIZE) { 1727 case MO_UB: 1728 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); 1729 break; 1730 case MO_SB: 1731 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1732 data_r, addr_r, otype, off_r); 1733 break; 1734 case MO_UW: 1735 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1736 break; 1737 case MO_SW: 1738 tcg_out_ldst_r(s, (ext ? 
I3312_LDRSHX : I3312_LDRSHW), 1739 data_r, addr_r, otype, off_r); 1740 break; 1741 case MO_UL: 1742 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1743 break; 1744 case MO_SL: 1745 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); 1746 break; 1747 case MO_UQ: 1748 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); 1749 break; 1750 default: 1751 tcg_abort(); 1752 } 1753} 1754 1755static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1756 TCGReg data_r, TCGReg addr_r, 1757 TCGType otype, TCGReg off_r) 1758{ 1759 /* Byte swapping is left to middle-end expansion. */ 1760 tcg_debug_assert((memop & MO_BSWAP) == 0); 1761 1762 switch (memop & MO_SIZE) { 1763 case MO_8: 1764 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); 1765 break; 1766 case MO_16: 1767 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); 1768 break; 1769 case MO_32: 1770 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); 1771 break; 1772 case MO_64: 1773 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); 1774 break; 1775 default: 1776 tcg_abort(); 1777 } 1778} 1779 1780static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1781 MemOpIdx oi, TCGType ext) 1782{ 1783 MemOp memop = get_memop(oi); 1784 const TCGType otype = TARGET_LONG_BITS == 64 ? 
/*
 * Emit host code for one integer TCG opcode.
 *
 * OPC selects the operation, ARGS holds its operands and CONST_ARGS[i]
 * is non-zero when ARGS[i] is a constant instead of a register number.
 * The mov and call opcodes, and any opcode without a case below, trip
 * g_assert_not_reached().
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        if (a0 == 0) {
            tcg_out_goto_long(s, tcg_code_gen_epilogue);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
            tcg_out_goto_long(s, tb_ret_addr);
        }
        break;

    case INDEX_op_goto_tb:
        if (s->tb_jmp_insn_offset != NULL) {
            /* TCG_TARGET_HAS_direct_jump */
            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
               write can be used to patch the target address. */
            if ((uintptr_t)s->code_ptr & 7) {
                tcg_out32(s, NOP);
            }
            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
            /* actual branch destination will be patched by
               tb_target_set_jmp_target later. */
            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
                         TCG_REG_TMP, TCG_REG_TMP, 0);
        } else {
            /* !TCG_TARGET_HAS_direct_jump */
            /* Load the destination from the jump target table instead.  */
            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            intptr_t offset =
                tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
        }
        /* Both variants end in an indirect branch through TMP.  */
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    /* Host loads: a0 = destination, a1 = base, a2 = offset; the last
       argument of tcg_out_ldst is log2 of the access size.  */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    /* Host stores: a constant source operand is stored from XZR.  */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    /* In the 32-bit variants below, a2 is sign-extended from 32 bits
       before falling through to the code shared with the 64-bit form.  */
    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    /* The and-not / or-not / xor-not forms with a constant operand are
       emitted as the plain immediate instruction on the inverted value.  */
    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    /* Remainder: quotient into TMP, then MSUB with a1 as the addend.  */
    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    /* Shifts and rotates: immediate helper when the count is constant,
       variable-count instruction otherwise.  */
    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            /* Rotate left by a register count is emitted as a rotate
               right by the negated count, computed into TMP.  */
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    /* brcond operands: a0, a1 = values compared, a2 = condition,
       args[3] = label.  */
    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    /* Guest memory accesses: a2 is passed on as the MemOpIdx.  */
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    /* Byte swaps: a2 carries the TCG_BSWAP_* flags for the narrow forms.  */
    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_sxt(s, ext, MO_16, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_uxt(s, MO_16, a0, a0);
        }
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        /* Zero-extension from 32 bits is emitted as a 32-bit move.  */
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    /* extract/sextract: a2 = first bit, args[3] = field length.  */
    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    /* Double-word add/sub: the low-part operand args[4] is truncated to
       32 bits for the _i32 forms.  */
    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        g_assert_not_reached();
    }

#undef REG0
}
a1 = args[1]; 2302 a2 = args[2]; 2303 2304 switch (opc) { 2305 case INDEX_op_ld_vec: 2306 tcg_out_ld(s, type, a0, a1, a2); 2307 break; 2308 case INDEX_op_st_vec: 2309 tcg_out_st(s, type, a0, a1, a2); 2310 break; 2311 case INDEX_op_dupm_vec: 2312 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2313 break; 2314 case INDEX_op_add_vec: 2315 if (is_scalar) { 2316 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2317 } else { 2318 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2319 } 2320 break; 2321 case INDEX_op_sub_vec: 2322 if (is_scalar) { 2323 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2324 } else { 2325 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2326 } 2327 break; 2328 case INDEX_op_mul_vec: 2329 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2330 break; 2331 case INDEX_op_neg_vec: 2332 if (is_scalar) { 2333 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2334 } else { 2335 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2336 } 2337 break; 2338 case INDEX_op_abs_vec: 2339 if (is_scalar) { 2340 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2341 } else { 2342 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2343 } 2344 break; 2345 case INDEX_op_and_vec: 2346 if (const_args[2]) { 2347 is_shimm1632(~a2, &cmode, &imm8); 2348 if (a0 == a1) { 2349 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2350 return; 2351 } 2352 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2353 a2 = a0; 2354 } 2355 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2356 break; 2357 case INDEX_op_or_vec: 2358 if (const_args[2]) { 2359 is_shimm1632(a2, &cmode, &imm8); 2360 if (a0 == a1) { 2361 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2362 return; 2363 } 2364 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2365 a2 = a0; 2366 } 2367 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2368 break; 2369 case INDEX_op_andc_vec: 2370 if (const_args[2]) { 2371 is_shimm1632(a2, &cmode, &imm8); 2372 if (a0 == a1) { 2373 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 
2374 return; 2375 } 2376 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2377 a2 = a0; 2378 } 2379 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2380 break; 2381 case INDEX_op_orc_vec: 2382 if (const_args[2]) { 2383 is_shimm1632(~a2, &cmode, &imm8); 2384 if (a0 == a1) { 2385 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2386 return; 2387 } 2388 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2389 a2 = a0; 2390 } 2391 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2392 break; 2393 case INDEX_op_xor_vec: 2394 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2395 break; 2396 case INDEX_op_ssadd_vec: 2397 if (is_scalar) { 2398 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2399 } else { 2400 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2401 } 2402 break; 2403 case INDEX_op_sssub_vec: 2404 if (is_scalar) { 2405 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2406 } else { 2407 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2408 } 2409 break; 2410 case INDEX_op_usadd_vec: 2411 if (is_scalar) { 2412 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2413 } else { 2414 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2415 } 2416 break; 2417 case INDEX_op_ussub_vec: 2418 if (is_scalar) { 2419 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2420 } else { 2421 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2422 } 2423 break; 2424 case INDEX_op_smax_vec: 2425 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2426 break; 2427 case INDEX_op_smin_vec: 2428 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2429 break; 2430 case INDEX_op_umax_vec: 2431 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2432 break; 2433 case INDEX_op_umin_vec: 2434 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2435 break; 2436 case INDEX_op_not_vec: 2437 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2438 break; 2439 case INDEX_op_shli_vec: 2440 if (is_scalar) { 2441 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2442 } else { 
2443 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2444 } 2445 break; 2446 case INDEX_op_shri_vec: 2447 if (is_scalar) { 2448 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2449 } else { 2450 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2451 } 2452 break; 2453 case INDEX_op_sari_vec: 2454 if (is_scalar) { 2455 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2456 } else { 2457 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2458 } 2459 break; 2460 case INDEX_op_aa64_sli_vec: 2461 if (is_scalar) { 2462 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2463 } else { 2464 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2465 } 2466 break; 2467 case INDEX_op_shlv_vec: 2468 if (is_scalar) { 2469 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2470 } else { 2471 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2472 } 2473 break; 2474 case INDEX_op_aa64_sshl_vec: 2475 if (is_scalar) { 2476 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2477 } else { 2478 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2479 } 2480 break; 2481 case INDEX_op_cmp_vec: 2482 { 2483 TCGCond cond = args[3]; 2484 AArch64Insn insn; 2485 2486 if (cond == TCG_COND_NE) { 2487 if (const_args[2]) { 2488 if (is_scalar) { 2489 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 2490 } else { 2491 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2492 } 2493 } else { 2494 if (is_scalar) { 2495 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2496 } else { 2497 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2498 } 2499 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2500 } 2501 } else { 2502 if (const_args[2]) { 2503 if (is_scalar) { 2504 insn = cmp0_scalar_insn[cond]; 2505 if (insn) { 2506 tcg_out_insn_3612(s, insn, vece, a0, a1); 2507 break; 2508 } 2509 } else { 2510 insn = cmp0_vec_insn[cond]; 2511 if (insn) { 2512 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2513 break; 2514 } 2515 } 2516 
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2517 a2 = TCG_VEC_TMP; 2518 } 2519 if (is_scalar) { 2520 insn = cmp_scalar_insn[cond]; 2521 if (insn == 0) { 2522 TCGArg t; 2523 t = a1, a1 = a2, a2 = t; 2524 cond = tcg_swap_cond(cond); 2525 insn = cmp_scalar_insn[cond]; 2526 tcg_debug_assert(insn != 0); 2527 } 2528 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2529 } else { 2530 insn = cmp_vec_insn[cond]; 2531 if (insn == 0) { 2532 TCGArg t; 2533 t = a1, a1 = a2, a2 = t; 2534 cond = tcg_swap_cond(cond); 2535 insn = cmp_vec_insn[cond]; 2536 tcg_debug_assert(insn != 0); 2537 } 2538 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2539 } 2540 } 2541 } 2542 break; 2543 2544 case INDEX_op_bitsel_vec: 2545 a3 = args[3]; 2546 if (a0 == a3) { 2547 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2548 } else if (a0 == a2) { 2549 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2550 } else { 2551 if (a0 != a1) { 2552 tcg_out_mov(s, type, a0, a1); 2553 } 2554 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2555 } 2556 break; 2557 2558 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2559 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. 
*/ 2560 default: 2561 g_assert_not_reached(); 2562 } 2563} 2564 2565int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2566{ 2567 switch (opc) { 2568 case INDEX_op_add_vec: 2569 case INDEX_op_sub_vec: 2570 case INDEX_op_and_vec: 2571 case INDEX_op_or_vec: 2572 case INDEX_op_xor_vec: 2573 case INDEX_op_andc_vec: 2574 case INDEX_op_orc_vec: 2575 case INDEX_op_neg_vec: 2576 case INDEX_op_abs_vec: 2577 case INDEX_op_not_vec: 2578 case INDEX_op_cmp_vec: 2579 case INDEX_op_shli_vec: 2580 case INDEX_op_shri_vec: 2581 case INDEX_op_sari_vec: 2582 case INDEX_op_ssadd_vec: 2583 case INDEX_op_sssub_vec: 2584 case INDEX_op_usadd_vec: 2585 case INDEX_op_ussub_vec: 2586 case INDEX_op_shlv_vec: 2587 case INDEX_op_bitsel_vec: 2588 return 1; 2589 case INDEX_op_rotli_vec: 2590 case INDEX_op_shrv_vec: 2591 case INDEX_op_sarv_vec: 2592 case INDEX_op_rotlv_vec: 2593 case INDEX_op_rotrv_vec: 2594 return -1; 2595 case INDEX_op_mul_vec: 2596 case INDEX_op_smax_vec: 2597 case INDEX_op_smin_vec: 2598 case INDEX_op_umax_vec: 2599 case INDEX_op_umin_vec: 2600 return vece < MO_64; 2601 2602 default: 2603 return 0; 2604 } 2605} 2606 2607void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2608 TCGArg a0, ...) 2609{ 2610 va_list va; 2611 TCGv_vec v0, v1, v2, t1, t2, c1; 2612 TCGArg a2; 2613 2614 va_start(va, a0); 2615 v0 = temp_tcgv_vec(arg_temp(a0)); 2616 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2617 a2 = va_arg(va, TCGArg); 2618 va_end(va); 2619 2620 switch (opc) { 2621 case INDEX_op_rotli_vec: 2622 t1 = tcg_temp_new_vec(type); 2623 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2624 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2625 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2626 tcg_temp_free_vec(t1); 2627 break; 2628 2629 case INDEX_op_shrv_vec: 2630 case INDEX_op_sarv_vec: 2631 /* Right shifts are negative left shifts for AArch64. 
*/ 2632 v2 = temp_tcgv_vec(arg_temp(a2)); 2633 t1 = tcg_temp_new_vec(type); 2634 tcg_gen_neg_vec(vece, t1, v2); 2635 opc = (opc == INDEX_op_shrv_vec 2636 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2637 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2638 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2639 tcg_temp_free_vec(t1); 2640 break; 2641 2642 case INDEX_op_rotlv_vec: 2643 v2 = temp_tcgv_vec(arg_temp(a2)); 2644 t1 = tcg_temp_new_vec(type); 2645 c1 = tcg_constant_vec(type, vece, 8 << vece); 2646 tcg_gen_sub_vec(vece, t1, v2, c1); 2647 /* Right shifts are negative left shifts for AArch64. */ 2648 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2649 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2650 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2651 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2652 tcg_gen_or_vec(vece, v0, v0, t1); 2653 tcg_temp_free_vec(t1); 2654 break; 2655 2656 case INDEX_op_rotrv_vec: 2657 v2 = temp_tcgv_vec(arg_temp(a2)); 2658 t1 = tcg_temp_new_vec(type); 2659 t2 = tcg_temp_new_vec(type); 2660 c1 = tcg_constant_vec(type, vece, 8 << vece); 2661 tcg_gen_neg_vec(vece, t1, v2); 2662 tcg_gen_sub_vec(vece, t2, c1, v2); 2663 /* Right shifts are negative left shifts for AArch64. 
*/ 2664 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2665 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2666 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2667 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2668 tcg_gen_or_vec(vece, v0, t1, t2); 2669 tcg_temp_free_vec(t1); 2670 tcg_temp_free_vec(t2); 2671 break; 2672 2673 default: 2674 g_assert_not_reached(); 2675 } 2676} 2677 2678static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2679{ 2680 switch (op) { 2681 case INDEX_op_goto_ptr: 2682 return C_O0_I1(r); 2683 2684 case INDEX_op_ld8u_i32: 2685 case INDEX_op_ld8s_i32: 2686 case INDEX_op_ld16u_i32: 2687 case INDEX_op_ld16s_i32: 2688 case INDEX_op_ld_i32: 2689 case INDEX_op_ld8u_i64: 2690 case INDEX_op_ld8s_i64: 2691 case INDEX_op_ld16u_i64: 2692 case INDEX_op_ld16s_i64: 2693 case INDEX_op_ld32u_i64: 2694 case INDEX_op_ld32s_i64: 2695 case INDEX_op_ld_i64: 2696 case INDEX_op_neg_i32: 2697 case INDEX_op_neg_i64: 2698 case INDEX_op_not_i32: 2699 case INDEX_op_not_i64: 2700 case INDEX_op_bswap16_i32: 2701 case INDEX_op_bswap32_i32: 2702 case INDEX_op_bswap16_i64: 2703 case INDEX_op_bswap32_i64: 2704 case INDEX_op_bswap64_i64: 2705 case INDEX_op_ext8s_i32: 2706 case INDEX_op_ext16s_i32: 2707 case INDEX_op_ext8u_i32: 2708 case INDEX_op_ext16u_i32: 2709 case INDEX_op_ext8s_i64: 2710 case INDEX_op_ext16s_i64: 2711 case INDEX_op_ext32s_i64: 2712 case INDEX_op_ext8u_i64: 2713 case INDEX_op_ext16u_i64: 2714 case INDEX_op_ext32u_i64: 2715 case INDEX_op_ext_i32_i64: 2716 case INDEX_op_extu_i32_i64: 2717 case INDEX_op_extract_i32: 2718 case INDEX_op_extract_i64: 2719 case INDEX_op_sextract_i32: 2720 case INDEX_op_sextract_i64: 2721 return C_O1_I1(r, r); 2722 2723 case INDEX_op_st8_i32: 2724 case INDEX_op_st16_i32: 2725 case INDEX_op_st_i32: 2726 case INDEX_op_st8_i64: 2727 case INDEX_op_st16_i64: 2728 case INDEX_op_st32_i64: 2729 case INDEX_op_st_i64: 2730 return C_O0_I2(rZ, r); 2731 2732 case INDEX_op_add_i32: 2733 case INDEX_op_add_i64: 2734 case 
INDEX_op_sub_i32: 2735 case INDEX_op_sub_i64: 2736 case INDEX_op_setcond_i32: 2737 case INDEX_op_setcond_i64: 2738 return C_O1_I2(r, r, rA); 2739 2740 case INDEX_op_mul_i32: 2741 case INDEX_op_mul_i64: 2742 case INDEX_op_div_i32: 2743 case INDEX_op_div_i64: 2744 case INDEX_op_divu_i32: 2745 case INDEX_op_divu_i64: 2746 case INDEX_op_rem_i32: 2747 case INDEX_op_rem_i64: 2748 case INDEX_op_remu_i32: 2749 case INDEX_op_remu_i64: 2750 case INDEX_op_muluh_i64: 2751 case INDEX_op_mulsh_i64: 2752 return C_O1_I2(r, r, r); 2753 2754 case INDEX_op_and_i32: 2755 case INDEX_op_and_i64: 2756 case INDEX_op_or_i32: 2757 case INDEX_op_or_i64: 2758 case INDEX_op_xor_i32: 2759 case INDEX_op_xor_i64: 2760 case INDEX_op_andc_i32: 2761 case INDEX_op_andc_i64: 2762 case INDEX_op_orc_i32: 2763 case INDEX_op_orc_i64: 2764 case INDEX_op_eqv_i32: 2765 case INDEX_op_eqv_i64: 2766 return C_O1_I2(r, r, rL); 2767 2768 case INDEX_op_shl_i32: 2769 case INDEX_op_shr_i32: 2770 case INDEX_op_sar_i32: 2771 case INDEX_op_rotl_i32: 2772 case INDEX_op_rotr_i32: 2773 case INDEX_op_shl_i64: 2774 case INDEX_op_shr_i64: 2775 case INDEX_op_sar_i64: 2776 case INDEX_op_rotl_i64: 2777 case INDEX_op_rotr_i64: 2778 return C_O1_I2(r, r, ri); 2779 2780 case INDEX_op_clz_i32: 2781 case INDEX_op_ctz_i32: 2782 case INDEX_op_clz_i64: 2783 case INDEX_op_ctz_i64: 2784 return C_O1_I2(r, r, rAL); 2785 2786 case INDEX_op_brcond_i32: 2787 case INDEX_op_brcond_i64: 2788 return C_O0_I2(r, rA); 2789 2790 case INDEX_op_movcond_i32: 2791 case INDEX_op_movcond_i64: 2792 return C_O1_I4(r, r, rA, rZ, rZ); 2793 2794 case INDEX_op_qemu_ld_i32: 2795 case INDEX_op_qemu_ld_i64: 2796 return C_O1_I1(r, l); 2797 case INDEX_op_qemu_st_i32: 2798 case INDEX_op_qemu_st_i64: 2799 return C_O0_I2(lZ, l); 2800 2801 case INDEX_op_deposit_i32: 2802 case INDEX_op_deposit_i64: 2803 return C_O1_I2(r, 0, rZ); 2804 2805 case INDEX_op_extract2_i32: 2806 case INDEX_op_extract2_i64: 2807 return C_O1_I2(r, rZ, rZ); 2808 2809 case INDEX_op_add2_i32: 2810 case 
INDEX_op_add2_i64: 2811 case INDEX_op_sub2_i32: 2812 case INDEX_op_sub2_i64: 2813 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 2814 2815 case INDEX_op_add_vec: 2816 case INDEX_op_sub_vec: 2817 case INDEX_op_mul_vec: 2818 case INDEX_op_xor_vec: 2819 case INDEX_op_ssadd_vec: 2820 case INDEX_op_sssub_vec: 2821 case INDEX_op_usadd_vec: 2822 case INDEX_op_ussub_vec: 2823 case INDEX_op_smax_vec: 2824 case INDEX_op_smin_vec: 2825 case INDEX_op_umax_vec: 2826 case INDEX_op_umin_vec: 2827 case INDEX_op_shlv_vec: 2828 case INDEX_op_shrv_vec: 2829 case INDEX_op_sarv_vec: 2830 case INDEX_op_aa64_sshl_vec: 2831 return C_O1_I2(w, w, w); 2832 case INDEX_op_not_vec: 2833 case INDEX_op_neg_vec: 2834 case INDEX_op_abs_vec: 2835 case INDEX_op_shli_vec: 2836 case INDEX_op_shri_vec: 2837 case INDEX_op_sari_vec: 2838 return C_O1_I1(w, w); 2839 case INDEX_op_ld_vec: 2840 case INDEX_op_dupm_vec: 2841 return C_O1_I1(w, r); 2842 case INDEX_op_st_vec: 2843 return C_O0_I2(w, r); 2844 case INDEX_op_dup_vec: 2845 return C_O1_I1(w, wr); 2846 case INDEX_op_or_vec: 2847 case INDEX_op_andc_vec: 2848 return C_O1_I2(w, w, wO); 2849 case INDEX_op_and_vec: 2850 case INDEX_op_orc_vec: 2851 return C_O1_I2(w, w, wN); 2852 case INDEX_op_cmp_vec: 2853 return C_O1_I2(w, w, wZ); 2854 case INDEX_op_bitsel_vec: 2855 return C_O1_I3(w, w, w, w); 2856 case INDEX_op_aa64_sli_vec: 2857 return C_O1_I2(w, 0, w); 2858 2859 default: 2860 g_assert_not_reached(); 2861 } 2862} 2863 2864static void tcg_target_init(TCGContext *s) 2865{ 2866 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2867 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2868 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2869 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2870 2871 tcg_target_call_clobber_regs = -1ull; 2872 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2873 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2874 tcg_regset_reset_reg(tcg_target_call_clobber_regs, 
TCG_REG_X21); 2875 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2876 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2877 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2878 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2879 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2880 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2881 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2882 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2883 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2884 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2885 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2886 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2887 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2888 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 2889 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 2890 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 2891 2892 s->reserved_regs = 0; 2893 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2894 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 2895 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 2896 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 2897 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 2898} 2899 2900/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 2901#define PUSH_SIZE ((30 - 19 + 1) * 8) 2902 2903#define FRAME_SIZE \ 2904 ((PUSH_SIZE \ 2905 + TCG_STATIC_CALL_ARGS_SIZE \ 2906 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 2907 + TCG_TARGET_STACK_ALIGN - 1) \ 2908 & ~(TCG_TARGET_STACK_ALIGN - 1)) 2909 2910/* We're expecting a 2 byte uleb128 encoded value. */ 2911QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 2912 2913/* We're expecting to use a single ADDI insn. 
*/ 2914QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 2915 2916static void tcg_target_qemu_prologue(TCGContext *s) 2917{ 2918 TCGReg r; 2919 2920 /* Push (FP, LR) and allocate space for all saved registers. */ 2921 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 2922 TCG_REG_SP, -PUSH_SIZE, 1, 1); 2923 2924 /* Set up frame pointer for canonical unwinding. */ 2925 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 2926 2927 /* Store callee-preserved regs x19..x28. */ 2928 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2929 int ofs = (r - TCG_REG_X19 + 2) * 8; 2930 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2931 } 2932 2933 /* Make stack space for TCG locals. */ 2934 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2935 FRAME_SIZE - PUSH_SIZE); 2936 2937 /* Inform TCG about how to find TCG locals with register, offset, size. */ 2938 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 2939 CPU_TEMP_BUF_NLONGS * sizeof(long)); 2940 2941#if !defined(CONFIG_SOFTMMU) 2942 if (USE_GUEST_BASE) { 2943 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 2944 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 2945 } 2946#endif 2947 2948 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2949 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 2950 2951 /* 2952 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 2953 * and fall through to the rest of the epilogue. 2954 */ 2955 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2956 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 2957 2958 /* TB epilogue */ 2959 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 2960 2961 /* Remove TCG locals stack space. */ 2962 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2963 FRAME_SIZE - PUSH_SIZE); 2964 2965 /* Restore registers x19..x28. 
*/ 2966 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2967 int ofs = (r - TCG_REG_X19 + 2) * 8; 2968 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2969 } 2970 2971 /* Pop (FP, LR), restore SP to previous frame. */ 2972 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 2973 TCG_REG_SP, PUSH_SIZE, 0, 1); 2974 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 2975} 2976 2977static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2978{ 2979 int i; 2980 for (i = 0; i < count; ++i) { 2981 p[i] = NOP; 2982 } 2983} 2984 2985typedef struct { 2986 DebugFrameHeader h; 2987 uint8_t fde_def_cfa[4]; 2988 uint8_t fde_reg_ofs[24]; 2989} DebugFrame; 2990 2991#define ELF_HOST_MACHINE EM_AARCH64 2992 2993static const DebugFrame debug_frame = { 2994 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 2995 .h.cie.id = -1, 2996 .h.cie.version = 1, 2997 .h.cie.code_align = 1, 2998 .h.cie.data_align = 0x78, /* sleb128 -8 */ 2999 .h.cie.return_column = TCG_REG_LR, 3000 3001 /* Total FDE size does not include the "len" member. */ 3002 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 3003 3004 .fde_def_cfa = { 3005 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 3006 (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ 3007 (FRAME_SIZE >> 7) 3008 }, 3009 .fde_reg_ofs = { 3010 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 3011 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 3012 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 3013 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 3014 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 3015 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 3016 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 3017 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 3018 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 3019 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ 3020 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 3021 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 3022 } 3023}; 3024 3025void tcg_register_jit(const void *buf, size_t buf_size) 3026{ 3027 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 3028} 3029