/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"
#ifdef __linux__
#include <asm/hwcap.h>
#endif
#ifdef CONFIG_DARWIN
#include <sys/sysctl.h>
#endif

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

bool have_lse;
bool have_lse2;

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.
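           A signed 26-bit word offset reaches +/-128 MiB from the insn.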
           */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.
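   I.e. a value expressible as an FMOV 8-bit immediate, +/- n/16 * 2**r
   with 16 <= n <= 31 and -3 <= r <= 4.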
   */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.
           */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,      /* store */
    LDST_LD = 1,      /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.
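       (3.3.10 is the register-offset form and 3.3.13 the scaled
       unsigned-immediate form; see I3312_TO_I3310 and I3312_TO_I3313 below.)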
       */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).
       */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.
   */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
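    /*
     * The 0x4000 | ext << 13 term selects the offset-register extension
     * option: UXTW for a 32-bit index register, LSL (UXTX) for a 64-bit
     * one, both with shift amount 0.
     */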
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
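         * E.g. v16 == 0x1234 becomes MOVI (cmode 0x8, imm8 0x34)
         * followed by ORR (cmode 0xa, imm8 0x12).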
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.
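       E.g. 0x00000000fffffffful becomes a single MOVN of the W register,
       which also zeroes the high half of the X register.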
       */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding.  */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.
       */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /* Choose X9 as a call-clobbered non-LR temporary.
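           (X9 is merely the first of the AAPCS64 scratch registers X9-X15.)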
           */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
    }
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/*
 * For softmmu, perform the TLB load and compare.
 * For useronly, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   false);
    a_mask = (1 << h->aa.align) - 1;

#ifdef CONFIG_SOFTMMU
    unsigned s_bits = opc & MO_SIZE;
    unsigned s_mask = (1u << s_bits) - 1;
    unsigned mem_index = get_mmuidx(oi);
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    ldst = new_ldst_label(s);
    ldst->is_ld = is_ld;
    ldst->oi = oi;
    ldst->addrlo_reg = addr_reg;

    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 s->page_bits - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.
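       At this point X0 holds (addr >> (page_bits - CPU_TLB_ENTRY_BITS)) & mask,
       i.e. the byte offset of the CPUTLBEntry within the table.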
       */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
               is_ld ? offsetof(CPUTLBEntry, addr_read)
                     : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /*
     * For aligned accesses, we check the first byte and include the alignment
     * bits within the address.  For unaligned access, we check that we don't
     * cross pages using the address of the last byte of the access.
     */
    if (a_mask >= s_mask) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, addr_type,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)s->page_mask | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    ldst->label_ptr[0] = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    h->base = TCG_REG_X1;
    h->index = addr_reg;
    h->index_ext = addr_type;
#else
    if (a_mask) {
        ldst = new_ldst_label(s);

        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        /* tst addr, #mask */
        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

        /* b.ne slow_path */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
    }

    if (guest_base || addr_type == TCG_TYPE_I32) {
        h->base = TCG_REG_GUEST_BASE;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        h->base = addr_reg;
        h->index = TCG_REG_XZR;
        h->index_ext = TCG_TYPE_I64;
    }
#endif

    return ldst;
}

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    /* Reuse the zeroing that exists for goto_ptr.  */
    if (a0 == 0) {
        tcg_out_goto_long(s, tcg_code_gen_epilogue);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        tcg_out_goto_long(s, tb_ret_addr);
    }
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    set_jmp_reset_offset(s, which);
}

void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t d_addr = tb->jmp_target_addr[n];
    ptrdiff_t d_offset = d_addr - jmp_rx;
    tcg_insn_unit insn;

    /* Either directly branch, or indirect branch load.  */
    if (d_offset == sextract64(d_offset, 0, 28)) {
        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
    } else {
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        ptrdiff_t i_offset = i_addr - jmp_rx;

        /* Note that we asserted this in range in tcg_out_goto_tb.
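           The 19-bit scaled literal offset of LDR reaches +/-1 MiB.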
           */
        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
    }
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
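            /* BIC computes a1 & ~a2 in a single instruction.  */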
tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2004 } 2005 break; 2006 2007 case INDEX_op_or_i32: 2008 a2 = (int32_t)a2; 2009 /* FALLTHRU */ 2010 case INDEX_op_or_i64: 2011 if (c2) { 2012 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2013 } else { 2014 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2015 } 2016 break; 2017 2018 case INDEX_op_orc_i32: 2019 a2 = (int32_t)a2; 2020 /* FALLTHRU */ 2021 case INDEX_op_orc_i64: 2022 if (c2) { 2023 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); 2024 } else { 2025 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2026 } 2027 break; 2028 2029 case INDEX_op_xor_i32: 2030 a2 = (int32_t)a2; 2031 /* FALLTHRU */ 2032 case INDEX_op_xor_i64: 2033 if (c2) { 2034 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2035 } else { 2036 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2037 } 2038 break; 2039 2040 case INDEX_op_eqv_i32: 2041 a2 = (int32_t)a2; 2042 /* FALLTHRU */ 2043 case INDEX_op_eqv_i64: 2044 if (c2) { 2045 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2046 } else { 2047 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2048 } 2049 break; 2050 2051 case INDEX_op_not_i64: 2052 case INDEX_op_not_i32: 2053 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2054 break; 2055 2056 case INDEX_op_mul_i64: 2057 case INDEX_op_mul_i32: 2058 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2059 break; 2060 2061 case INDEX_op_div_i64: 2062 case INDEX_op_div_i32: 2063 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2064 break; 2065 case INDEX_op_divu_i64: 2066 case INDEX_op_divu_i32: 2067 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2068 break; 2069 2070 case INDEX_op_rem_i64: 2071 case INDEX_op_rem_i32: 2072 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); 2073 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2074 break; 2075 case INDEX_op_remu_i64: 2076 case INDEX_op_remu_i32: 2077 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); 2078 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2079 break; 2080 2081 case INDEX_op_shl_i64: 2082 case INDEX_op_shl_i32: 2083 if (c2) { 2084 tcg_out_shl(s, ext, a0, a1, a2); 2085 } else { 2086 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2087 } 2088 break; 2089 2090 case INDEX_op_shr_i64: 2091 case INDEX_op_shr_i32: 2092 if (c2) { 2093 tcg_out_shr(s, ext, a0, a1, a2); 2094 } else { 2095 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2096 } 2097 break; 2098 2099 case INDEX_op_sar_i64: 2100 case INDEX_op_sar_i32: 2101 if (c2) { 2102 tcg_out_sar(s, ext, a0, a1, a2); 2103 } else { 2104 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2105 } 2106 break; 2107 2108 case INDEX_op_rotr_i64: 2109 case INDEX_op_rotr_i32: 2110 if (c2) { 2111 tcg_out_rotr(s, ext, a0, a1, a2); 2112 } else { 2113 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2114 } 2115 break; 2116 2117 case INDEX_op_rotl_i64: 2118 case INDEX_op_rotl_i32: 2119 if (c2) { 2120 tcg_out_rotl(s, ext, a0, a1, a2); 2121 } else { 2122 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); 2123 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); 2124 } 2125 break; 2126 2127 case INDEX_op_clz_i64: 2128 case INDEX_op_clz_i32: 2129 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2130 break; 2131 case INDEX_op_ctz_i64: 2132 case INDEX_op_ctz_i32: 2133 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2134 break; 2135 2136 case INDEX_op_brcond_i32: 2137 a1 = (int32_t)a1; 2138 /* FALLTHRU */ 2139 case INDEX_op_brcond_i64: 2140 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2141 break; 2142 2143 case INDEX_op_setcond_i32: 2144 a2 = 
(int32_t)a2; 2145 /* FALLTHRU */ 2146 case INDEX_op_setcond_i64: 2147 tcg_out_cmp(s, ext, a1, a2, c2); 2148 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2149 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2150 TCG_REG_XZR, tcg_invert_cond(args[3])); 2151 break; 2152 2153 case INDEX_op_movcond_i32: 2154 a2 = (int32_t)a2; 2155 /* FALLTHRU */ 2156 case INDEX_op_movcond_i64: 2157 tcg_out_cmp(s, ext, a1, a2, c2); 2158 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2159 break; 2160 2161 case INDEX_op_qemu_ld_a32_i32: 2162 case INDEX_op_qemu_ld_a64_i32: 2163 case INDEX_op_qemu_ld_a32_i64: 2164 case INDEX_op_qemu_ld_a64_i64: 2165 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2166 break; 2167 case INDEX_op_qemu_st_a32_i32: 2168 case INDEX_op_qemu_st_a64_i32: 2169 case INDEX_op_qemu_st_a32_i64: 2170 case INDEX_op_qemu_st_a64_i64: 2171 tcg_out_qemu_st(s, REG0(0), a1, a2, ext); 2172 break; 2173 2174 case INDEX_op_bswap64_i64: 2175 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2176 break; 2177 case INDEX_op_bswap32_i64: 2178 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2179 if (a2 & TCG_BSWAP_OS) { 2180 tcg_out_ext32s(s, a0, a0); 2181 } 2182 break; 2183 case INDEX_op_bswap32_i32: 2184 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2185 break; 2186 case INDEX_op_bswap16_i64: 2187 case INDEX_op_bswap16_i32: 2188 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2189 if (a2 & TCG_BSWAP_OS) { 2190 /* Output must be sign-extended. */ 2191 tcg_out_ext16s(s, ext, a0, a0); 2192 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2193 /* Output must be zero-extended, but input isn't. */ 2194 tcg_out_ext16u(s, a0, a0); 2195 } 2196 break; 2197 2198 case INDEX_op_deposit_i64: 2199 case INDEX_op_deposit_i32: 2200 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2201 break; 2202 2203 case INDEX_op_extract_i64: 2204 case INDEX_op_extract_i32: 2205 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2206 break; 2207 2208 case INDEX_op_sextract_i64: 2209 case INDEX_op_sextract_i32: 2210 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2211 break; 2212 2213 case INDEX_op_extract2_i64: 2214 case INDEX_op_extract2_i32: 2215 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2216 break; 2217 2218 case INDEX_op_add2_i32: 2219 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2220 (int32_t)args[4], args[5], const_args[4], 2221 const_args[5], false); 2222 break; 2223 case INDEX_op_add2_i64: 2224 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2225 args[5], const_args[4], const_args[5], false); 2226 break; 2227 case INDEX_op_sub2_i32: 2228 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2229 (int32_t)args[4], args[5], const_args[4], 2230 const_args[5], true); 2231 break; 2232 case INDEX_op_sub2_i64: 2233 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2234 args[5], const_args[4], const_args[5], true); 2235 break; 2236 2237 case INDEX_op_muluh_i64: 2238 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2239 break; 2240 case INDEX_op_mulsh_i64: 2241 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2242 break; 2243 2244 case INDEX_op_mb: 2245 tcg_out_mb(s, a0); 2246 break; 2247 2248 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2249 case INDEX_op_mov_i64: 2250 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2251 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2252 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. 
*/ 2253 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 2254 case INDEX_op_ext8s_i64: 2255 case INDEX_op_ext8u_i32: 2256 case INDEX_op_ext8u_i64: 2257 case INDEX_op_ext16s_i64: 2258 case INDEX_op_ext16s_i32: 2259 case INDEX_op_ext16u_i64: 2260 case INDEX_op_ext16u_i32: 2261 case INDEX_op_ext32s_i64: 2262 case INDEX_op_ext32u_i64: 2263 case INDEX_op_ext_i32_i64: 2264 case INDEX_op_extu_i32_i64: 2265 case INDEX_op_extrl_i64_i32: 2266 default: 2267 g_assert_not_reached(); 2268 } 2269 2270#undef REG0 2271} 2272 2273static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2274 unsigned vecl, unsigned vece, 2275 const TCGArg args[TCG_MAX_OP_ARGS], 2276 const int const_args[TCG_MAX_OP_ARGS]) 2277{ 2278 static const AArch64Insn cmp_vec_insn[16] = { 2279 [TCG_COND_EQ] = I3616_CMEQ, 2280 [TCG_COND_GT] = I3616_CMGT, 2281 [TCG_COND_GE] = I3616_CMGE, 2282 [TCG_COND_GTU] = I3616_CMHI, 2283 [TCG_COND_GEU] = I3616_CMHS, 2284 }; 2285 static const AArch64Insn cmp_scalar_insn[16] = { 2286 [TCG_COND_EQ] = I3611_CMEQ, 2287 [TCG_COND_GT] = I3611_CMGT, 2288 [TCG_COND_GE] = I3611_CMGE, 2289 [TCG_COND_GTU] = I3611_CMHI, 2290 [TCG_COND_GEU] = I3611_CMHS, 2291 }; 2292 static const AArch64Insn cmp0_vec_insn[16] = { 2293 [TCG_COND_EQ] = I3617_CMEQ0, 2294 [TCG_COND_GT] = I3617_CMGT0, 2295 [TCG_COND_GE] = I3617_CMGE0, 2296 [TCG_COND_LT] = I3617_CMLT0, 2297 [TCG_COND_LE] = I3617_CMLE0, 2298 }; 2299 static const AArch64Insn cmp0_scalar_insn[16] = { 2300 [TCG_COND_EQ] = I3612_CMEQ0, 2301 [TCG_COND_GT] = I3612_CMGT0, 2302 [TCG_COND_GE] = I3612_CMGE0, 2303 [TCG_COND_LT] = I3612_CMLT0, 2304 [TCG_COND_LE] = I3612_CMLE0, 2305 }; 2306 2307 TCGType type = vecl + TCG_TYPE_V64; 2308 unsigned is_q = vecl; 2309 bool is_scalar = !is_q && vece == MO_64; 2310 TCGArg a0, a1, a2, a3; 2311 int cmode, imm8; 2312 2313 a0 = args[0]; 2314 a1 = args[1]; 2315 a2 = args[2]; 2316 2317 switch (opc) { 2318 case INDEX_op_ld_vec: 2319 tcg_out_ld(s, type, a0, a1, a2); 2320 break; 2321 case INDEX_op_st_vec: 2322 tcg_out_st(s, type, a0, a1, a2); 2323 break; 2324 case INDEX_op_dupm_vec: 2325 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2326 break; 2327 case INDEX_op_add_vec: 2328 if (is_scalar) { 2329 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2330 } else { 2331 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2332 } 2333 break; 2334 case INDEX_op_sub_vec: 2335 if (is_scalar) { 2336 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2337 } else { 2338 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2339 } 2340 break; 2341 case INDEX_op_mul_vec: 2342 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2343 break; 2344 case INDEX_op_neg_vec: 2345 if (is_scalar) { 2346 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2347 } else { 2348 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2349 } 2350 break; 2351 case INDEX_op_abs_vec: 2352 if (is_scalar) { 2353 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2354 } else { 2355 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2356 } 2357 break; 2358 case INDEX_op_and_vec: 2359 if (const_args[2]) { 2360 is_shimm1632(~a2, &cmode, &imm8); 2361 if (a0 == a1) { 2362 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2363 return; 2364 } 2365 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2366 a2 = a0; 2367 } 2368 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2369 break; 2370 case INDEX_op_or_vec: 2371 if (const_args[2]) { 2372 is_shimm1632(a2, &cmode, &imm8); 2373 if (a0 == a1) { 2374 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2375 return; 2376 } 2377 tcg_out_insn(s, 3606, MOVI, is_q, 
a0, 0, cmode, imm8); 2378 a2 = a0; 2379 } 2380 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2381 break; 2382 case INDEX_op_andc_vec: 2383 if (const_args[2]) { 2384 is_shimm1632(a2, &cmode, &imm8); 2385 if (a0 == a1) { 2386 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2387 return; 2388 } 2389 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2390 a2 = a0; 2391 } 2392 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2393 break; 2394 case INDEX_op_orc_vec: 2395 if (const_args[2]) { 2396 is_shimm1632(~a2, &cmode, &imm8); 2397 if (a0 == a1) { 2398 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2399 return; 2400 } 2401 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2402 a2 = a0; 2403 } 2404 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2405 break; 2406 case INDEX_op_xor_vec: 2407 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2408 break; 2409 case INDEX_op_ssadd_vec: 2410 if (is_scalar) { 2411 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2412 } else { 2413 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2414 } 2415 break; 2416 case INDEX_op_sssub_vec: 2417 if (is_scalar) { 2418 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2419 } else { 2420 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2421 } 2422 break; 2423 case INDEX_op_usadd_vec: 2424 if (is_scalar) { 2425 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2426 } else { 2427 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2428 } 2429 break; 2430 case INDEX_op_ussub_vec: 2431 if (is_scalar) { 2432 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2433 } else { 2434 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2435 } 2436 break; 2437 case INDEX_op_smax_vec: 2438 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2439 break; 2440 case INDEX_op_smin_vec: 2441 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2442 break; 2443 case INDEX_op_umax_vec: 2444 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2445 break; 2446 case INDEX_op_umin_vec: 2447 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2448 break; 2449 case INDEX_op_not_vec: 2450 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2451 break; 2452 case INDEX_op_shli_vec: 2453 if (is_scalar) { 2454 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2455 } else { 2456 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2457 } 2458 break; 2459 case INDEX_op_shri_vec: 2460 if (is_scalar) { 2461 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2462 } else { 2463 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2464 } 2465 break; 2466 case INDEX_op_sari_vec: 2467 if (is_scalar) { 2468 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2469 } else { 2470 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2471 } 2472 break; 2473 case INDEX_op_aa64_sli_vec: 2474 if (is_scalar) { 2475 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2476 } else { 2477 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2478 } 2479 break; 2480 case INDEX_op_shlv_vec: 2481 if (is_scalar) { 2482 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2483 } else { 2484 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2485 } 2486 break; 2487 case INDEX_op_aa64_sshl_vec: 2488 if (is_scalar) { 2489 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2490 } else { 2491 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2492 } 2493 break; 2494 case INDEX_op_cmp_vec: 2495 { 2496 TCGCond cond = args[3]; 2497 AArch64Insn insn; 2498 2499 if (cond == TCG_COND_NE) { 2500 if (const_args[2]) { 
2501 if (is_scalar) { 2502 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 2503 } else { 2504 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2505 } 2506 } else { 2507 if (is_scalar) { 2508 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2509 } else { 2510 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2511 } 2512 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2513 } 2514 } else { 2515 if (const_args[2]) { 2516 if (is_scalar) { 2517 insn = cmp0_scalar_insn[cond]; 2518 if (insn) { 2519 tcg_out_insn_3612(s, insn, vece, a0, a1); 2520 break; 2521 } 2522 } else { 2523 insn = cmp0_vec_insn[cond]; 2524 if (insn) { 2525 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2526 break; 2527 } 2528 } 2529 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2530 a2 = TCG_VEC_TMP; 2531 } 2532 if (is_scalar) { 2533 insn = cmp_scalar_insn[cond]; 2534 if (insn == 0) { 2535 TCGArg t; 2536 t = a1, a1 = a2, a2 = t; 2537 cond = tcg_swap_cond(cond); 2538 insn = cmp_scalar_insn[cond]; 2539 tcg_debug_assert(insn != 0); 2540 } 2541 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2542 } else { 2543 insn = cmp_vec_insn[cond]; 2544 if (insn == 0) { 2545 TCGArg t; 2546 t = a1, a1 = a2, a2 = t; 2547 cond = tcg_swap_cond(cond); 2548 insn = cmp_vec_insn[cond]; 2549 tcg_debug_assert(insn != 0); 2550 } 2551 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2552 } 2553 } 2554 } 2555 break; 2556 2557 case INDEX_op_bitsel_vec: 2558 a3 = args[3]; 2559 if (a0 == a3) { 2560 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2561 } else if (a0 == a2) { 2562 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2563 } else { 2564 if (a0 != a1) { 2565 tcg_out_mov(s, type, a0, a1); 2566 } 2567 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2568 } 2569 break; 2570 2571 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2572 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2573 default: 2574 g_assert_not_reached(); 2575 } 2576} 2577 2578int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2579{ 2580 switch (opc) { 2581 case INDEX_op_add_vec: 2582 case INDEX_op_sub_vec: 2583 case INDEX_op_and_vec: 2584 case INDEX_op_or_vec: 2585 case INDEX_op_xor_vec: 2586 case INDEX_op_andc_vec: 2587 case INDEX_op_orc_vec: 2588 case INDEX_op_neg_vec: 2589 case INDEX_op_abs_vec: 2590 case INDEX_op_not_vec: 2591 case INDEX_op_cmp_vec: 2592 case INDEX_op_shli_vec: 2593 case INDEX_op_shri_vec: 2594 case INDEX_op_sari_vec: 2595 case INDEX_op_ssadd_vec: 2596 case INDEX_op_sssub_vec: 2597 case INDEX_op_usadd_vec: 2598 case INDEX_op_ussub_vec: 2599 case INDEX_op_shlv_vec: 2600 case INDEX_op_bitsel_vec: 2601 return 1; 2602 case INDEX_op_rotli_vec: 2603 case INDEX_op_shrv_vec: 2604 case INDEX_op_sarv_vec: 2605 case INDEX_op_rotlv_vec: 2606 case INDEX_op_rotrv_vec: 2607 return -1; 2608 case INDEX_op_mul_vec: 2609 case INDEX_op_smax_vec: 2610 case INDEX_op_smin_vec: 2611 case INDEX_op_umax_vec: 2612 case INDEX_op_umin_vec: 2613 return vece < MO_64; 2614 2615 default: 2616 return 0; 2617 } 2618} 2619 2620void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2621 TCGArg a0, ...) 
2622{ 2623 va_list va; 2624 TCGv_vec v0, v1, v2, t1, t2, c1; 2625 TCGArg a2; 2626 2627 va_start(va, a0); 2628 v0 = temp_tcgv_vec(arg_temp(a0)); 2629 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2630 a2 = va_arg(va, TCGArg); 2631 va_end(va); 2632 2633 switch (opc) { 2634 case INDEX_op_rotli_vec: 2635 t1 = tcg_temp_new_vec(type); 2636 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2637 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2638 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2639 tcg_temp_free_vec(t1); 2640 break; 2641 2642 case INDEX_op_shrv_vec: 2643 case INDEX_op_sarv_vec: 2644 /* Right shifts are negative left shifts for AArch64. */ 2645 v2 = temp_tcgv_vec(arg_temp(a2)); 2646 t1 = tcg_temp_new_vec(type); 2647 tcg_gen_neg_vec(vece, t1, v2); 2648 opc = (opc == INDEX_op_shrv_vec 2649 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2650 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2651 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2652 tcg_temp_free_vec(t1); 2653 break; 2654 2655 case INDEX_op_rotlv_vec: 2656 v2 = temp_tcgv_vec(arg_temp(a2)); 2657 t1 = tcg_temp_new_vec(type); 2658 c1 = tcg_constant_vec(type, vece, 8 << vece); 2659 tcg_gen_sub_vec(vece, t1, v2, c1); 2660 /* Right shifts are negative left shifts for AArch64. */ 2661 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2662 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2663 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2664 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2665 tcg_gen_or_vec(vece, v0, v0, t1); 2666 tcg_temp_free_vec(t1); 2667 break; 2668 2669 case INDEX_op_rotrv_vec: 2670 v2 = temp_tcgv_vec(arg_temp(a2)); 2671 t1 = tcg_temp_new_vec(type); 2672 t2 = tcg_temp_new_vec(type); 2673 c1 = tcg_constant_vec(type, vece, 8 << vece); 2674 tcg_gen_neg_vec(vece, t1, v2); 2675 tcg_gen_sub_vec(vece, t2, c1, v2); 2676 /* Right shifts are negative left shifts for AArch64. 
*/ 2677 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2678 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2679 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2680 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2681 tcg_gen_or_vec(vece, v0, t1, t2); 2682 tcg_temp_free_vec(t1); 2683 tcg_temp_free_vec(t2); 2684 break; 2685 2686 default: 2687 g_assert_not_reached(); 2688 } 2689} 2690 2691static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2692{ 2693 switch (op) { 2694 case INDEX_op_goto_ptr: 2695 return C_O0_I1(r); 2696 2697 case INDEX_op_ld8u_i32: 2698 case INDEX_op_ld8s_i32: 2699 case INDEX_op_ld16u_i32: 2700 case INDEX_op_ld16s_i32: 2701 case INDEX_op_ld_i32: 2702 case INDEX_op_ld8u_i64: 2703 case INDEX_op_ld8s_i64: 2704 case INDEX_op_ld16u_i64: 2705 case INDEX_op_ld16s_i64: 2706 case INDEX_op_ld32u_i64: 2707 case INDEX_op_ld32s_i64: 2708 case INDEX_op_ld_i64: 2709 case INDEX_op_neg_i32: 2710 case INDEX_op_neg_i64: 2711 case INDEX_op_not_i32: 2712 case INDEX_op_not_i64: 2713 case INDEX_op_bswap16_i32: 2714 case INDEX_op_bswap32_i32: 2715 case INDEX_op_bswap16_i64: 2716 case INDEX_op_bswap32_i64: 2717 case INDEX_op_bswap64_i64: 2718 case INDEX_op_ext8s_i32: 2719 case INDEX_op_ext16s_i32: 2720 case INDEX_op_ext8u_i32: 2721 case INDEX_op_ext16u_i32: 2722 case INDEX_op_ext8s_i64: 2723 case INDEX_op_ext16s_i64: 2724 case INDEX_op_ext32s_i64: 2725 case INDEX_op_ext8u_i64: 2726 case INDEX_op_ext16u_i64: 2727 case INDEX_op_ext32u_i64: 2728 case INDEX_op_ext_i32_i64: 2729 case INDEX_op_extu_i32_i64: 2730 case INDEX_op_extract_i32: 2731 case INDEX_op_extract_i64: 2732 case INDEX_op_sextract_i32: 2733 case INDEX_op_sextract_i64: 2734 return C_O1_I1(r, r); 2735 2736 case INDEX_op_st8_i32: 2737 case INDEX_op_st16_i32: 2738 case INDEX_op_st_i32: 2739 case INDEX_op_st8_i64: 2740 case INDEX_op_st16_i64: 2741 case INDEX_op_st32_i64: 2742 case INDEX_op_st_i64: 2743 return C_O0_I2(rZ, r); 2744 2745 case INDEX_op_add_i32: 2746 case INDEX_op_add_i64: 2747 case INDEX_op_sub_i32: 2748 case INDEX_op_sub_i64: 2749 case INDEX_op_setcond_i32: 2750 case INDEX_op_setcond_i64: 2751 return C_O1_I2(r, r, rA); 2752 2753 case INDEX_op_mul_i32: 2754 case INDEX_op_mul_i64: 2755 case INDEX_op_div_i32: 2756 case INDEX_op_div_i64: 2757 case INDEX_op_divu_i32: 2758 case INDEX_op_divu_i64: 2759 case INDEX_op_rem_i32: 2760 case INDEX_op_rem_i64: 2761 case INDEX_op_remu_i32: 2762 case INDEX_op_remu_i64: 2763 case INDEX_op_muluh_i64: 2764 case INDEX_op_mulsh_i64: 2765 return C_O1_I2(r, r, r); 2766 2767 case INDEX_op_and_i32: 2768 case INDEX_op_and_i64: 2769 case INDEX_op_or_i32: 2770 case INDEX_op_or_i64: 2771 case INDEX_op_xor_i32: 2772 case INDEX_op_xor_i64: 2773 case INDEX_op_andc_i32: 2774 case INDEX_op_andc_i64: 2775 case INDEX_op_orc_i32: 2776 case INDEX_op_orc_i64: 2777 case INDEX_op_eqv_i32: 2778 case INDEX_op_eqv_i64: 2779 return C_O1_I2(r, r, rL); 2780 2781 case INDEX_op_shl_i32: 2782 case INDEX_op_shr_i32: 2783 case INDEX_op_sar_i32: 2784 case INDEX_op_rotl_i32: 2785 case INDEX_op_rotr_i32: 2786 case INDEX_op_shl_i64: 2787 case INDEX_op_shr_i64: 2788 case INDEX_op_sar_i64: 2789 case INDEX_op_rotl_i64: 2790 case INDEX_op_rotr_i64: 2791 return C_O1_I2(r, r, ri); 2792 2793 case INDEX_op_clz_i32: 2794 case INDEX_op_ctz_i32: 2795 case INDEX_op_clz_i64: 2796 case INDEX_op_ctz_i64: 2797 return C_O1_I2(r, r, rAL); 2798 2799 case INDEX_op_brcond_i32: 2800 case INDEX_op_brcond_i64: 2801 return C_O0_I2(r, rA); 2802 2803 case INDEX_op_movcond_i32: 2804 case INDEX_op_movcond_i64: 2805 return C_O1_I4(r, r, rA, rZ, rZ); 
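
    /*
     * A quick reference, not a definition: the single-letter operand
     * constraints used throughout this function are resolved by the
     * backend's constraint-string table elsewhere in the tree.  Roughly,
     * 'r' is any general register, 'w' any vector register, 'l' the
     * subset usable for qemu_ld/st addressing, and the capital letters
     * select the constant classes declared earlier in this file
     * (e.g. 'A' for addition immediates, 'L' for logical immediates,
     * 'Z' for the constant zero).
     */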

    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        return C_O1_I1(r, l);
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return C_O0_I2(lZ, l);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}

#ifdef CONFIG_DARWIN
static bool sysctl_for_bool(const char *name)
{
    int val = 0;
    size_t len = sizeof(val);

    if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
        return val != 0;
    }

    /*
     * We might in the future ask for properties not present in older kernels,
     * but we're only asking about static properties, all of which should be
     * 'int'. So we shouldn't see ENOMEM (val too small), or any of the other
     * more exotic errors.
     */
    assert(errno == ENOENT);
    return false;
}
#endif

static void tcg_target_init(TCGContext *s)
{
#ifdef __linux__
    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
    have_lse = hwcap & HWCAP_ATOMICS;
    have_lse2 = hwcap & HWCAP_USCAT;
#endif
#ifdef CONFIG_DARWIN
    have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
    have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
#endif

    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn. */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers. */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding. */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
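
    /*
     * A rough sketch of the frame at this point (assuming the PUSH_SIZE
     * and FRAME_SIZE definitions above; offsets are from the post-push SP):
     *   sp +  0 : x29 (fp), x30 (lr)   -- stored by the STP above
     *   sp + 16 : x19, x20
     *   ...
     *   sp + 80 : x27, x28             -- stored by the loop below
     * The SUB that follows then opens FRAME_SIZE - PUSH_SIZE bytes below
     * these for the static call arguments and the TCG temporary buffer.
     */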

    /* Store callee-preserved regs x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals. */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    /*
     * Note that XZR cannot be encoded in the address base register slot,
     * as that actually encodes SP.  Depending on the guest, we may need
     * to zero-extend the guest address via the address index register slot,
     * therefore we need to load even a zero guest base into a register.
     */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space. */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame. */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}
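
/*
 * Reading aid for the unwind info below, assuming the standard DWARF CFA
 * semantics rather than anything specific to this file: DW_CFA_def_cfa
 * defines the canonical frame address as sp + FRAME_SIZE (the SP on entry
 * to the prologue above), and each DW_CFA_offset entry records a
 * callee-saved register stored at CFA minus the factored offset times 8,
 * the factor being the sleb128 -8 data alignment declared in the CIE.
 */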

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr,  -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp,  -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}