1/* 2 * Initial TCG Implementation for aarch64 3 * 4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH 5 * Written by Claudio Fontana 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or 8 * (at your option) any later version. 9 * 10 * See the COPYING file in the top-level directory for details. 11 */ 12 13#include "../tcg-pool.c.inc" 14#include "qemu/bitops.h" 15 16/* We're going to re-use TCGType in setting of the SF bit, which controls 17 the size of the operation performed. If we know the values match, it 18 makes things much cleaner. */ 19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); 20 21#ifdef CONFIG_DEBUG_TCG 22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp", 27 28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31", 32}; 33#endif /* CONFIG_DEBUG_TCG */ 34 35static const int tcg_target_reg_alloc_order[] = { 36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, 37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, 38 TCG_REG_X28, /* we will reserve this for guest_base if configured */ 39 40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, 41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, 42 TCG_REG_X16, TCG_REG_X17, 43 44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, 46 47 /* X18 reserved by system */ 48 /* X19 reserved for AREG0 */ 49 /* X29 reserved as fp */ 50 /* X30 reserved as temporary */ 51 52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, 53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, 54 /* V8 - V15 are call-saved, and skipped. */ 55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, 56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, 57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, 58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, 59}; 60 61static const int tcg_target_call_iarg_regs[8] = { 62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 64}; 65static const int tcg_target_call_oarg_regs[1] = { 66 TCG_REG_X0 67}; 68 69#define TCG_REG_TMP TCG_REG_X30 70#define TCG_VEC_TMP TCG_REG_V31 71 72#ifndef CONFIG_SOFTMMU 73/* Note that XZR cannot be encoded in the address base register slot, 74 as that actaully encodes SP. So if we need to zero-extend the guest 75 address, via the address index register slot, we need to load even 76 a zero guest base into a register. */ 77#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) 78#define TCG_REG_GUEST_BASE TCG_REG_X28 79#endif 80 81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 82{ 83 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 84 ptrdiff_t offset = target - src_rx; 85 86 if (offset == sextract64(offset, 0, 26)) { 87 /* read instruction, mask away previous PC_REL26 parameter contents, 88 set the proper offset, then write back the instruction. 
*/ 89 *src_rw = deposit32(*src_rw, 0, 26, offset); 90 return true; 91 } 92 return false; 93} 94 95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 96{ 97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 98 ptrdiff_t offset = target - src_rx; 99 100 if (offset == sextract64(offset, 0, 19)) { 101 *src_rw = deposit32(*src_rw, 5, 19, offset); 102 return true; 103 } 104 return false; 105} 106 107static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 108 intptr_t value, intptr_t addend) 109{ 110 tcg_debug_assert(addend == 0); 111 switch (type) { 112 case R_AARCH64_JUMP26: 113 case R_AARCH64_CALL26: 114 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 115 case R_AARCH64_CONDBR19: 116 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 117 default: 118 g_assert_not_reached(); 119 } 120} 121 122#define TCG_CT_CONST_AIMM 0x100 123#define TCG_CT_CONST_LIMM 0x200 124#define TCG_CT_CONST_ZERO 0x400 125#define TCG_CT_CONST_MONE 0x800 126#define TCG_CT_CONST_ORRI 0x1000 127#define TCG_CT_CONST_ANDI 0x2000 128 129/* parse target specific constraints */ 130static const char *target_parse_constraint(TCGArgConstraint *ct, 131 const char *ct_str, TCGType type) 132{ 133 switch (*ct_str++) { 134 case 'r': /* general registers */ 135 ct->regs |= 0xffffffffu; 136 break; 137 case 'w': /* advsimd registers */ 138 ct->regs |= 0xffffffff00000000ull; 139 break; 140 case 'l': /* qemu_ld / qemu_st address, data_reg */ 141 ct->regs = 0xffffffffu; 142#ifdef CONFIG_SOFTMMU 143 /* x0 and x1 will be overwritten when reading the tlb entry, 144 and x2, and x3 for helper args, better to avoid using them. */ 145 tcg_regset_reset_reg(ct->regs, TCG_REG_X0); 146 tcg_regset_reset_reg(ct->regs, TCG_REG_X1); 147 tcg_regset_reset_reg(ct->regs, TCG_REG_X2); 148 tcg_regset_reset_reg(ct->regs, TCG_REG_X3); 149#endif 150 break; 151 case 'A': /* Valid for arithmetic immediate (positive or negative). */ 152 ct->ct |= TCG_CT_CONST_AIMM; 153 break; 154 case 'L': /* Valid for logical immediate. */ 155 ct->ct |= TCG_CT_CONST_LIMM; 156 break; 157 case 'M': /* minus one */ 158 ct->ct |= TCG_CT_CONST_MONE; 159 break; 160 case 'O': /* vector orr/bic immediate */ 161 ct->ct |= TCG_CT_CONST_ORRI; 162 break; 163 case 'N': /* vector orr/bic immediate, inverted */ 164 ct->ct |= TCG_CT_CONST_ANDI; 165 break; 166 case 'Z': /* zero */ 167 ct->ct |= TCG_CT_CONST_ZERO; 168 break; 169 default: 170 return NULL; 171 } 172 return ct_str; 173} 174 175/* Match a constant valid for addition (12-bit, optionally shifted). */ 176static inline bool is_aimm(uint64_t val) 177{ 178 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 179} 180 181/* Match a constant valid for logical operations. */ 182static inline bool is_limm(uint64_t val) 183{ 184 /* Taking a simplified view of the logical immediates for now, ignoring 185 the replication that can happen across the field. Match bit patterns 186 of the forms 187 0....01....1 188 0..01..10..0 189 and their inverses. */ 190 191 /* Make things easier below, by testing the form with msb clear. */ 192 if ((int64_t)val < 0) { 193 val = ~val; 194 } 195 if (val == 0) { 196 return false; 197 } 198 val += val & -val; 199 return (val & (val - 1)) == 0; 200} 201 202/* Return true if v16 is a valid 16-bit shifted immediate. 
*/ 203static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 204{ 205 if (v16 == (v16 & 0xff)) { 206 *cmode = 0x8; 207 *imm8 = v16 & 0xff; 208 return true; 209 } else if (v16 == (v16 & 0xff00)) { 210 *cmode = 0xa; 211 *imm8 = v16 >> 8; 212 return true; 213 } 214 return false; 215} 216 217/* Return true if v32 is a valid 32-bit shifted immediate. */ 218static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 219{ 220 if (v32 == (v32 & 0xff)) { 221 *cmode = 0x0; 222 *imm8 = v32 & 0xff; 223 return true; 224 } else if (v32 == (v32 & 0xff00)) { 225 *cmode = 0x2; 226 *imm8 = (v32 >> 8) & 0xff; 227 return true; 228 } else if (v32 == (v32 & 0xff0000)) { 229 *cmode = 0x4; 230 *imm8 = (v32 >> 16) & 0xff; 231 return true; 232 } else if (v32 == (v32 & 0xff000000)) { 233 *cmode = 0x6; 234 *imm8 = v32 >> 24; 235 return true; 236 } 237 return false; 238} 239 240/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 241static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 242{ 243 if ((v32 & 0xffff00ff) == 0xff) { 244 *cmode = 0xc; 245 *imm8 = (v32 >> 8) & 0xff; 246 return true; 247 } else if ((v32 & 0xff00ffff) == 0xffff) { 248 *cmode = 0xd; 249 *imm8 = (v32 >> 16) & 0xff; 250 return true; 251 } 252 return false; 253} 254 255/* Return true if v32 is a valid float32 immediate. */ 256static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 257{ 258 if (extract32(v32, 0, 19) == 0 259 && (extract32(v32, 25, 6) == 0x20 260 || extract32(v32, 25, 6) == 0x1f)) { 261 *cmode = 0xf; 262 *imm8 = (extract32(v32, 31, 1) << 7) 263 | (extract32(v32, 25, 1) << 6) 264 | extract32(v32, 19, 6); 265 return true; 266 } 267 return false; 268} 269 270/* Return true if v64 is a valid float64 immediate. */ 271static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 272{ 273 if (extract64(v64, 0, 48) == 0 274 && (extract64(v64, 54, 9) == 0x100 275 || extract64(v64, 54, 9) == 0x0ff)) { 276 *cmode = 0xf; 277 *imm8 = (extract64(v64, 63, 1) << 7) 278 | (extract64(v64, 54, 1) << 6) 279 | extract64(v64, 48, 6); 280 return true; 281 } 282 return false; 283} 284 285/* 286 * Return non-zero if v32 can be formed by MOVI+ORR. 287 * Place the parameters for MOVI in (cmode, imm8). 288 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 289 */ 290static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 291{ 292 int i; 293 294 for (i = 6; i > 0; i -= 2) { 295 /* Mask out one byte we can add with ORR. */ 296 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 297 if (is_shimm32(tmp, cmode, imm8) || 298 is_soimm32(tmp, cmode, imm8)) { 299 break; 300 } 301 } 302 return i; 303} 304 305/* Return true if V is a valid 16-bit or 32-bit shifted immediate. 
*/ 306static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 307{ 308 if (v32 == deposit32(v32, 16, 16, v32)) { 309 return is_shimm16(v32, cmode, imm8); 310 } else { 311 return is_shimm32(v32, cmode, imm8); 312 } 313} 314 315static int tcg_target_const_match(tcg_target_long val, TCGType type, 316 const TCGArgConstraint *arg_ct) 317{ 318 int ct = arg_ct->ct; 319 320 if (ct & TCG_CT_CONST) { 321 return 1; 322 } 323 if (type == TCG_TYPE_I32) { 324 val = (int32_t)val; 325 } 326 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 327 return 1; 328 } 329 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 330 return 1; 331 } 332 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 333 return 1; 334 } 335 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 336 return 1; 337 } 338 339 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 340 case 0: 341 break; 342 case TCG_CT_CONST_ANDI: 343 val = ~val; 344 /* fallthru */ 345 case TCG_CT_CONST_ORRI: 346 if (val == deposit64(val, 32, 32, val)) { 347 int cmode, imm8; 348 return is_shimm1632(val, &cmode, &imm8); 349 } 350 break; 351 default: 352 /* Both bits should not be set for the same insn. */ 353 g_assert_not_reached(); 354 } 355 356 return 0; 357} 358 359enum aarch64_cond_code { 360 COND_EQ = 0x0, 361 COND_NE = 0x1, 362 COND_CS = 0x2, /* Unsigned greater or equal */ 363 COND_HS = COND_CS, /* ALIAS greater or equal */ 364 COND_CC = 0x3, /* Unsigned less than */ 365 COND_LO = COND_CC, /* ALIAS Lower */ 366 COND_MI = 0x4, /* Negative */ 367 COND_PL = 0x5, /* Zero or greater */ 368 COND_VS = 0x6, /* Overflow */ 369 COND_VC = 0x7, /* No overflow */ 370 COND_HI = 0x8, /* Unsigned greater than */ 371 COND_LS = 0x9, /* Unsigned less or equal */ 372 COND_GE = 0xa, 373 COND_LT = 0xb, 374 COND_GT = 0xc, 375 COND_LE = 0xd, 376 COND_AL = 0xe, 377 COND_NV = 0xf, /* behaves like COND_AL here */ 378}; 379 380static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 381 [TCG_COND_EQ] = COND_EQ, 382 [TCG_COND_NE] = COND_NE, 383 [TCG_COND_LT] = COND_LT, 384 [TCG_COND_GE] = COND_GE, 385 [TCG_COND_LE] = COND_LE, 386 [TCG_COND_GT] = COND_GT, 387 /* unsigned */ 388 [TCG_COND_LTU] = COND_LO, 389 [TCG_COND_GTU] = COND_HI, 390 [TCG_COND_GEU] = COND_HS, 391 [TCG_COND_LEU] = COND_LS, 392}; 393 394typedef enum { 395 LDST_ST = 0, /* store */ 396 LDST_LD = 1, /* load */ 397 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 398 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 399} AArch64LdstType; 400 401/* We encode the format of the insn into the beginning of the name, so that 402 we can have the preprocessor help "typecheck" the insn vs the output 403 function. Arm didn't provide us with nice names for the formats, so we 404 use the section number of the architecture reference manual in which the 405 instruction group is described. */ 406typedef enum { 407 /* Compare and branch (immediate). */ 408 I3201_CBZ = 0x34000000, 409 I3201_CBNZ = 0x35000000, 410 411 /* Conditional branch (immediate). */ 412 I3202_B_C = 0x54000000, 413 414 /* Unconditional branch (immediate). */ 415 I3206_B = 0x14000000, 416 I3206_BL = 0x94000000, 417 418 /* Unconditional branch (register). */ 419 I3207_BR = 0xd61f0000, 420 I3207_BLR = 0xd63f0000, 421 I3207_RET = 0xd65f0000, 422 423 /* AdvSIMD load/store single structure. */ 424 I3303_LD1R = 0x0d40c000, 425 426 /* Load literal for loading the address at pc-relative offset */ 427 I3305_LDR = 0x58000000, 428 I3305_LDR_v64 = 0x5c000000, 429 I3305_LDR_v128 = 0x9c000000, 430 431 /* Load/store register. 
Described here as 3.3.12, but the helper 432 that emits them can transform to 3.3.10 or 3.3.13. */ 433 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 434 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 435 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 436 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 437 438 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 439 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 440 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 441 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 442 443 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 444 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 445 446 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 447 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 448 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 449 450 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 451 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 452 453 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 454 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 455 456 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 457 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 458 459 I3312_TO_I3310 = 0x00200800, 460 I3312_TO_I3313 = 0x01000000, 461 462 /* Load/store register pair instructions. */ 463 I3314_LDP = 0x28400000, 464 I3314_STP = 0x28000000, 465 466 /* Add/subtract immediate instructions. */ 467 I3401_ADDI = 0x11000000, 468 I3401_ADDSI = 0x31000000, 469 I3401_SUBI = 0x51000000, 470 I3401_SUBSI = 0x71000000, 471 472 /* Bitfield instructions. */ 473 I3402_BFM = 0x33000000, 474 I3402_SBFM = 0x13000000, 475 I3402_UBFM = 0x53000000, 476 477 /* Extract instruction. */ 478 I3403_EXTR = 0x13800000, 479 480 /* Logical immediate instructions. */ 481 I3404_ANDI = 0x12000000, 482 I3404_ORRI = 0x32000000, 483 I3404_EORI = 0x52000000, 484 485 /* Move wide immediate instructions. */ 486 I3405_MOVN = 0x12800000, 487 I3405_MOVZ = 0x52800000, 488 I3405_MOVK = 0x72800000, 489 490 /* PC relative addressing instructions. */ 491 I3406_ADR = 0x10000000, 492 I3406_ADRP = 0x90000000, 493 494 /* Add/subtract shifted register instructions (without a shift). */ 495 I3502_ADD = 0x0b000000, 496 I3502_ADDS = 0x2b000000, 497 I3502_SUB = 0x4b000000, 498 I3502_SUBS = 0x6b000000, 499 500 /* Add/subtract shifted register instructions (with a shift). */ 501 I3502S_ADD_LSL = I3502_ADD, 502 503 /* Add/subtract with carry instructions. */ 504 I3503_ADC = 0x1a000000, 505 I3503_SBC = 0x5a000000, 506 507 /* Conditional select instructions. */ 508 I3506_CSEL = 0x1a800000, 509 I3506_CSINC = 0x1a800400, 510 I3506_CSINV = 0x5a800000, 511 I3506_CSNEG = 0x5a800400, 512 513 /* Data-processing (1 source) instructions. */ 514 I3507_CLZ = 0x5ac01000, 515 I3507_RBIT = 0x5ac00000, 516 I3507_REV16 = 0x5ac00400, 517 I3507_REV32 = 0x5ac00800, 518 I3507_REV64 = 0x5ac00c00, 519 520 /* Data-processing (2 source) instructions. */ 521 I3508_LSLV = 0x1ac02000, 522 I3508_LSRV = 0x1ac02400, 523 I3508_ASRV = 0x1ac02800, 524 I3508_RORV = 0x1ac02c00, 525 I3508_SMULH = 0x9b407c00, 526 I3508_UMULH = 0x9bc07c00, 527 I3508_UDIV = 0x1ac00800, 528 I3508_SDIV = 0x1ac00c00, 529 530 /* Data-processing (3 source) instructions. */ 531 I3509_MADD = 0x1b000000, 532 I3509_MSUB = 0x1b008000, 533 534 /* Logical shifted register instructions (without a shift). 
*/ 535 I3510_AND = 0x0a000000, 536 I3510_BIC = 0x0a200000, 537 I3510_ORR = 0x2a000000, 538 I3510_ORN = 0x2a200000, 539 I3510_EOR = 0x4a000000, 540 I3510_EON = 0x4a200000, 541 I3510_ANDS = 0x6a000000, 542 543 /* Logical shifted register instructions (with a shift). */ 544 I3502S_AND_LSR = I3510_AND | (1 << 22), 545 546 /* AdvSIMD copy */ 547 I3605_DUP = 0x0e000400, 548 I3605_INS = 0x4e001c00, 549 I3605_UMOV = 0x0e003c00, 550 551 /* AdvSIMD modified immediate */ 552 I3606_MOVI = 0x0f000400, 553 I3606_MVNI = 0x2f000400, 554 I3606_BIC = 0x2f001400, 555 I3606_ORR = 0x0f001400, 556 557 /* AdvSIMD shift by immediate */ 558 I3614_SSHR = 0x0f000400, 559 I3614_SSRA = 0x0f001400, 560 I3614_SHL = 0x0f005400, 561 I3614_SLI = 0x2f005400, 562 I3614_USHR = 0x2f000400, 563 I3614_USRA = 0x2f001400, 564 565 /* AdvSIMD three same. */ 566 I3616_ADD = 0x0e208400, 567 I3616_AND = 0x0e201c00, 568 I3616_BIC = 0x0e601c00, 569 I3616_BIF = 0x2ee01c00, 570 I3616_BIT = 0x2ea01c00, 571 I3616_BSL = 0x2e601c00, 572 I3616_EOR = 0x2e201c00, 573 I3616_MUL = 0x0e209c00, 574 I3616_ORR = 0x0ea01c00, 575 I3616_ORN = 0x0ee01c00, 576 I3616_SUB = 0x2e208400, 577 I3616_CMGT = 0x0e203400, 578 I3616_CMGE = 0x0e203c00, 579 I3616_CMTST = 0x0e208c00, 580 I3616_CMHI = 0x2e203400, 581 I3616_CMHS = 0x2e203c00, 582 I3616_CMEQ = 0x2e208c00, 583 I3616_SMAX = 0x0e206400, 584 I3616_SMIN = 0x0e206c00, 585 I3616_SSHL = 0x0e204400, 586 I3616_SQADD = 0x0e200c00, 587 I3616_SQSUB = 0x0e202c00, 588 I3616_UMAX = 0x2e206400, 589 I3616_UMIN = 0x2e206c00, 590 I3616_UQADD = 0x2e200c00, 591 I3616_UQSUB = 0x2e202c00, 592 I3616_USHL = 0x2e204400, 593 594 /* AdvSIMD two-reg misc. */ 595 I3617_CMGT0 = 0x0e208800, 596 I3617_CMEQ0 = 0x0e209800, 597 I3617_CMLT0 = 0x0e20a800, 598 I3617_CMGE0 = 0x2e208800, 599 I3617_CMLE0 = 0x2e20a800, 600 I3617_NOT = 0x2e205800, 601 I3617_ABS = 0x0e20b800, 602 I3617_NEG = 0x2e20b800, 603 604 /* System instructions. */ 605 NOP = 0xd503201f, 606 DMB_ISH = 0xd50338bf, 607 DMB_LD = 0x00000100, 608 DMB_ST = 0x00000200, 609} AArch64Insn; 610 611static inline uint32_t tcg_in32(TCGContext *s) 612{ 613 uint32_t v = *(uint32_t *)s->code_ptr; 614 return v; 615} 616 617/* Emit an opcode with "type-checking" of the format. */ 618#define tcg_out_insn(S, FMT, OP, ...) 
\ 619 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 620 621static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 622 TCGReg rt, TCGReg rn, unsigned size) 623{ 624 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 625} 626 627static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 628 int imm19, TCGReg rt) 629{ 630 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 631} 632 633static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 634 TCGReg rt, int imm19) 635{ 636 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 637} 638 639static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 640 TCGCond c, int imm19) 641{ 642 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 643} 644 645static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 646{ 647 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 648} 649 650static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 651{ 652 tcg_out32(s, insn | rn << 5); 653} 654 655static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 656 TCGReg r1, TCGReg r2, TCGReg rn, 657 tcg_target_long ofs, bool pre, bool w) 658{ 659 insn |= 1u << 31; /* ext */ 660 insn |= pre << 24; 661 insn |= w << 23; 662 663 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 664 insn |= (ofs & (0x7f << 3)) << (15 - 3); 665 666 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 667} 668 669static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 670 TCGReg rd, TCGReg rn, uint64_t aimm) 671{ 672 if (aimm > 0xfff) { 673 tcg_debug_assert((aimm & 0xfff) == 0); 674 aimm >>= 12; 675 tcg_debug_assert(aimm <= 0xfff); 676 aimm |= 1 << 12; /* apply LSL 12 */ 677 } 678 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 679} 680 681/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 682 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 683 that feed the DecodeBitMasks pseudo function. */ 684static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 685 TCGReg rd, TCGReg rn, int n, int immr, int imms) 686{ 687 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 688 | rn << 5 | rd); 689} 690 691#define tcg_out_insn_3404 tcg_out_insn_3402 692 693static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 694 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 695{ 696 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 697 | rn << 5 | rd); 698} 699 700/* This function is used for the Move (wide immediate) instruction group. 701 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 702static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 703 TCGReg rd, uint16_t half, unsigned shift) 704{ 705 tcg_debug_assert((shift & ~0x30) == 0); 706 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 707} 708 709static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 710 TCGReg rd, int64_t disp) 711{ 712 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 713} 714 715/* This function is for both 3.5.2 (Add/Subtract shifted register), for 716 the rare occasion when we actually want to supply a shift amount. 
*/ 717static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, 718 TCGType ext, TCGReg rd, TCGReg rn, 719 TCGReg rm, int imm6) 720{ 721 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); 722} 723 724/* This function is for 3.5.2 (Add/subtract shifted register), 725 and 3.5.10 (Logical shifted register), for the vast majorty of cases 726 when we don't want to apply a shift. Thus it can also be used for 727 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */ 728static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, 729 TCGReg rd, TCGReg rn, TCGReg rm) 730{ 731 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); 732} 733 734#define tcg_out_insn_3503 tcg_out_insn_3502 735#define tcg_out_insn_3508 tcg_out_insn_3502 736#define tcg_out_insn_3510 tcg_out_insn_3502 737 738static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, 739 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) 740{ 741 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd 742 | tcg_cond_to_aarch64[c] << 12); 743} 744 745static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, 746 TCGReg rd, TCGReg rn) 747{ 748 tcg_out32(s, insn | ext << 31 | rn << 5 | rd); 749} 750 751static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, 752 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) 753{ 754 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); 755} 756 757static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q, 758 TCGReg rd, TCGReg rn, int dst_idx, int src_idx) 759{ 760 /* Note that bit 11 set means general register input. Therefore 761 we can handle both register sets with one function. */ 762 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11) 763 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5); 764} 765 766static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, 767 TCGReg rd, bool op, int cmode, uint8_t imm8) 768{ 769 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f) 770 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); 771} 772 773static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, 774 TCGReg rd, TCGReg rn, unsigned immhb) 775{ 776 tcg_out32(s, insn | q << 30 | immhb << 16 777 | (rn & 0x1f) << 5 | (rd & 0x1f)); 778} 779 780static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q, 781 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 782{ 783 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16 784 | (rn & 0x1f) << 5 | (rd & 0x1f)); 785} 786 787static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q, 788 unsigned size, TCGReg rd, TCGReg rn) 789{ 790 tcg_out32(s, insn | q << 30 | (size << 22) 791 | (rn & 0x1f) << 5 | (rd & 0x1f)); 792} 793 794static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, 795 TCGReg rd, TCGReg base, TCGType ext, 796 TCGReg regoff) 797{ 798 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 799 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 800 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 801} 802 803static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 804 TCGReg rd, TCGReg rn, intptr_t offset) 805{ 806 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 807} 808 809static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 810 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 811{ 812 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. 
*/ 813 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 814 | rn << 5 | (rd & 0x1f)); 815} 816 817/* Register to register move using ORR (shifted register with no shift). */ 818static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 819{ 820 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 821} 822 823/* Register to register move using ADDI (move to/from SP). */ 824static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 825{ 826 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 827} 828 829/* This function is used for the Logical (immediate) instruction group. 830 The value of LIMM must satisfy IS_LIMM. See the comment above about 831 only supporting simplified logical immediates. */ 832static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 833 TCGReg rd, TCGReg rn, uint64_t limm) 834{ 835 unsigned h, l, r, c; 836 837 tcg_debug_assert(is_limm(limm)); 838 839 h = clz64(limm); 840 l = ctz64(limm); 841 if (l == 0) { 842 r = 0; /* form 0....01....1 */ 843 c = ctz64(~limm) - 1; 844 if (h == 0) { 845 r = clz64(~limm); /* form 1..10..01..1 */ 846 c += r; 847 } 848 } else { 849 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 850 c = r - h - 1; 851 } 852 if (ext == TCG_TYPE_I32) { 853 r &= 31; 854 c &= 31; 855 } 856 857 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 858} 859 860static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 861 TCGReg rd, int64_t v64) 862{ 863 bool q = type == TCG_TYPE_V128; 864 int cmode, imm8, i; 865 866 /* Test all bytes equal first. */ 867 if (vece == MO_8) { 868 imm8 = (uint8_t)v64; 869 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 870 return; 871 } 872 873 /* 874 * Test all bytes 0x00 or 0xff second. This can match cases that 875 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 876 */ 877 for (i = imm8 = 0; i < 8; i++) { 878 uint8_t byte = v64 >> (i * 8); 879 if (byte == 0xff) { 880 imm8 |= 1 << i; 881 } else if (byte != 0) { 882 goto fail_bytes; 883 } 884 } 885 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 886 return; 887 fail_bytes: 888 889 /* 890 * Tests for various replications. For each element width, if we 891 * cannot find an expansion there's no point checking a larger 892 * width because we already know by replication it cannot match. 893 */ 894 if (vece == MO_16) { 895 uint16_t v16 = v64; 896 897 if (is_shimm16(v16, &cmode, &imm8)) { 898 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 899 return; 900 } 901 if (is_shimm16(~v16, &cmode, &imm8)) { 902 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 903 return; 904 } 905 906 /* 907 * Otherwise, all remaining constants can be loaded in two insns: 908 * rd = v16 & 0xff, rd |= v16 & 0xff00. 909 */ 910 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 911 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 912 return; 913 } else if (vece == MO_32) { 914 uint32_t v32 = v64; 915 uint32_t n32 = ~v32; 916 917 if (is_shimm32(v32, &cmode, &imm8) || 918 is_soimm32(v32, &cmode, &imm8) || 919 is_fimm32(v32, &cmode, &imm8)) { 920 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 921 return; 922 } 923 if (is_shimm32(n32, &cmode, &imm8) || 924 is_soimm32(n32, &cmode, &imm8)) { 925 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 926 return; 927 } 928 929 /* 930 * Restrict the set of constants to those we can load with 931 * two instructions. Others we load from the pool. 
932 */ 933 i = is_shimm32_pair(v32, &cmode, &imm8); 934 if (i) { 935 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 936 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 937 return; 938 } 939 i = is_shimm32_pair(n32, &cmode, &imm8); 940 if (i) { 941 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 942 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 943 return; 944 } 945 } else if (is_fimm64(v64, &cmode, &imm8)) { 946 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 947 return; 948 } 949 950 /* 951 * As a last resort, load from the constant pool. Sadly there 952 * is no LD1R (literal), so store the full 16-byte vector. 953 */ 954 if (type == TCG_TYPE_V128) { 955 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 956 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 957 } else { 958 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 959 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 960 } 961} 962 963static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 964 TCGReg rd, TCGReg rs) 965{ 966 int is_q = type - TCG_TYPE_V64; 967 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 968 return true; 969} 970 971static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 972 TCGReg r, TCGReg base, intptr_t offset) 973{ 974 TCGReg temp = TCG_REG_TMP; 975 976 if (offset < -0xffffff || offset > 0xffffff) { 977 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 978 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 979 base = temp; 980 } else { 981 AArch64Insn add_insn = I3401_ADDI; 982 983 if (offset < 0) { 984 add_insn = I3401_SUBI; 985 offset = -offset; 986 } 987 if (offset & 0xfff000) { 988 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 989 base = temp; 990 } 991 if (offset & 0xfff) { 992 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 993 base = temp; 994 } 995 } 996 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 997 return true; 998} 999 1000static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1001 tcg_target_long value) 1002{ 1003 tcg_target_long svalue = value; 1004 tcg_target_long ivalue = ~value; 1005 tcg_target_long t0, t1, t2; 1006 int s0, s1; 1007 AArch64Insn opc; 1008 1009 switch (type) { 1010 case TCG_TYPE_I32: 1011 case TCG_TYPE_I64: 1012 tcg_debug_assert(rd < 32); 1013 break; 1014 default: 1015 g_assert_not_reached(); 1016 } 1017 1018 /* For 32-bit values, discard potential garbage in value. For 64-bit 1019 values within [2**31, 2**32-1], we can create smaller sequences by 1020 interpreting this as a negative 32-bit number, while ensuring that 1021 the high 32 bits are cleared by setting SF=0. */ 1022 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1023 svalue = (int32_t)value; 1024 value = (uint32_t)value; 1025 ivalue = (uint32_t)ivalue; 1026 type = TCG_TYPE_I32; 1027 } 1028 1029 /* Speed things up by handling the common case of small positive 1030 and negative values specially. */ 1031 if ((value & ~0xffffull) == 0) { 1032 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1033 return; 1034 } else if ((ivalue & ~0xffffull) == 0) { 1035 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1036 return; 1037 } 1038 1039 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1040 use the sign-extended value. That lets us match rotated values such 1041 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. 
*/ 1042 if (is_limm(svalue)) { 1043 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1044 return; 1045 } 1046 1047 /* Look for host pointer values within 4G of the PC. This happens 1048 often when loading pointers to QEMU's own data structures. */ 1049 if (type == TCG_TYPE_I64) { 1050 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1051 tcg_target_long disp = value - src_rx; 1052 if (disp == sextract64(disp, 0, 21)) { 1053 tcg_out_insn(s, 3406, ADR, rd, disp); 1054 return; 1055 } 1056 disp = (value >> 12) - (src_rx >> 12); 1057 if (disp == sextract64(disp, 0, 21)) { 1058 tcg_out_insn(s, 3406, ADRP, rd, disp); 1059 if (value & 0xfff) { 1060 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1061 } 1062 return; 1063 } 1064 } 1065 1066 /* Would it take fewer insns to begin with MOVN? */ 1067 if (ctpop64(value) >= 32) { 1068 t0 = ivalue; 1069 opc = I3405_MOVN; 1070 } else { 1071 t0 = value; 1072 opc = I3405_MOVZ; 1073 } 1074 s0 = ctz64(t0) & (63 & -16); 1075 t1 = t0 & ~(0xffffUL << s0); 1076 s1 = ctz64(t1) & (63 & -16); 1077 t2 = t1 & ~(0xffffUL << s1); 1078 if (t2 == 0) { 1079 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1080 if (t1 != 0) { 1081 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1082 } 1083 return; 1084 } 1085 1086 /* For more than 2 insns, dump it into the constant pool. */ 1087 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1088 tcg_out_insn(s, 3305, LDR, 0, rd); 1089} 1090 1091/* Define something more legible for general use. */ 1092#define tcg_out_ldst_r tcg_out_insn_3310 1093 1094static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1095 TCGReg rn, intptr_t offset, int lgsize) 1096{ 1097 /* If the offset is naturally aligned and in range, then we can 1098 use the scaled uimm12 encoding */ 1099 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1100 uintptr_t scaled_uimm = offset >> lgsize; 1101 if (scaled_uimm <= 0xfff) { 1102 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1103 return; 1104 } 1105 } 1106 1107 /* Small signed offsets can use the unscaled encoding. */ 1108 if (offset >= -256 && offset < 256) { 1109 tcg_out_insn_3312(s, insn, rd, rn, offset); 1110 return; 1111 } 1112 1113 /* Worst-case scenario, move offset to temp register, use reg offset. */ 1114 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1115 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1116} 1117 1118static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1119{ 1120 if (ret == arg) { 1121 return true; 1122 } 1123 switch (type) { 1124 case TCG_TYPE_I32: 1125 case TCG_TYPE_I64: 1126 if (ret < 32 && arg < 32) { 1127 tcg_out_movr(s, type, ret, arg); 1128 break; 1129 } else if (ret < 32) { 1130 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1131 break; 1132 } else if (arg < 32) { 1133 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1134 break; 1135 } 1136 /* FALLTHRU */ 1137 1138 case TCG_TYPE_V64: 1139 tcg_debug_assert(ret >= 32 && arg >= 32); 1140 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1141 break; 1142 case TCG_TYPE_V128: 1143 tcg_debug_assert(ret >= 32 && arg >= 32); 1144 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1145 break; 1146 1147 default: 1148 g_assert_not_reached(); 1149 } 1150 return true; 1151} 1152 1153static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1154 TCGReg base, intptr_t ofs) 1155{ 1156 AArch64Insn insn; 1157 int lgsz; 1158 1159 switch (type) { 1160 case TCG_TYPE_I32: 1161 insn = (ret < 32 ? 
I3312_LDRW : I3312_LDRVS); 1162 lgsz = 2; 1163 break; 1164 case TCG_TYPE_I64: 1165 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1166 lgsz = 3; 1167 break; 1168 case TCG_TYPE_V64: 1169 insn = I3312_LDRVD; 1170 lgsz = 3; 1171 break; 1172 case TCG_TYPE_V128: 1173 insn = I3312_LDRVQ; 1174 lgsz = 4; 1175 break; 1176 default: 1177 g_assert_not_reached(); 1178 } 1179 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1180} 1181 1182static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1183 TCGReg base, intptr_t ofs) 1184{ 1185 AArch64Insn insn; 1186 int lgsz; 1187 1188 switch (type) { 1189 case TCG_TYPE_I32: 1190 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1191 lgsz = 2; 1192 break; 1193 case TCG_TYPE_I64: 1194 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1195 lgsz = 3; 1196 break; 1197 case TCG_TYPE_V64: 1198 insn = I3312_STRVD; 1199 lgsz = 3; 1200 break; 1201 case TCG_TYPE_V128: 1202 insn = I3312_STRVQ; 1203 lgsz = 4; 1204 break; 1205 default: 1206 g_assert_not_reached(); 1207 } 1208 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1209} 1210 1211static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1212 TCGReg base, intptr_t ofs) 1213{ 1214 if (type <= TCG_TYPE_I64 && val == 0) { 1215 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1216 return true; 1217 } 1218 return false; 1219} 1220 1221static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1222 TCGReg rn, unsigned int a, unsigned int b) 1223{ 1224 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1225} 1226 1227static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1228 TCGReg rn, unsigned int a, unsigned int b) 1229{ 1230 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1231} 1232 1233static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1234 TCGReg rn, unsigned int a, unsigned int b) 1235{ 1236 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1237} 1238 1239static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1240 TCGReg rn, TCGReg rm, unsigned int a) 1241{ 1242 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1243} 1244 1245static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1246 TCGReg rd, TCGReg rn, unsigned int m) 1247{ 1248 int bits = ext ? 64 : 32; 1249 int max = bits - 1; 1250 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); 1251} 1252 1253static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1254 TCGReg rd, TCGReg rn, unsigned int m) 1255{ 1256 int max = ext ? 63 : 31; 1257 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1258} 1259 1260static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1261 TCGReg rd, TCGReg rn, unsigned int m) 1262{ 1263 int max = ext ? 63 : 31; 1264 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1265} 1266 1267static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1268 TCGReg rd, TCGReg rn, unsigned int m) 1269{ 1270 int max = ext ? 63 : 31; 1271 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1272} 1273 1274static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1275 TCGReg rd, TCGReg rn, unsigned int m) 1276{ 1277 int bits = ext ? 64 : 32; 1278 int max = bits - 1; 1279 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); 1280} 1281 1282static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1283 TCGReg rn, unsigned lsb, unsigned width) 1284{ 1285 unsigned size = ext ? 
64 : 32; 1286 unsigned a = (size - lsb) & (size - 1); 1287 unsigned b = width - 1; 1288 tcg_out_bfm(s, ext, rd, rn, a, b); 1289} 1290 1291static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1292 tcg_target_long b, bool const_b) 1293{ 1294 if (const_b) { 1295 /* Using CMP or CMN aliases. */ 1296 if (b >= 0) { 1297 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1298 } else { 1299 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1300 } 1301 } else { 1302 /* Using CMP alias SUBS wzr, Wn, Wm */ 1303 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1304 } 1305} 1306 1307static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1308{ 1309 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1310 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1311 tcg_out_insn(s, 3206, B, offset); 1312} 1313 1314static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1315{ 1316 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1317 if (offset == sextract64(offset, 0, 26)) { 1318 tcg_out_insn(s, 3206, B, offset); 1319 } else { 1320 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1321 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1322 } 1323} 1324 1325static inline void tcg_out_callr(TCGContext *s, TCGReg reg) 1326{ 1327 tcg_out_insn(s, 3207, BLR, reg); 1328} 1329 1330static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) 1331{ 1332 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1333 if (offset == sextract64(offset, 0, 26)) { 1334 tcg_out_insn(s, 3206, BL, offset); 1335 } else { 1336 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1337 tcg_out_callr(s, TCG_REG_TMP); 1338 } 1339} 1340 1341void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, 1342 uintptr_t jmp_rw, uintptr_t addr) 1343{ 1344 tcg_insn_unit i1, i2; 1345 TCGType rt = TCG_TYPE_I64; 1346 TCGReg rd = TCG_REG_TMP; 1347 uint64_t pair; 1348 1349 ptrdiff_t offset = addr - jmp_rx; 1350 1351 if (offset == sextract64(offset, 0, 26)) { 1352 i1 = I3206_B | ((offset >> 2) & 0x3ffffff); 1353 i2 = NOP; 1354 } else { 1355 offset = (addr >> 12) - (jmp_rx >> 12); 1356 1357 /* patch ADRP */ 1358 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; 1359 /* patch ADDI */ 1360 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; 1361 } 1362 pair = (uint64_t)i2 << 32 | i1; 1363 qatomic_set((uint64_t *)jmp_rw, pair); 1364 flush_idcache_range(jmp_rx, jmp_rw, 8); 1365} 1366 1367static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1368{ 1369 if (!l->has_value) { 1370 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1371 tcg_out_insn(s, 3206, B, 0); 1372 } else { 1373 tcg_out_goto(s, l->u.value_ptr); 1374 } 1375} 1376 1377static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1378 TCGArg b, bool b_const, TCGLabel *l) 1379{ 1380 intptr_t offset; 1381 bool need_cmp; 1382 1383 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1384 need_cmp = false; 1385 } else { 1386 need_cmp = true; 1387 tcg_out_cmp(s, ext, a, b, b_const); 1388 } 1389 1390 if (!l->has_value) { 1391 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1392 offset = tcg_in32(s) >> 5; 1393 } else { 1394 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1395 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1396 } 1397 1398 if (need_cmp) { 1399 tcg_out_insn(s, 3202, B_C, c, offset); 1400 } else if (c == TCG_COND_EQ) { 1401 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1402 } else { 1403 tcg_out_insn(s, 3201, 
CBNZ, ext, a, offset); 1404 } 1405} 1406 1407static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) 1408{ 1409 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); 1410} 1411 1412static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) 1413{ 1414 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); 1415} 1416 1417static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) 1418{ 1419 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); 1420} 1421 1422static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1423 TCGReg rd, TCGReg rn) 1424{ 1425 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1426 int bits = (8 << s_bits) - 1; 1427 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1428} 1429 1430static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1431 TCGReg rd, TCGReg rn) 1432{ 1433 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1434 int bits = (8 << s_bits) - 1; 1435 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1436} 1437 1438static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1439 TCGReg rn, int64_t aimm) 1440{ 1441 if (aimm >= 0) { 1442 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1443 } else { 1444 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1445 } 1446} 1447 1448static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1449 TCGReg rh, TCGReg al, TCGReg ah, 1450 tcg_target_long bl, tcg_target_long bh, 1451 bool const_bl, bool const_bh, bool sub) 1452{ 1453 TCGReg orig_rl = rl; 1454 AArch64Insn insn; 1455 1456 if (rl == ah || (!const_bh && rl == bh)) { 1457 rl = TCG_REG_TMP; 1458 } 1459 1460 if (const_bl) { 1461 insn = I3401_ADDSI; 1462 if ((bl < 0) ^ sub) { 1463 insn = I3401_SUBSI; 1464 bl = -bl; 1465 } 1466 if (unlikely(al == TCG_REG_XZR)) { 1467 /* ??? We want to allow al to be zero for the benefit of 1468 negation via subtraction. However, that leaves open the 1469 possibility of adding 0+const in the low part, and the 1470 immediate add instructions encode XSP not XZR. Don't try 1471 anything more elaborate here than loading another zero. */ 1472 al = TCG_REG_TMP; 1473 tcg_out_movi(s, ext, al, 0); 1474 } 1475 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1476 } else { 1477 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1478 } 1479 1480 insn = I3503_ADC; 1481 if (const_bh) { 1482 /* Note that the only two constants we support are 0 and -1, and 1483 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1484 if ((bh != 0) ^ sub) { 1485 insn = I3503_SBC; 1486 } 1487 bh = TCG_REG_XZR; 1488 } else if (sub) { 1489 insn = I3503_SBC; 1490 } 1491 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1492 1493 tcg_out_mov(s, ext, orig_rl, rl); 1494} 1495 1496static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1497{ 1498 static const uint32_t sync[] = { 1499 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1500 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1501 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1502 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1503 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1504 }; 1505 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1506} 1507 1508static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1509 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1510{ 1511 TCGReg a1 = a0; 1512 if (is_ctz) { 1513 a1 = TCG_REG_TMP; 1514 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1515 } 1516 if (const_b && b == (ext ? 
64 : 32)) { 1517 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1518 } else { 1519 AArch64Insn sel = I3506_CSEL; 1520 1521 tcg_out_cmp(s, ext, a0, 0, 1); 1522 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1523 1524 if (const_b) { 1525 if (b == -1) { 1526 b = TCG_REG_XZR; 1527 sel = I3506_CSINV; 1528 } else if (b == 0) { 1529 b = TCG_REG_XZR; 1530 } else { 1531 tcg_out_movi(s, ext, d, b); 1532 b = d; 1533 } 1534 } 1535 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1536 } 1537} 1538 1539#ifdef CONFIG_SOFTMMU 1540#include "../tcg-ldst.c.inc" 1541 1542/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1543 * TCGMemOpIdx oi, uintptr_t ra) 1544 */ 1545static void * const qemu_ld_helpers[16] = { 1546 [MO_UB] = helper_ret_ldub_mmu, 1547 [MO_LEUW] = helper_le_lduw_mmu, 1548 [MO_LEUL] = helper_le_ldul_mmu, 1549 [MO_LEQ] = helper_le_ldq_mmu, 1550 [MO_BEUW] = helper_be_lduw_mmu, 1551 [MO_BEUL] = helper_be_ldul_mmu, 1552 [MO_BEQ] = helper_be_ldq_mmu, 1553}; 1554 1555/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1556 * uintxx_t val, TCGMemOpIdx oi, 1557 * uintptr_t ra) 1558 */ 1559static void * const qemu_st_helpers[16] = { 1560 [MO_UB] = helper_ret_stb_mmu, 1561 [MO_LEUW] = helper_le_stw_mmu, 1562 [MO_LEUL] = helper_le_stl_mmu, 1563 [MO_LEQ] = helper_le_stq_mmu, 1564 [MO_BEUW] = helper_be_stw_mmu, 1565 [MO_BEUL] = helper_be_stl_mmu, 1566 [MO_BEQ] = helper_be_stq_mmu, 1567}; 1568 1569static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) 1570{ 1571 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1572 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1573 tcg_out_insn(s, 3406, ADR, rd, offset); 1574} 1575 1576static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1577{ 1578 TCGMemOpIdx oi = lb->oi; 1579 MemOp opc = get_memop(oi); 1580 MemOp size = opc & MO_SIZE; 1581 1582 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1583 return false; 1584 } 1585 1586 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1587 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1588 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1589 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1590 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1591 if (opc & MO_SIGN) { 1592 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1593 } else { 1594 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1595 } 1596 1597 tcg_out_goto(s, lb->raddr); 1598 return true; 1599} 1600 1601static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1602{ 1603 TCGMemOpIdx oi = lb->oi; 1604 MemOp opc = get_memop(oi); 1605 MemOp size = opc & MO_SIZE; 1606 1607 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1608 return false; 1609 } 1610 1611 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1612 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1613 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1614 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1615 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1616 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1617 tcg_out_goto(s, lb->raddr); 1618 return true; 1619} 1620 1621static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, 1622 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1623 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1624{ 1625 TCGLabelQemuLdst *label = new_ldst_label(s); 1626 1627 label->is_ld = is_ld; 1628 label->oi = oi; 1629 label->type 
= ext; 1630 label->datalo_reg = data_reg; 1631 label->addrlo_reg = addr_reg; 1632 label->raddr = tcg_splitwx_to_rx(raddr); 1633 label->label_ptr[0] = label_ptr; 1634} 1635 1636/* We expect to use a 7-bit scaled negative offset from ENV. */ 1637QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1638QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1639 1640/* These offsets are built into the LDP below. */ 1641QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1642QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1643 1644/* Load and compare a TLB entry, emitting the conditional jump to the 1645 slow path for the failure case, which will be patched later when finalizing 1646 the slow path. Generated code returns the host addend in X1, 1647 clobbers X0,X2,X3,TMP. */ 1648static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, 1649 tcg_insn_unit **label_ptr, int mem_index, 1650 bool is_read) 1651{ 1652 unsigned a_bits = get_alignment_bits(opc); 1653 unsigned s_bits = opc & MO_SIZE; 1654 unsigned a_mask = (1u << a_bits) - 1; 1655 unsigned s_mask = (1u << s_bits) - 1; 1656 TCGReg x3; 1657 TCGType mask_type; 1658 uint64_t compare_mask; 1659 1660 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1661 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1662 1663 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1664 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1665 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1666 1667 /* Extract the TLB index from the address into X0. */ 1668 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1669 TCG_REG_X0, TCG_REG_X0, addr_reg, 1670 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1671 1672 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ 1673 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1674 1675 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1676 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read 1677 ? offsetof(CPUTLBEntry, addr_read) 1678 : offsetof(CPUTLBEntry, addr_write)); 1679 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1680 offsetof(CPUTLBEntry, addend)); 1681 1682 /* For aligned accesses, we check the first byte and include the alignment 1683 bits within the address. For unaligned access, we check that we don't 1684 cross pages using the address of the last byte of the access. */ 1685 if (a_bits >= s_bits) { 1686 x3 = addr_reg; 1687 } else { 1688 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1689 TCG_REG_X3, addr_reg, s_mask - a_mask); 1690 x3 = TCG_REG_X3; 1691 } 1692 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1693 1694 /* Store the page mask part of the address into X3. */ 1695 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1696 TCG_REG_X3, x3, compare_mask); 1697 1698 /* Perform the address comparison. */ 1699 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1700 1701 /* If not equal, we jump to the slow path. */ 1702 *label_ptr = s->code_ptr; 1703 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1704} 1705 1706#endif /* CONFIG_SOFTMMU */ 1707 1708static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1709 TCGReg data_r, TCGReg addr_r, 1710 TCGType otype, TCGReg off_r) 1711{ 1712 const MemOp bswap = memop & MO_BSWAP; 1713 1714 switch (memop & MO_SSIZE) { 1715 case MO_UB: 1716 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); 1717 break; 1718 case MO_SB: 1719 tcg_out_ldst_r(s, ext ? 
I3312_LDRSBX : I3312_LDRSBW, 1720 data_r, addr_r, otype, off_r); 1721 break; 1722 case MO_UW: 1723 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1724 if (bswap) { 1725 tcg_out_rev16(s, data_r, data_r); 1726 } 1727 break; 1728 case MO_SW: 1729 if (bswap) { 1730 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1731 tcg_out_rev16(s, data_r, data_r); 1732 tcg_out_sxt(s, ext, MO_16, data_r, data_r); 1733 } else { 1734 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), 1735 data_r, addr_r, otype, off_r); 1736 } 1737 break; 1738 case MO_UL: 1739 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1740 if (bswap) { 1741 tcg_out_rev32(s, data_r, data_r); 1742 } 1743 break; 1744 case MO_SL: 1745 if (bswap) { 1746 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1747 tcg_out_rev32(s, data_r, data_r); 1748 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); 1749 } else { 1750 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); 1751 } 1752 break; 1753 case MO_Q: 1754 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); 1755 if (bswap) { 1756 tcg_out_rev64(s, data_r, data_r); 1757 } 1758 break; 1759 default: 1760 tcg_abort(); 1761 } 1762} 1763 1764static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1765 TCGReg data_r, TCGReg addr_r, 1766 TCGType otype, TCGReg off_r) 1767{ 1768 const MemOp bswap = memop & MO_BSWAP; 1769 1770 switch (memop & MO_SIZE) { 1771 case MO_8: 1772 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); 1773 break; 1774 case MO_16: 1775 if (bswap && data_r != TCG_REG_XZR) { 1776 tcg_out_rev16(s, TCG_REG_TMP, data_r); 1777 data_r = TCG_REG_TMP; 1778 } 1779 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); 1780 break; 1781 case MO_32: 1782 if (bswap && data_r != TCG_REG_XZR) { 1783 tcg_out_rev32(s, TCG_REG_TMP, data_r); 1784 data_r = TCG_REG_TMP; 1785 } 1786 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); 1787 break; 1788 case MO_64: 1789 if (bswap && data_r != TCG_REG_XZR) { 1790 tcg_out_rev64(s, TCG_REG_TMP, data_r); 1791 data_r = TCG_REG_TMP; 1792 } 1793 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); 1794 break; 1795 default: 1796 tcg_abort(); 1797 } 1798} 1799 1800static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1801 TCGMemOpIdx oi, TCGType ext) 1802{ 1803 MemOp memop = get_memop(oi); 1804 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1805#ifdef CONFIG_SOFTMMU 1806 unsigned mem_index = get_mmuidx(oi); 1807 tcg_insn_unit *label_ptr; 1808 1809 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); 1810 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1811 TCG_REG_X1, otype, addr_reg); 1812 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, 1813 s->code_ptr, label_ptr); 1814#else /* !CONFIG_SOFTMMU */ 1815 if (USE_GUEST_BASE) { 1816 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1817 TCG_REG_GUEST_BASE, otype, addr_reg); 1818 } else { 1819 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1820 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1821 } 1822#endif /* CONFIG_SOFTMMU */ 1823} 1824 1825static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1826 TCGMemOpIdx oi) 1827{ 1828 MemOp memop = get_memop(oi); 1829 const TCGType otype = TARGET_LONG_BITS == 64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32; 1830#ifdef CONFIG_SOFTMMU 1831 unsigned mem_index = get_mmuidx(oi); 1832 tcg_insn_unit *label_ptr; 1833 1834 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); 1835 tcg_out_qemu_st_direct(s, memop, data_reg, 1836 TCG_REG_X1, otype, addr_reg); 1837 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, 1838 data_reg, addr_reg, s->code_ptr, label_ptr); 1839#else /* !CONFIG_SOFTMMU */ 1840 if (USE_GUEST_BASE) { 1841 tcg_out_qemu_st_direct(s, memop, data_reg, 1842 TCG_REG_GUEST_BASE, otype, addr_reg); 1843 } else { 1844 tcg_out_qemu_st_direct(s, memop, data_reg, 1845 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1846 } 1847#endif /* CONFIG_SOFTMMU */ 1848} 1849 1850static const tcg_insn_unit *tb_ret_addr; 1851 1852static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1853 const TCGArg args[TCG_MAX_OP_ARGS], 1854 const int const_args[TCG_MAX_OP_ARGS]) 1855{ 1856 /* 99% of the time, we can signal the use of extension registers 1857 by looking to see if the opcode handles 64-bit data. */ 1858 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 1859 1860 /* Hoist the loads of the most common arguments. */ 1861 TCGArg a0 = args[0]; 1862 TCGArg a1 = args[1]; 1863 TCGArg a2 = args[2]; 1864 int c2 = const_args[2]; 1865 1866 /* Some operands are defined with "rZ" constraint, a register or 1867 the zero register. These need not actually test args[I] == 0. */ 1868#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 1869 1870 switch (opc) { 1871 case INDEX_op_exit_tb: 1872 /* Reuse the zeroing that exists for goto_ptr. */ 1873 if (a0 == 0) { 1874 tcg_out_goto_long(s, tcg_code_gen_epilogue); 1875 } else { 1876 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 1877 tcg_out_goto_long(s, tb_ret_addr); 1878 } 1879 break; 1880 1881 case INDEX_op_goto_tb: 1882 if (s->tb_jmp_insn_offset != NULL) { 1883 /* TCG_TARGET_HAS_direct_jump */ 1884 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic 1885 write can be used to patch the target address. */ 1886 if ((uintptr_t)s->code_ptr & 7) { 1887 tcg_out32(s, NOP); 1888 } 1889 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); 1890 /* actual branch destination will be patched by 1891 tb_target_set_jmp_target later. 
*/ 1892 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); 1893 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); 1894 } else { 1895 /* !TCG_TARGET_HAS_direct_jump */ 1896 tcg_debug_assert(s->tb_jmp_target_addr != NULL); 1897 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2; 1898 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP); 1899 } 1900 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1901 set_jmp_reset_offset(s, a0); 1902 break; 1903 1904 case INDEX_op_goto_ptr: 1905 tcg_out_insn(s, 3207, BR, a0); 1906 break; 1907 1908 case INDEX_op_br: 1909 tcg_out_goto_label(s, arg_label(a0)); 1910 break; 1911 1912 case INDEX_op_ld8u_i32: 1913 case INDEX_op_ld8u_i64: 1914 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 1915 break; 1916 case INDEX_op_ld8s_i32: 1917 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 1918 break; 1919 case INDEX_op_ld8s_i64: 1920 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 1921 break; 1922 case INDEX_op_ld16u_i32: 1923 case INDEX_op_ld16u_i64: 1924 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 1925 break; 1926 case INDEX_op_ld16s_i32: 1927 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 1928 break; 1929 case INDEX_op_ld16s_i64: 1930 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 1931 break; 1932 case INDEX_op_ld_i32: 1933 case INDEX_op_ld32u_i64: 1934 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 1935 break; 1936 case INDEX_op_ld32s_i64: 1937 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 1938 break; 1939 case INDEX_op_ld_i64: 1940 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 1941 break; 1942 1943 case INDEX_op_st8_i32: 1944 case INDEX_op_st8_i64: 1945 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 1946 break; 1947 case INDEX_op_st16_i32: 1948 case INDEX_op_st16_i64: 1949 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 1950 break; 1951 case INDEX_op_st_i32: 1952 case INDEX_op_st32_i64: 1953 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 1954 break; 1955 case INDEX_op_st_i64: 1956 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 1957 break; 1958 1959 case INDEX_op_add_i32: 1960 a2 = (int32_t)a2; 1961 /* FALLTHRU */ 1962 case INDEX_op_add_i64: 1963 if (c2) { 1964 tcg_out_addsubi(s, ext, a0, a1, a2); 1965 } else { 1966 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 1967 } 1968 break; 1969 1970 case INDEX_op_sub_i32: 1971 a2 = (int32_t)a2; 1972 /* FALLTHRU */ 1973 case INDEX_op_sub_i64: 1974 if (c2) { 1975 tcg_out_addsubi(s, ext, a0, a1, -a2); 1976 } else { 1977 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 1978 } 1979 break; 1980 1981 case INDEX_op_neg_i64: 1982 case INDEX_op_neg_i32: 1983 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 1984 break; 1985 1986 case INDEX_op_and_i32: 1987 a2 = (int32_t)a2; 1988 /* FALLTHRU */ 1989 case INDEX_op_and_i64: 1990 if (c2) { 1991 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); 1992 } else { 1993 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 1994 } 1995 break; 1996 1997 case INDEX_op_andc_i32: 1998 a2 = (int32_t)a2; 1999 /* FALLTHRU */ 2000 case INDEX_op_andc_i64: 2001 if (c2) { 2002 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2003 } else { 2004 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2005 } 2006 break; 2007 2008 case INDEX_op_or_i32: 2009 a2 = (int32_t)a2; 2010 /* FALLTHRU */ 2011 case INDEX_op_or_i64: 2012 if (c2) { 2013 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2014 } else { 2015 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2016 } 2017 break; 2018 2019 case INDEX_op_orc_i32: 2020 a2 = (int32_t)a2; 2021 /* FALLTHRU */ 2022 case INDEX_op_orc_i64: 2023 if (c2) { 2024 tcg_out_logicali(s, 
I3404_ORRI, ext, a0, a1, ~a2); 2025 } else { 2026 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2027 } 2028 break; 2029 2030 case INDEX_op_xor_i32: 2031 a2 = (int32_t)a2; 2032 /* FALLTHRU */ 2033 case INDEX_op_xor_i64: 2034 if (c2) { 2035 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2036 } else { 2037 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2038 } 2039 break; 2040 2041 case INDEX_op_eqv_i32: 2042 a2 = (int32_t)a2; 2043 /* FALLTHRU */ 2044 case INDEX_op_eqv_i64: 2045 if (c2) { 2046 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2047 } else { 2048 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2049 } 2050 break; 2051 2052 case INDEX_op_not_i64: 2053 case INDEX_op_not_i32: 2054 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2055 break; 2056 2057 case INDEX_op_mul_i64: 2058 case INDEX_op_mul_i32: 2059 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2060 break; 2061 2062 case INDEX_op_div_i64: 2063 case INDEX_op_div_i32: 2064 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2065 break; 2066 case INDEX_op_divu_i64: 2067 case INDEX_op_divu_i32: 2068 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2069 break; 2070 2071 case INDEX_op_rem_i64: 2072 case INDEX_op_rem_i32: 2073 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); 2074 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2075 break; 2076 case INDEX_op_remu_i64: 2077 case INDEX_op_remu_i32: 2078 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); 2079 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2080 break; 2081 2082 case INDEX_op_shl_i64: 2083 case INDEX_op_shl_i32: 2084 if (c2) { 2085 tcg_out_shl(s, ext, a0, a1, a2); 2086 } else { 2087 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2088 } 2089 break; 2090 2091 case INDEX_op_shr_i64: 2092 case INDEX_op_shr_i32: 2093 if (c2) { 2094 tcg_out_shr(s, ext, a0, a1, a2); 2095 } else { 2096 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2097 } 2098 break; 2099 2100 case INDEX_op_sar_i64: 2101 case INDEX_op_sar_i32: 2102 if (c2) { 2103 tcg_out_sar(s, ext, a0, a1, a2); 2104 } else { 2105 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2106 } 2107 break; 2108 2109 case INDEX_op_rotr_i64: 2110 case INDEX_op_rotr_i32: 2111 if (c2) { 2112 tcg_out_rotr(s, ext, a0, a1, a2); 2113 } else { 2114 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2115 } 2116 break; 2117 2118 case INDEX_op_rotl_i64: 2119 case INDEX_op_rotl_i32: 2120 if (c2) { 2121 tcg_out_rotl(s, ext, a0, a1, a2); 2122 } else { 2123 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); 2124 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); 2125 } 2126 break; 2127 2128 case INDEX_op_clz_i64: 2129 case INDEX_op_clz_i32: 2130 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2131 break; 2132 case INDEX_op_ctz_i64: 2133 case INDEX_op_ctz_i32: 2134 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2135 break; 2136 2137 case INDEX_op_brcond_i32: 2138 a1 = (int32_t)a1; 2139 /* FALLTHRU */ 2140 case INDEX_op_brcond_i64: 2141 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2142 break; 2143 2144 case INDEX_op_setcond_i32: 2145 a2 = (int32_t)a2; 2146 /* FALLTHRU */ 2147 case INDEX_op_setcond_i64: 2148 tcg_out_cmp(s, ext, a1, a2, c2); 2149 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). 
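CSINC writes Wd = invert(cond) ? WZR : WZR + 1, so after the comparison above the destination holds 1 exactly when the requested condition is true and 0 otherwise.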
*/ 2150 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2151 TCG_REG_XZR, tcg_invert_cond(args[3])); 2152 break; 2153 2154 case INDEX_op_movcond_i32: 2155 a2 = (int32_t)a2; 2156 /* FALLTHRU */ 2157 case INDEX_op_movcond_i64: 2158 tcg_out_cmp(s, ext, a1, a2, c2); 2159 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2160 break; 2161 2162 case INDEX_op_qemu_ld_i32: 2163 case INDEX_op_qemu_ld_i64: 2164 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2165 break; 2166 case INDEX_op_qemu_st_i32: 2167 case INDEX_op_qemu_st_i64: 2168 tcg_out_qemu_st(s, REG0(0), a1, a2); 2169 break; 2170 2171 case INDEX_op_bswap64_i64: 2172 tcg_out_rev64(s, a0, a1); 2173 break; 2174 case INDEX_op_bswap32_i64: 2175 case INDEX_op_bswap32_i32: 2176 tcg_out_rev32(s, a0, a1); 2177 break; 2178 case INDEX_op_bswap16_i64: 2179 case INDEX_op_bswap16_i32: 2180 tcg_out_rev16(s, a0, a1); 2181 break; 2182 2183 case INDEX_op_ext8s_i64: 2184 case INDEX_op_ext8s_i32: 2185 tcg_out_sxt(s, ext, MO_8, a0, a1); 2186 break; 2187 case INDEX_op_ext16s_i64: 2188 case INDEX_op_ext16s_i32: 2189 tcg_out_sxt(s, ext, MO_16, a0, a1); 2190 break; 2191 case INDEX_op_ext_i32_i64: 2192 case INDEX_op_ext32s_i64: 2193 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); 2194 break; 2195 case INDEX_op_ext8u_i64: 2196 case INDEX_op_ext8u_i32: 2197 tcg_out_uxt(s, MO_8, a0, a1); 2198 break; 2199 case INDEX_op_ext16u_i64: 2200 case INDEX_op_ext16u_i32: 2201 tcg_out_uxt(s, MO_16, a0, a1); 2202 break; 2203 case INDEX_op_extu_i32_i64: 2204 case INDEX_op_ext32u_i64: 2205 tcg_out_movr(s, TCG_TYPE_I32, a0, a1); 2206 break; 2207 2208 case INDEX_op_deposit_i64: 2209 case INDEX_op_deposit_i32: 2210 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2211 break; 2212 2213 case INDEX_op_extract_i64: 2214 case INDEX_op_extract_i32: 2215 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2216 break; 2217 2218 case INDEX_op_sextract_i64: 2219 case INDEX_op_sextract_i32: 2220 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2221 break; 2222 2223 case INDEX_op_extract2_i64: 2224 case INDEX_op_extract2_i32: 2225 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2226 break; 2227 2228 case INDEX_op_add2_i32: 2229 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2230 (int32_t)args[4], args[5], const_args[4], 2231 const_args[5], false); 2232 break; 2233 case INDEX_op_add2_i64: 2234 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2235 args[5], const_args[4], const_args[5], false); 2236 break; 2237 case INDEX_op_sub2_i32: 2238 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2239 (int32_t)args[4], args[5], const_args[4], 2240 const_args[5], true); 2241 break; 2242 case INDEX_op_sub2_i64: 2243 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2244 args[5], const_args[4], const_args[5], true); 2245 break; 2246 2247 case INDEX_op_muluh_i64: 2248 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2249 break; 2250 case INDEX_op_mulsh_i64: 2251 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2252 break; 2253 2254 case INDEX_op_mb: 2255 tcg_out_mb(s, a0); 2256 break; 2257 2258 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2259 case INDEX_op_mov_i64: 2260 case INDEX_op_call: /* Always emitted via tcg_out_call. 
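These opcodes are expanded by common TCG code before reaching the backend, so any that arrive here fall through to the assertion below.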
*/ 2261 default: 2262 g_assert_not_reached(); 2263 } 2264 2265#undef REG0 2266} 2267 2268static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2269 unsigned vecl, unsigned vece, 2270 const TCGArg *args, const int *const_args) 2271{ 2272 static const AArch64Insn cmp_insn[16] = { 2273 [TCG_COND_EQ] = I3616_CMEQ, 2274 [TCG_COND_GT] = I3616_CMGT, 2275 [TCG_COND_GE] = I3616_CMGE, 2276 [TCG_COND_GTU] = I3616_CMHI, 2277 [TCG_COND_GEU] = I3616_CMHS, 2278 }; 2279 static const AArch64Insn cmp0_insn[16] = { 2280 [TCG_COND_EQ] = I3617_CMEQ0, 2281 [TCG_COND_GT] = I3617_CMGT0, 2282 [TCG_COND_GE] = I3617_CMGE0, 2283 [TCG_COND_LT] = I3617_CMLT0, 2284 [TCG_COND_LE] = I3617_CMLE0, 2285 }; 2286 2287 TCGType type = vecl + TCG_TYPE_V64; 2288 unsigned is_q = vecl; 2289 TCGArg a0, a1, a2, a3; 2290 int cmode, imm8; 2291 2292 a0 = args[0]; 2293 a1 = args[1]; 2294 a2 = args[2]; 2295 2296 switch (opc) { 2297 case INDEX_op_ld_vec: 2298 tcg_out_ld(s, type, a0, a1, a2); 2299 break; 2300 case INDEX_op_st_vec: 2301 tcg_out_st(s, type, a0, a1, a2); 2302 break; 2303 case INDEX_op_dupm_vec: 2304 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2305 break; 2306 case INDEX_op_add_vec: 2307 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2308 break; 2309 case INDEX_op_sub_vec: 2310 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2311 break; 2312 case INDEX_op_mul_vec: 2313 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2314 break; 2315 case INDEX_op_neg_vec: 2316 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2317 break; 2318 case INDEX_op_abs_vec: 2319 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2320 break; 2321 case INDEX_op_and_vec: 2322 if (const_args[2]) { 2323 is_shimm1632(~a2, &cmode, &imm8); 2324 if (a0 == a1) { 2325 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2326 return; 2327 } 2328 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2329 a2 = a0; 2330 } 2331 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2332 break; 2333 case INDEX_op_or_vec: 2334 if (const_args[2]) { 2335 is_shimm1632(a2, &cmode, &imm8); 2336 if (a0 == a1) { 2337 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2338 return; 2339 } 2340 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2341 a2 = a0; 2342 } 2343 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2344 break; 2345 case INDEX_op_andc_vec: 2346 if (const_args[2]) { 2347 is_shimm1632(a2, &cmode, &imm8); 2348 if (a0 == a1) { 2349 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2350 return; 2351 } 2352 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2353 a2 = a0; 2354 } 2355 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2356 break; 2357 case INDEX_op_orc_vec: 2358 if (const_args[2]) { 2359 is_shimm1632(~a2, &cmode, &imm8); 2360 if (a0 == a1) { 2361 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2362 return; 2363 } 2364 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2365 a2 = a0; 2366 } 2367 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2368 break; 2369 case INDEX_op_xor_vec: 2370 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2371 break; 2372 case INDEX_op_ssadd_vec: 2373 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2374 break; 2375 case INDEX_op_sssub_vec: 2376 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2377 break; 2378 case INDEX_op_usadd_vec: 2379 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2380 break; 2381 case INDEX_op_ussub_vec: 2382 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2383 break; 2384 case INDEX_op_smax_vec: 2385 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, 
a2); 2386 break; 2387 case INDEX_op_smin_vec: 2388 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2389 break; 2390 case INDEX_op_umax_vec: 2391 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2392 break; 2393 case INDEX_op_umin_vec: 2394 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2395 break; 2396 case INDEX_op_not_vec: 2397 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2398 break; 2399 case INDEX_op_shli_vec: 2400 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2401 break; 2402 case INDEX_op_shri_vec: 2403 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2404 break; 2405 case INDEX_op_sari_vec: 2406 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2407 break; 2408 case INDEX_op_aa64_sli_vec: 2409 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2410 break; 2411 case INDEX_op_shlv_vec: 2412 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2413 break; 2414 case INDEX_op_aa64_sshl_vec: 2415 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2416 break; 2417 case INDEX_op_cmp_vec: 2418 { 2419 TCGCond cond = args[3]; 2420 AArch64Insn insn; 2421 2422 if (cond == TCG_COND_NE) { 2423 if (const_args[2]) { 2424 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2425 } else { 2426 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2427 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2428 } 2429 } else { 2430 if (const_args[2]) { 2431 insn = cmp0_insn[cond]; 2432 if (insn) { 2433 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2434 break; 2435 } 2436 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2437 a2 = TCG_VEC_TMP; 2438 } 2439 insn = cmp_insn[cond]; 2440 if (insn == 0) { 2441 TCGArg t; 2442 t = a1, a1 = a2, a2 = t; 2443 cond = tcg_swap_cond(cond); 2444 insn = cmp_insn[cond]; 2445 tcg_debug_assert(insn != 0); 2446 } 2447 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2448 } 2449 } 2450 break; 2451 2452 case INDEX_op_bitsel_vec: 2453 a3 = args[3]; 2454 if (a0 == a3) { 2455 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2456 } else if (a0 == a2) { 2457 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2458 } else { 2459 if (a0 != a1) { 2460 tcg_out_mov(s, type, a0, a1); 2461 } 2462 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2463 } 2464 break; 2465 2466 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2467 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. 
*/ 2468 default: 2469 g_assert_not_reached(); 2470 } 2471} 2472 2473int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2474{ 2475 switch (opc) { 2476 case INDEX_op_add_vec: 2477 case INDEX_op_sub_vec: 2478 case INDEX_op_and_vec: 2479 case INDEX_op_or_vec: 2480 case INDEX_op_xor_vec: 2481 case INDEX_op_andc_vec: 2482 case INDEX_op_orc_vec: 2483 case INDEX_op_neg_vec: 2484 case INDEX_op_abs_vec: 2485 case INDEX_op_not_vec: 2486 case INDEX_op_cmp_vec: 2487 case INDEX_op_shli_vec: 2488 case INDEX_op_shri_vec: 2489 case INDEX_op_sari_vec: 2490 case INDEX_op_ssadd_vec: 2491 case INDEX_op_sssub_vec: 2492 case INDEX_op_usadd_vec: 2493 case INDEX_op_ussub_vec: 2494 case INDEX_op_shlv_vec: 2495 case INDEX_op_bitsel_vec: 2496 return 1; 2497 case INDEX_op_rotli_vec: 2498 case INDEX_op_shrv_vec: 2499 case INDEX_op_sarv_vec: 2500 case INDEX_op_rotlv_vec: 2501 case INDEX_op_rotrv_vec: 2502 return -1; 2503 case INDEX_op_mul_vec: 2504 case INDEX_op_smax_vec: 2505 case INDEX_op_smin_vec: 2506 case INDEX_op_umax_vec: 2507 case INDEX_op_umin_vec: 2508 return vece < MO_64; 2509 2510 default: 2511 return 0; 2512 } 2513} 2514 2515void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2516 TCGArg a0, ...) 2517{ 2518 va_list va; 2519 TCGv_vec v0, v1, v2, t1, t2, c1; 2520 TCGArg a2; 2521 2522 va_start(va, a0); 2523 v0 = temp_tcgv_vec(arg_temp(a0)); 2524 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2525 a2 = va_arg(va, TCGArg); 2526 v2 = temp_tcgv_vec(arg_temp(a2)); 2527 2528 switch (opc) { 2529 case INDEX_op_rotli_vec: 2530 t1 = tcg_temp_new_vec(type); 2531 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2532 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2533 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2534 tcg_temp_free_vec(t1); 2535 break; 2536 2537 case INDEX_op_shrv_vec: 2538 case INDEX_op_sarv_vec: 2539 /* Right shifts are negative left shifts for AArch64. */ 2540 t1 = tcg_temp_new_vec(type); 2541 tcg_gen_neg_vec(vece, t1, v2); 2542 opc = (opc == INDEX_op_shrv_vec 2543 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2544 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2545 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2546 tcg_temp_free_vec(t1); 2547 break; 2548 2549 case INDEX_op_rotlv_vec: 2550 t1 = tcg_temp_new_vec(type); 2551 c1 = tcg_constant_vec(type, vece, 8 << vece); 2552 tcg_gen_sub_vec(vece, t1, v2, c1); 2553 /* Right shifts are negative left shifts for AArch64. */ 2554 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2555 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2556 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2557 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2558 tcg_gen_or_vec(vece, v0, v0, t1); 2559 tcg_temp_free_vec(t1); 2560 break; 2561 2562 case INDEX_op_rotrv_vec: 2563 t1 = tcg_temp_new_vec(type); 2564 t2 = tcg_temp_new_vec(type); 2565 c1 = tcg_constant_vec(type, vece, 8 << vece); 2566 tcg_gen_neg_vec(vece, t1, v2); 2567 tcg_gen_sub_vec(vece, t2, c1, v2); 2568 /* Right shifts are negative left shifts for AArch64. 
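A rotate right by v2 is therefore composed from the two variable left shifts below: one by -v2 (a right shift by v2) and one by (element width - v2), whose results are OR'd together into v0.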
*/ 2569 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2570 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2571 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2572 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2573 tcg_gen_or_vec(vece, v0, t1, t2); 2574 tcg_temp_free_vec(t1); 2575 tcg_temp_free_vec(t2); 2576 break; 2577 2578 default: 2579 g_assert_not_reached(); 2580 } 2581 2582 va_end(va); 2583} 2584 2585static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) 2586{ 2587 static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; 2588 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } }; 2589 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } }; 2590 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } }; 2591 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } }; 2592 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } }; 2593 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } }; 2594 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } }; 2595 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; 2596 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; 2597 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; 2598 static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } }; 2599 static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } }; 2600 static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } }; 2601 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } }; 2602 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; 2603 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } }; 2604 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } }; 2605 static const TCGTargetOpDef r_r_rAL 2606 = { .args_ct_str = { "r", "r", "rAL" } }; 2607 static const TCGTargetOpDef dep 2608 = { .args_ct_str = { "r", "0", "rZ" } }; 2609 static const TCGTargetOpDef ext2 2610 = { .args_ct_str = { "r", "rZ", "rZ" } }; 2611 static const TCGTargetOpDef movc 2612 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } }; 2613 static const TCGTargetOpDef add2 2614 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } }; 2615 static const TCGTargetOpDef w_w_w_w 2616 = { .args_ct_str = { "w", "w", "w", "w" } }; 2617 2618 switch (op) { 2619 case INDEX_op_goto_ptr: 2620 return &r; 2621 2622 case INDEX_op_ld8u_i32: 2623 case INDEX_op_ld8s_i32: 2624 case INDEX_op_ld16u_i32: 2625 case INDEX_op_ld16s_i32: 2626 case INDEX_op_ld_i32: 2627 case INDEX_op_ld8u_i64: 2628 case INDEX_op_ld8s_i64: 2629 case INDEX_op_ld16u_i64: 2630 case INDEX_op_ld16s_i64: 2631 case INDEX_op_ld32u_i64: 2632 case INDEX_op_ld32s_i64: 2633 case INDEX_op_ld_i64: 2634 case INDEX_op_neg_i32: 2635 case INDEX_op_neg_i64: 2636 case INDEX_op_not_i32: 2637 case INDEX_op_not_i64: 2638 case INDEX_op_bswap16_i32: 2639 case INDEX_op_bswap32_i32: 2640 case INDEX_op_bswap16_i64: 2641 case INDEX_op_bswap32_i64: 2642 case INDEX_op_bswap64_i64: 2643 case INDEX_op_ext8s_i32: 2644 case INDEX_op_ext16s_i32: 2645 case INDEX_op_ext8u_i32: 2646 case INDEX_op_ext16u_i32: 2647 case INDEX_op_ext8s_i64: 2648 case INDEX_op_ext16s_i64: 2649 case INDEX_op_ext32s_i64: 2650 case INDEX_op_ext8u_i64: 2651 case INDEX_op_ext16u_i64: 2652 case INDEX_op_ext32u_i64: 2653 case INDEX_op_ext_i32_i64: 2654 case INDEX_op_extu_i32_i64: 2655 case INDEX_op_extract_i32: 2656 case INDEX_op_extract_i64: 2657 case 
INDEX_op_sextract_i32: 2658 case INDEX_op_sextract_i64: 2659 return &r_r; 2660 2661 case INDEX_op_st8_i32: 2662 case INDEX_op_st16_i32: 2663 case INDEX_op_st_i32: 2664 case INDEX_op_st8_i64: 2665 case INDEX_op_st16_i64: 2666 case INDEX_op_st32_i64: 2667 case INDEX_op_st_i64: 2668 return &rZ_r; 2669 2670 case INDEX_op_add_i32: 2671 case INDEX_op_add_i64: 2672 case INDEX_op_sub_i32: 2673 case INDEX_op_sub_i64: 2674 case INDEX_op_setcond_i32: 2675 case INDEX_op_setcond_i64: 2676 return &r_r_rA; 2677 2678 case INDEX_op_mul_i32: 2679 case INDEX_op_mul_i64: 2680 case INDEX_op_div_i32: 2681 case INDEX_op_div_i64: 2682 case INDEX_op_divu_i32: 2683 case INDEX_op_divu_i64: 2684 case INDEX_op_rem_i32: 2685 case INDEX_op_rem_i64: 2686 case INDEX_op_remu_i32: 2687 case INDEX_op_remu_i64: 2688 case INDEX_op_muluh_i64: 2689 case INDEX_op_mulsh_i64: 2690 return &r_r_r; 2691 2692 case INDEX_op_and_i32: 2693 case INDEX_op_and_i64: 2694 case INDEX_op_or_i32: 2695 case INDEX_op_or_i64: 2696 case INDEX_op_xor_i32: 2697 case INDEX_op_xor_i64: 2698 case INDEX_op_andc_i32: 2699 case INDEX_op_andc_i64: 2700 case INDEX_op_orc_i32: 2701 case INDEX_op_orc_i64: 2702 case INDEX_op_eqv_i32: 2703 case INDEX_op_eqv_i64: 2704 return &r_r_rL; 2705 2706 case INDEX_op_shl_i32: 2707 case INDEX_op_shr_i32: 2708 case INDEX_op_sar_i32: 2709 case INDEX_op_rotl_i32: 2710 case INDEX_op_rotr_i32: 2711 case INDEX_op_shl_i64: 2712 case INDEX_op_shr_i64: 2713 case INDEX_op_sar_i64: 2714 case INDEX_op_rotl_i64: 2715 case INDEX_op_rotr_i64: 2716 return &r_r_ri; 2717 2718 case INDEX_op_clz_i32: 2719 case INDEX_op_ctz_i32: 2720 case INDEX_op_clz_i64: 2721 case INDEX_op_ctz_i64: 2722 return &r_r_rAL; 2723 2724 case INDEX_op_brcond_i32: 2725 case INDEX_op_brcond_i64: 2726 return &r_rA; 2727 2728 case INDEX_op_movcond_i32: 2729 case INDEX_op_movcond_i64: 2730 return &movc; 2731 2732 case INDEX_op_qemu_ld_i32: 2733 case INDEX_op_qemu_ld_i64: 2734 return &r_l; 2735 case INDEX_op_qemu_st_i32: 2736 case INDEX_op_qemu_st_i64: 2737 return &lZ_l; 2738 2739 case INDEX_op_deposit_i32: 2740 case INDEX_op_deposit_i64: 2741 return &dep; 2742 2743 case INDEX_op_extract2_i32: 2744 case INDEX_op_extract2_i64: 2745 return &ext2; 2746 2747 case INDEX_op_add2_i32: 2748 case INDEX_op_add2_i64: 2749 case INDEX_op_sub2_i32: 2750 case INDEX_op_sub2_i64: 2751 return &add2; 2752 2753 case INDEX_op_add_vec: 2754 case INDEX_op_sub_vec: 2755 case INDEX_op_mul_vec: 2756 case INDEX_op_xor_vec: 2757 case INDEX_op_ssadd_vec: 2758 case INDEX_op_sssub_vec: 2759 case INDEX_op_usadd_vec: 2760 case INDEX_op_ussub_vec: 2761 case INDEX_op_smax_vec: 2762 case INDEX_op_smin_vec: 2763 case INDEX_op_umax_vec: 2764 case INDEX_op_umin_vec: 2765 case INDEX_op_shlv_vec: 2766 case INDEX_op_shrv_vec: 2767 case INDEX_op_sarv_vec: 2768 case INDEX_op_aa64_sshl_vec: 2769 return &w_w_w; 2770 case INDEX_op_not_vec: 2771 case INDEX_op_neg_vec: 2772 case INDEX_op_abs_vec: 2773 case INDEX_op_shli_vec: 2774 case INDEX_op_shri_vec: 2775 case INDEX_op_sari_vec: 2776 return &w_w; 2777 case INDEX_op_ld_vec: 2778 case INDEX_op_st_vec: 2779 case INDEX_op_dupm_vec: 2780 return &w_r; 2781 case INDEX_op_dup_vec: 2782 return &w_wr; 2783 case INDEX_op_or_vec: 2784 case INDEX_op_andc_vec: 2785 return &w_w_wO; 2786 case INDEX_op_and_vec: 2787 case INDEX_op_orc_vec: 2788 return &w_w_wN; 2789 case INDEX_op_cmp_vec: 2790 return &w_w_wZ; 2791 case INDEX_op_bitsel_vec: 2792 return &w_w_w_w; 2793 case INDEX_op_aa64_sli_vec: 2794 return &w_0_w; 2795 2796 default: 2797 return NULL; 2798 } 2799} 2800 2801static void 
tcg_target_init(TCGContext *s) 2802{ 2803 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2804 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2805 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2806 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2807 2808 tcg_target_call_clobber_regs = -1ull; 2809 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2810 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2811 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 2812 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2813 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2814 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2815 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2816 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2817 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2818 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2819 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2820 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2821 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2822 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2823 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2824 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2825 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 2826 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 2827 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 2828 2829 s->reserved_regs = 0; 2830 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2831 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 2832 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 2833 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 2834 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 2835} 2836 2837/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 2838#define PUSH_SIZE ((30 - 19 + 1) * 8) 2839 2840#define FRAME_SIZE \ 2841 ((PUSH_SIZE \ 2842 + TCG_STATIC_CALL_ARGS_SIZE \ 2843 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 2844 + TCG_TARGET_STACK_ALIGN - 1) \ 2845 & ~(TCG_TARGET_STACK_ALIGN - 1)) 2846 2847/* We're expecting a 2 byte uleb128 encoded value. */ 2848QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 2849 2850/* We're expecting to use a single ADDI insn. */ 2851QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 2852 2853static void tcg_target_qemu_prologue(TCGContext *s) 2854{ 2855 TCGReg r; 2856 2857 /* Push (FP, LR) and allocate space for all saved registers. */ 2858 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 2859 TCG_REG_SP, -PUSH_SIZE, 1, 1); 2860 2861 /* Set up frame pointer for canonical unwinding. */ 2862 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 2863 2864 /* Store callee-preserved regs x19..x28. */ 2865 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2866 int ofs = (r - TCG_REG_X19 + 2) * 8; 2867 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2868 } 2869 2870 /* Make stack space for TCG locals. */ 2871 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2872 FRAME_SIZE - PUSH_SIZE); 2873 2874 /* Inform TCG about how to find TCG locals with register, offset, size. 
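The temporary buffer lives just above the outgoing call-argument area, at SP + TCG_STATIC_CALL_ARGS_SIZE, and spans CPU_TEMP_BUF_NLONGS longs.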
*/ 2875 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 2876 CPU_TEMP_BUF_NLONGS * sizeof(long)); 2877 2878#if !defined(CONFIG_SOFTMMU) 2879 if (USE_GUEST_BASE) { 2880 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 2881 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 2882 } 2883#endif 2884 2885 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2886 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 2887 2888 /* 2889 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 2890 * and fall through to the rest of the epilogue. 2891 */ 2892 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2893 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 2894 2895 /* TB epilogue */ 2896 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); 2897 2898 /* Remove TCG locals stack space. */ 2899 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2900 FRAME_SIZE - PUSH_SIZE); 2901 2902 /* Restore registers x19..x28. */ 2903 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2904 int ofs = (r - TCG_REG_X19 + 2) * 8; 2905 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2906 } 2907 2908 /* Pop (FP, LR), restore SP to previous frame. */ 2909 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 2910 TCG_REG_SP, PUSH_SIZE, 0, 1); 2911 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 2912} 2913 2914static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2915{ 2916 int i; 2917 for (i = 0; i < count; ++i) { 2918 p[i] = NOP; 2919 } 2920} 2921 2922typedef struct { 2923 DebugFrameHeader h; 2924 uint8_t fde_def_cfa[4]; 2925 uint8_t fde_reg_ofs[24]; 2926} DebugFrame; 2927 2928#define ELF_HOST_MACHINE EM_AARCH64 2929 2930static const DebugFrame debug_frame = { 2931 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 2932 .h.cie.id = -1, 2933 .h.cie.version = 1, 2934 .h.cie.code_align = 1, 2935 .h.cie.data_align = 0x78, /* sleb128 -8 */ 2936 .h.cie.return_column = TCG_REG_LR, 2937 2938 /* Total FDE size does not include the "len" member. */ 2939 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 2940 2941 .fde_def_cfa = { 2942 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 2943 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 2944 (FRAME_SIZE >> 7) 2945 }, 2946 .fde_reg_ofs = { 2947 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 2948 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 2949 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 2950 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 2951 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 2952 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 2953 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 2954 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 2955 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 2956 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */ 2957 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 2958 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 2959 } 2960}; 2961 2962void tcg_register_jit(const void *buf, size_t buf_size) 2963{ 2964 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 2965} 2966
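/*
 * The last two bytes of fde_def_cfa above are the uleb128 encoding of
 * FRAME_SIZE for the DW_CFA_def_cfa rule.  As a worked example, if
 * FRAME_SIZE were 0x2a0 (672 bytes), those bytes would be
 * (0x2a0 & 0x7f) | 0x80 = 0xa0 and 0x2a0 >> 7 = 0x05, which a DWARF
 * consumer decodes back to 0x20 + (0x05 << 7) = 672.  The
 * QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)) above guarantees that
 * two bytes always suffice.
 */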