/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed. If we know the values match, it
   makes things much cleaner. */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped. */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP. So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register. */
#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.
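           The 26-bit field counts 32-bit instruction units, so a B/BL
           patched this way can reach +/-128MB from the branch itself.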
*/ 87 *code_ptr = deposit32(*code_ptr, 0, 26, offset); 88 return true; 89 } 90 return false; 91} 92 93static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) 94{ 95 ptrdiff_t offset = target - code_ptr; 96 if (offset == sextract64(offset, 0, 19)) { 97 *code_ptr = deposit32(*code_ptr, 5, 19, offset); 98 return true; 99 } 100 return false; 101} 102 103static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, 104 intptr_t value, intptr_t addend) 105{ 106 tcg_debug_assert(addend == 0); 107 switch (type) { 108 case R_AARCH64_JUMP26: 109 case R_AARCH64_CALL26: 110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value); 111 case R_AARCH64_CONDBR19: 112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value); 113 default: 114 g_assert_not_reached(); 115 } 116} 117 118#define TCG_CT_CONST_AIMM 0x100 119#define TCG_CT_CONST_LIMM 0x200 120#define TCG_CT_CONST_ZERO 0x400 121#define TCG_CT_CONST_MONE 0x800 122#define TCG_CT_CONST_ORRI 0x1000 123#define TCG_CT_CONST_ANDI 0x2000 124 125/* parse target specific constraints */ 126static const char *target_parse_constraint(TCGArgConstraint *ct, 127 const char *ct_str, TCGType type) 128{ 129 switch (*ct_str++) { 130 case 'r': /* general registers */ 131 ct->ct |= TCG_CT_REG; 132 ct->u.regs |= 0xffffffffu; 133 break; 134 case 'w': /* advsimd registers */ 135 ct->ct |= TCG_CT_REG; 136 ct->u.regs |= 0xffffffff00000000ull; 137 break; 138 case 'l': /* qemu_ld / qemu_st address, data_reg */ 139 ct->ct |= TCG_CT_REG; 140 ct->u.regs = 0xffffffffu; 141#ifdef CONFIG_SOFTMMU 142 /* x0 and x1 will be overwritten when reading the tlb entry, 143 and x2, and x3 for helper args, better to avoid using them. */ 144 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0); 145 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1); 146 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2); 147 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); 148#endif 149 break; 150 case 'A': /* Valid for arithmetic immediate (positive or negative). */ 151 ct->ct |= TCG_CT_CONST_AIMM; 152 break; 153 case 'L': /* Valid for logical immediate. */ 154 ct->ct |= TCG_CT_CONST_LIMM; 155 break; 156 case 'M': /* minus one */ 157 ct->ct |= TCG_CT_CONST_MONE; 158 break; 159 case 'O': /* vector orr/bic immediate */ 160 ct->ct |= TCG_CT_CONST_ORRI; 161 break; 162 case 'N': /* vector orr/bic immediate, inverted */ 163 ct->ct |= TCG_CT_CONST_ANDI; 164 break; 165 case 'Z': /* zero */ 166 ct->ct |= TCG_CT_CONST_ZERO; 167 break; 168 default: 169 return NULL; 170 } 171 return ct_str; 172} 173 174/* Match a constant valid for addition (12-bit, optionally shifted). */ 175static inline bool is_aimm(uint64_t val) 176{ 177 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 178} 179 180/* Match a constant valid for logical operations. */ 181static inline bool is_limm(uint64_t val) 182{ 183 /* Taking a simplified view of the logical immediates for now, ignoring 184 the replication that can happen across the field. Match bit patterns 185 of the forms 186 0....01....1 187 0..01..10..0 188 and their inverses. */ 189 190 /* Make things easier below, by testing the form with msb clear. */ 191 if ((int64_t)val < 0) { 192 val = ~val; 193 } 194 if (val == 0) { 195 return false; 196 } 197 val += val & -val; 198 return (val & (val - 1)) == 0; 199} 200 201/* Return true if v16 is a valid 16-bit shifted immediate. 
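   That is, a single byte value placed either in bits [7:0] (cmode 0x8)
   or in bits [15:8] (cmode 0xA).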
*/ 202static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 203{ 204 if (v16 == (v16 & 0xff)) { 205 *cmode = 0x8; 206 *imm8 = v16 & 0xff; 207 return true; 208 } else if (v16 == (v16 & 0xff00)) { 209 *cmode = 0xa; 210 *imm8 = v16 >> 8; 211 return true; 212 } 213 return false; 214} 215 216/* Return true if v32 is a valid 32-bit shifted immediate. */ 217static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 218{ 219 if (v32 == (v32 & 0xff)) { 220 *cmode = 0x0; 221 *imm8 = v32 & 0xff; 222 return true; 223 } else if (v32 == (v32 & 0xff00)) { 224 *cmode = 0x2; 225 *imm8 = (v32 >> 8) & 0xff; 226 return true; 227 } else if (v32 == (v32 & 0xff0000)) { 228 *cmode = 0x4; 229 *imm8 = (v32 >> 16) & 0xff; 230 return true; 231 } else if (v32 == (v32 & 0xff000000)) { 232 *cmode = 0x6; 233 *imm8 = v32 >> 24; 234 return true; 235 } 236 return false; 237} 238 239/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 240static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 241{ 242 if ((v32 & 0xffff00ff) == 0xff) { 243 *cmode = 0xc; 244 *imm8 = (v32 >> 8) & 0xff; 245 return true; 246 } else if ((v32 & 0xff00ffff) == 0xffff) { 247 *cmode = 0xd; 248 *imm8 = (v32 >> 16) & 0xff; 249 return true; 250 } 251 return false; 252} 253 254/* Return true if v32 is a valid float32 immediate. */ 255static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 256{ 257 if (extract32(v32, 0, 19) == 0 258 && (extract32(v32, 25, 6) == 0x20 259 || extract32(v32, 25, 6) == 0x1f)) { 260 *cmode = 0xf; 261 *imm8 = (extract32(v32, 31, 1) << 7) 262 | (extract32(v32, 25, 1) << 6) 263 | extract32(v32, 19, 6); 264 return true; 265 } 266 return false; 267} 268 269/* Return true if v64 is a valid float64 immediate. */ 270static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 271{ 272 if (extract64(v64, 0, 48) == 0 273 && (extract64(v64, 54, 9) == 0x100 274 || extract64(v64, 54, 9) == 0x0ff)) { 275 *cmode = 0xf; 276 *imm8 = (extract64(v64, 63, 1) << 7) 277 | (extract64(v64, 54, 1) << 6) 278 | extract64(v64, 48, 6); 279 return true; 280 } 281 return false; 282} 283 284/* 285 * Return non-zero if v32 can be formed by MOVI+ORR. 286 * Place the parameters for MOVI in (cmode, imm8). 287 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 288 */ 289static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 290{ 291 int i; 292 293 for (i = 6; i > 0; i -= 2) { 294 /* Mask out one byte we can add with ORR. */ 295 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 296 if (is_shimm32(tmp, cmode, imm8) || 297 is_soimm32(tmp, cmode, imm8)) { 298 break; 299 } 300 } 301 return i; 302} 303 304/* Return true if V is a valid 16-bit or 32-bit shifted immediate. 
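   A value that replicates across both 16-bit halves is matched against
   the 16-bit MOVI forms, anything else against the 32-bit forms.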
*/ 305static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 306{ 307 if (v32 == deposit32(v32, 16, 16, v32)) { 308 return is_shimm16(v32, cmode, imm8); 309 } else { 310 return is_shimm32(v32, cmode, imm8); 311 } 312} 313 314static int tcg_target_const_match(tcg_target_long val, TCGType type, 315 const TCGArgConstraint *arg_ct) 316{ 317 int ct = arg_ct->ct; 318 319 if (ct & TCG_CT_CONST) { 320 return 1; 321 } 322 if (type == TCG_TYPE_I32) { 323 val = (int32_t)val; 324 } 325 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 326 return 1; 327 } 328 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 329 return 1; 330 } 331 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 332 return 1; 333 } 334 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 335 return 1; 336 } 337 338 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 339 case 0: 340 break; 341 case TCG_CT_CONST_ANDI: 342 val = ~val; 343 /* fallthru */ 344 case TCG_CT_CONST_ORRI: 345 if (val == deposit64(val, 32, 32, val)) { 346 int cmode, imm8; 347 return is_shimm1632(val, &cmode, &imm8); 348 } 349 break; 350 default: 351 /* Both bits should not be set for the same insn. */ 352 g_assert_not_reached(); 353 } 354 355 return 0; 356} 357 358enum aarch64_cond_code { 359 COND_EQ = 0x0, 360 COND_NE = 0x1, 361 COND_CS = 0x2, /* Unsigned greater or equal */ 362 COND_HS = COND_CS, /* ALIAS greater or equal */ 363 COND_CC = 0x3, /* Unsigned less than */ 364 COND_LO = COND_CC, /* ALIAS Lower */ 365 COND_MI = 0x4, /* Negative */ 366 COND_PL = 0x5, /* Zero or greater */ 367 COND_VS = 0x6, /* Overflow */ 368 COND_VC = 0x7, /* No overflow */ 369 COND_HI = 0x8, /* Unsigned greater than */ 370 COND_LS = 0x9, /* Unsigned less or equal */ 371 COND_GE = 0xa, 372 COND_LT = 0xb, 373 COND_GT = 0xc, 374 COND_LE = 0xd, 375 COND_AL = 0xe, 376 COND_NV = 0xf, /* behaves like COND_AL here */ 377}; 378 379static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 380 [TCG_COND_EQ] = COND_EQ, 381 [TCG_COND_NE] = COND_NE, 382 [TCG_COND_LT] = COND_LT, 383 [TCG_COND_GE] = COND_GE, 384 [TCG_COND_LE] = COND_LE, 385 [TCG_COND_GT] = COND_GT, 386 /* unsigned */ 387 [TCG_COND_LTU] = COND_LO, 388 [TCG_COND_GTU] = COND_HI, 389 [TCG_COND_GEU] = COND_HS, 390 [TCG_COND_LEU] = COND_LS, 391}; 392 393typedef enum { 394 LDST_ST = 0, /* store */ 395 LDST_LD = 1, /* load */ 396 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 397 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 398} AArch64LdstType; 399 400/* We encode the format of the insn into the beginning of the name, so that 401 we can have the preprocessor help "typecheck" the insn vs the output 402 function. Arm didn't provide us with nice names for the formats, so we 403 use the section number of the architecture reference manual in which the 404 instruction group is described. */ 405typedef enum { 406 /* Compare and branch (immediate). */ 407 I3201_CBZ = 0x34000000, 408 I3201_CBNZ = 0x35000000, 409 410 /* Conditional branch (immediate). */ 411 I3202_B_C = 0x54000000, 412 413 /* Unconditional branch (immediate). */ 414 I3206_B = 0x14000000, 415 I3206_BL = 0x94000000, 416 417 /* Unconditional branch (register). */ 418 I3207_BR = 0xd61f0000, 419 I3207_BLR = 0xd63f0000, 420 I3207_RET = 0xd65f0000, 421 422 /* AdvSIMD load/store single structure. */ 423 I3303_LD1R = 0x0d40c000, 424 425 /* Load literal for loading the address at pc-relative offset */ 426 I3305_LDR = 0x58000000, 427 I3305_LDR_v64 = 0x5c000000, 428 I3305_LDR_v128 = 0x9c000000, 429 430 /* Load/store register. 
Described here as 3.3.12, but the helper 431 that emits them can transform to 3.3.10 or 3.3.13. */ 432 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 433 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 434 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 435 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 436 437 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 438 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 439 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 440 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 441 442 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 443 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 444 445 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 446 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 447 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 448 449 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 450 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 451 452 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 453 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 454 455 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 456 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 457 458 I3312_TO_I3310 = 0x00200800, 459 I3312_TO_I3313 = 0x01000000, 460 461 /* Load/store register pair instructions. */ 462 I3314_LDP = 0x28400000, 463 I3314_STP = 0x28000000, 464 465 /* Add/subtract immediate instructions. */ 466 I3401_ADDI = 0x11000000, 467 I3401_ADDSI = 0x31000000, 468 I3401_SUBI = 0x51000000, 469 I3401_SUBSI = 0x71000000, 470 471 /* Bitfield instructions. */ 472 I3402_BFM = 0x33000000, 473 I3402_SBFM = 0x13000000, 474 I3402_UBFM = 0x53000000, 475 476 /* Extract instruction. */ 477 I3403_EXTR = 0x13800000, 478 479 /* Logical immediate instructions. */ 480 I3404_ANDI = 0x12000000, 481 I3404_ORRI = 0x32000000, 482 I3404_EORI = 0x52000000, 483 484 /* Move wide immediate instructions. */ 485 I3405_MOVN = 0x12800000, 486 I3405_MOVZ = 0x52800000, 487 I3405_MOVK = 0x72800000, 488 489 /* PC relative addressing instructions. */ 490 I3406_ADR = 0x10000000, 491 I3406_ADRP = 0x90000000, 492 493 /* Add/subtract shifted register instructions (without a shift). */ 494 I3502_ADD = 0x0b000000, 495 I3502_ADDS = 0x2b000000, 496 I3502_SUB = 0x4b000000, 497 I3502_SUBS = 0x6b000000, 498 499 /* Add/subtract shifted register instructions (with a shift). */ 500 I3502S_ADD_LSL = I3502_ADD, 501 502 /* Add/subtract with carry instructions. */ 503 I3503_ADC = 0x1a000000, 504 I3503_SBC = 0x5a000000, 505 506 /* Conditional select instructions. */ 507 I3506_CSEL = 0x1a800000, 508 I3506_CSINC = 0x1a800400, 509 I3506_CSINV = 0x5a800000, 510 I3506_CSNEG = 0x5a800400, 511 512 /* Data-processing (1 source) instructions. */ 513 I3507_CLZ = 0x5ac01000, 514 I3507_RBIT = 0x5ac00000, 515 I3507_REV16 = 0x5ac00400, 516 I3507_REV32 = 0x5ac00800, 517 I3507_REV64 = 0x5ac00c00, 518 519 /* Data-processing (2 source) instructions. */ 520 I3508_LSLV = 0x1ac02000, 521 I3508_LSRV = 0x1ac02400, 522 I3508_ASRV = 0x1ac02800, 523 I3508_RORV = 0x1ac02c00, 524 I3508_SMULH = 0x9b407c00, 525 I3508_UMULH = 0x9bc07c00, 526 I3508_UDIV = 0x1ac00800, 527 I3508_SDIV = 0x1ac00c00, 528 529 /* Data-processing (3 source) instructions. */ 530 I3509_MADD = 0x1b000000, 531 I3509_MSUB = 0x1b008000, 532 533 /* Logical shifted register instructions (without a shift). 
*/ 534 I3510_AND = 0x0a000000, 535 I3510_BIC = 0x0a200000, 536 I3510_ORR = 0x2a000000, 537 I3510_ORN = 0x2a200000, 538 I3510_EOR = 0x4a000000, 539 I3510_EON = 0x4a200000, 540 I3510_ANDS = 0x6a000000, 541 542 /* Logical shifted register instructions (with a shift). */ 543 I3502S_AND_LSR = I3510_AND | (1 << 22), 544 545 /* AdvSIMD copy */ 546 I3605_DUP = 0x0e000400, 547 I3605_INS = 0x4e001c00, 548 I3605_UMOV = 0x0e003c00, 549 550 /* AdvSIMD modified immediate */ 551 I3606_MOVI = 0x0f000400, 552 I3606_MVNI = 0x2f000400, 553 I3606_BIC = 0x2f001400, 554 I3606_ORR = 0x0f001400, 555 556 /* AdvSIMD shift by immediate */ 557 I3614_SSHR = 0x0f000400, 558 I3614_SSRA = 0x0f001400, 559 I3614_SHL = 0x0f005400, 560 I3614_SLI = 0x2f005400, 561 I3614_USHR = 0x2f000400, 562 I3614_USRA = 0x2f001400, 563 564 /* AdvSIMD three same. */ 565 I3616_ADD = 0x0e208400, 566 I3616_AND = 0x0e201c00, 567 I3616_BIC = 0x0e601c00, 568 I3616_BIF = 0x2ee01c00, 569 I3616_BIT = 0x2ea01c00, 570 I3616_BSL = 0x2e601c00, 571 I3616_EOR = 0x2e201c00, 572 I3616_MUL = 0x0e209c00, 573 I3616_ORR = 0x0ea01c00, 574 I3616_ORN = 0x0ee01c00, 575 I3616_SUB = 0x2e208400, 576 I3616_CMGT = 0x0e203400, 577 I3616_CMGE = 0x0e203c00, 578 I3616_CMTST = 0x0e208c00, 579 I3616_CMHI = 0x2e203400, 580 I3616_CMHS = 0x2e203c00, 581 I3616_CMEQ = 0x2e208c00, 582 I3616_SMAX = 0x0e206400, 583 I3616_SMIN = 0x0e206c00, 584 I3616_SSHL = 0x0e204400, 585 I3616_SQADD = 0x0e200c00, 586 I3616_SQSUB = 0x0e202c00, 587 I3616_UMAX = 0x2e206400, 588 I3616_UMIN = 0x2e206c00, 589 I3616_UQADD = 0x2e200c00, 590 I3616_UQSUB = 0x2e202c00, 591 I3616_USHL = 0x2e204400, 592 593 /* AdvSIMD two-reg misc. */ 594 I3617_CMGT0 = 0x0e208800, 595 I3617_CMEQ0 = 0x0e209800, 596 I3617_CMLT0 = 0x0e20a800, 597 I3617_CMGE0 = 0x2e208800, 598 I3617_CMLE0 = 0x2e20a800, 599 I3617_NOT = 0x2e205800, 600 I3617_ABS = 0x0e20b800, 601 I3617_NEG = 0x2e20b800, 602 603 /* System instructions. */ 604 NOP = 0xd503201f, 605 DMB_ISH = 0xd50338bf, 606 DMB_LD = 0x00000100, 607 DMB_ST = 0x00000200, 608} AArch64Insn; 609 610static inline uint32_t tcg_in32(TCGContext *s) 611{ 612 uint32_t v = *(uint32_t *)s->code_ptr; 613 return v; 614} 615 616/* Emit an opcode with "type-checking" of the format. */ 617#define tcg_out_insn(S, FMT, OP, ...) 
\ 618 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 619 620static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 621 TCGReg rt, TCGReg rn, unsigned size) 622{ 623 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 624} 625 626static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 627 int imm19, TCGReg rt) 628{ 629 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 630} 631 632static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 633 TCGReg rt, int imm19) 634{ 635 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 636} 637 638static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 639 TCGCond c, int imm19) 640{ 641 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 642} 643 644static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 645{ 646 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 647} 648 649static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 650{ 651 tcg_out32(s, insn | rn << 5); 652} 653 654static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 655 TCGReg r1, TCGReg r2, TCGReg rn, 656 tcg_target_long ofs, bool pre, bool w) 657{ 658 insn |= 1u << 31; /* ext */ 659 insn |= pre << 24; 660 insn |= w << 23; 661 662 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 663 insn |= (ofs & (0x7f << 3)) << (15 - 3); 664 665 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 666} 667 668static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 669 TCGReg rd, TCGReg rn, uint64_t aimm) 670{ 671 if (aimm > 0xfff) { 672 tcg_debug_assert((aimm & 0xfff) == 0); 673 aimm >>= 12; 674 tcg_debug_assert(aimm <= 0xfff); 675 aimm |= 1 << 12; /* apply LSL 12 */ 676 } 677 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 678} 679 680/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 681 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 682 that feed the DecodeBitMasks pseudo function. */ 683static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 684 TCGReg rd, TCGReg rn, int n, int immr, int imms) 685{ 686 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 687 | rn << 5 | rd); 688} 689 690#define tcg_out_insn_3404 tcg_out_insn_3402 691 692static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 693 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 694{ 695 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 696 | rn << 5 | rd); 697} 698 699/* This function is used for the Move (wide immediate) instruction group. 700 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 701static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 702 TCGReg rd, uint16_t half, unsigned shift) 703{ 704 tcg_debug_assert((shift & ~0x30) == 0); 705 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 706} 707 708static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 709 TCGReg rd, int64_t disp) 710{ 711 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 712} 713 714/* This function is for both 3.5.2 (Add/Subtract shifted register), for 715 the rare occasion when we actually want to supply a shift amount. 
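   One such use is the AND_LSR form emitted by the softmmu TLB lookup
   below, which extracts the TLB index from the address in one insn.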
 */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift. Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503 tcg_out_insn_3502
#define tcg_out_insn_3508 tcg_out_insn_3502
#define tcg_out_insn_3510 tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input. Therefore
       we can handle both register sets with one function. */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12. Adjust.
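       ORing in I3312_TO_I3313 sets bit 24 and selects the unsigned,
       scaled 12-bit offset form of the load/store encoding.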
*/ 812 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 813 | rn << 5 | (rd & 0x1f)); 814} 815 816/* Register to register move using ORR (shifted register with no shift). */ 817static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 818{ 819 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 820} 821 822/* Register to register move using ADDI (move to/from SP). */ 823static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 824{ 825 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 826} 827 828/* This function is used for the Logical (immediate) instruction group. 829 The value of LIMM must satisfy IS_LIMM. See the comment above about 830 only supporting simplified logical immediates. */ 831static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 832 TCGReg rd, TCGReg rn, uint64_t limm) 833{ 834 unsigned h, l, r, c; 835 836 tcg_debug_assert(is_limm(limm)); 837 838 h = clz64(limm); 839 l = ctz64(limm); 840 if (l == 0) { 841 r = 0; /* form 0....01....1 */ 842 c = ctz64(~limm) - 1; 843 if (h == 0) { 844 r = clz64(~limm); /* form 1..10..01..1 */ 845 c += r; 846 } 847 } else { 848 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 849 c = r - h - 1; 850 } 851 if (ext == TCG_TYPE_I32) { 852 r &= 31; 853 c &= 31; 854 } 855 856 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 857} 858 859static void tcg_out_dupi_vec(TCGContext *s, TCGType type, 860 TCGReg rd, tcg_target_long v64) 861{ 862 bool q = type == TCG_TYPE_V128; 863 int cmode, imm8, i; 864 865 /* Test all bytes equal first. */ 866 if (v64 == dup_const(MO_8, v64)) { 867 imm8 = (uint8_t)v64; 868 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 869 return; 870 } 871 872 /* 873 * Test all bytes 0x00 or 0xff second. This can match cases that 874 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 875 */ 876 for (i = imm8 = 0; i < 8; i++) { 877 uint8_t byte = v64 >> (i * 8); 878 if (byte == 0xff) { 879 imm8 |= 1 << i; 880 } else if (byte != 0) { 881 goto fail_bytes; 882 } 883 } 884 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 885 return; 886 fail_bytes: 887 888 /* 889 * Tests for various replications. For each element width, if we 890 * cannot find an expansion there's no point checking a larger 891 * width because we already know by replication it cannot match. 892 */ 893 if (v64 == dup_const(MO_16, v64)) { 894 uint16_t v16 = v64; 895 896 if (is_shimm16(v16, &cmode, &imm8)) { 897 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 898 return; 899 } 900 if (is_shimm16(~v16, &cmode, &imm8)) { 901 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 902 return; 903 } 904 905 /* 906 * Otherwise, all remaining constants can be loaded in two insns: 907 * rd = v16 & 0xff, rd |= v16 & 0xff00. 908 */ 909 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 910 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 911 return; 912 } else if (v64 == dup_const(MO_32, v64)) { 913 uint32_t v32 = v64; 914 uint32_t n32 = ~v32; 915 916 if (is_shimm32(v32, &cmode, &imm8) || 917 is_soimm32(v32, &cmode, &imm8) || 918 is_fimm32(v32, &cmode, &imm8)) { 919 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 920 return; 921 } 922 if (is_shimm32(n32, &cmode, &imm8) || 923 is_soimm32(n32, &cmode, &imm8)) { 924 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 925 return; 926 } 927 928 /* 929 * Restrict the set of constants to those we can load with 930 * two instructions. Others we load from the pool. 
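     * For example, 0x0000ffaa in each 32-bit lane is built as
     * MOVI (cmode 0, imm8 0xaa) followed by ORR (cmode 2, imm8 0xff).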
931 */ 932 i = is_shimm32_pair(v32, &cmode, &imm8); 933 if (i) { 934 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 935 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 936 return; 937 } 938 i = is_shimm32_pair(n32, &cmode, &imm8); 939 if (i) { 940 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 941 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 942 return; 943 } 944 } else if (is_fimm64(v64, &cmode, &imm8)) { 945 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 946 return; 947 } 948 949 /* 950 * As a last resort, load from the constant pool. Sadly there 951 * is no LD1R (literal), so store the full 16-byte vector. 952 */ 953 if (type == TCG_TYPE_V128) { 954 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 955 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 956 } else { 957 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 958 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 959 } 960} 961 962static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 963 TCGReg rd, TCGReg rs) 964{ 965 int is_q = type - TCG_TYPE_V64; 966 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 967 return true; 968} 969 970static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 971 TCGReg r, TCGReg base, intptr_t offset) 972{ 973 TCGReg temp = TCG_REG_TMP; 974 975 if (offset < -0xffffff || offset > 0xffffff) { 976 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 977 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 978 base = temp; 979 } else { 980 AArch64Insn add_insn = I3401_ADDI; 981 982 if (offset < 0) { 983 add_insn = I3401_SUBI; 984 offset = -offset; 985 } 986 if (offset & 0xfff000) { 987 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 988 base = temp; 989 } 990 if (offset & 0xfff) { 991 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 992 base = temp; 993 } 994 } 995 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 996 return true; 997} 998 999static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1000 tcg_target_long value) 1001{ 1002 tcg_target_long svalue = value; 1003 tcg_target_long ivalue = ~value; 1004 tcg_target_long t0, t1, t2; 1005 int s0, s1; 1006 AArch64Insn opc; 1007 1008 switch (type) { 1009 case TCG_TYPE_I32: 1010 case TCG_TYPE_I64: 1011 tcg_debug_assert(rd < 32); 1012 break; 1013 1014 case TCG_TYPE_V64: 1015 case TCG_TYPE_V128: 1016 tcg_debug_assert(rd >= 32); 1017 tcg_out_dupi_vec(s, type, rd, value); 1018 return; 1019 1020 default: 1021 g_assert_not_reached(); 1022 } 1023 1024 /* For 32-bit values, discard potential garbage in value. For 64-bit 1025 values within [2**31, 2**32-1], we can create smaller sequences by 1026 interpreting this as a negative 32-bit number, while ensuring that 1027 the high 32 bits are cleared by setting SF=0. */ 1028 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1029 svalue = (int32_t)value; 1030 value = (uint32_t)value; 1031 ivalue = (uint32_t)ivalue; 1032 type = TCG_TYPE_I32; 1033 } 1034 1035 /* Speed things up by handling the common case of small positive 1036 and negative values specially. */ 1037 if ((value & ~0xffffull) == 0) { 1038 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1039 return; 1040 } else if ((ivalue & ~0xffffull) == 0) { 1041 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1042 return; 1043 } 1044 1045 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1046 use the sign-extended value. 
That lets us match rotated values such 1047 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1048 if (is_limm(svalue)) { 1049 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1050 return; 1051 } 1052 1053 /* Look for host pointer values within 4G of the PC. This happens 1054 often when loading pointers to QEMU's own data structures. */ 1055 if (type == TCG_TYPE_I64) { 1056 tcg_target_long disp = value - (intptr_t)s->code_ptr; 1057 if (disp == sextract64(disp, 0, 21)) { 1058 tcg_out_insn(s, 3406, ADR, rd, disp); 1059 return; 1060 } 1061 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); 1062 if (disp == sextract64(disp, 0, 21)) { 1063 tcg_out_insn(s, 3406, ADRP, rd, disp); 1064 if (value & 0xfff) { 1065 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1066 } 1067 return; 1068 } 1069 } 1070 1071 /* Would it take fewer insns to begin with MOVN? */ 1072 if (ctpop64(value) >= 32) { 1073 t0 = ivalue; 1074 opc = I3405_MOVN; 1075 } else { 1076 t0 = value; 1077 opc = I3405_MOVZ; 1078 } 1079 s0 = ctz64(t0) & (63 & -16); 1080 t1 = t0 & ~(0xffffUL << s0); 1081 s1 = ctz64(t1) & (63 & -16); 1082 t2 = t1 & ~(0xffffUL << s1); 1083 if (t2 == 0) { 1084 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1085 if (t1 != 0) { 1086 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1087 } 1088 return; 1089 } 1090 1091 /* For more than 2 insns, dump it into the constant pool. */ 1092 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1093 tcg_out_insn(s, 3305, LDR, 0, rd); 1094} 1095 1096/* Define something more legible for general use. */ 1097#define tcg_out_ldst_r tcg_out_insn_3310 1098 1099static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1100 TCGReg rn, intptr_t offset, int lgsize) 1101{ 1102 /* If the offset is naturally aligned and in range, then we can 1103 use the scaled uimm12 encoding */ 1104 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1105 uintptr_t scaled_uimm = offset >> lgsize; 1106 if (scaled_uimm <= 0xfff) { 1107 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1108 return; 1109 } 1110 } 1111 1112 /* Small signed offsets can use the unscaled encoding. */ 1113 if (offset >= -256 && offset < 256) { 1114 tcg_out_insn_3312(s, insn, rd, rn, offset); 1115 return; 1116 } 1117 1118 /* Worst-case scenario, move offset to temp register, use reg offset. 
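       TCG_REG_TMP (x30) is reserved as a scratch register, so it is
       always free to hold the offset here.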
*/ 1119 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1120 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1121} 1122 1123static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1124{ 1125 if (ret == arg) { 1126 return true; 1127 } 1128 switch (type) { 1129 case TCG_TYPE_I32: 1130 case TCG_TYPE_I64: 1131 if (ret < 32 && arg < 32) { 1132 tcg_out_movr(s, type, ret, arg); 1133 break; 1134 } else if (ret < 32) { 1135 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1136 break; 1137 } else if (arg < 32) { 1138 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1139 break; 1140 } 1141 /* FALLTHRU */ 1142 1143 case TCG_TYPE_V64: 1144 tcg_debug_assert(ret >= 32 && arg >= 32); 1145 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1146 break; 1147 case TCG_TYPE_V128: 1148 tcg_debug_assert(ret >= 32 && arg >= 32); 1149 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1150 break; 1151 1152 default: 1153 g_assert_not_reached(); 1154 } 1155 return true; 1156} 1157 1158static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1159 TCGReg base, intptr_t ofs) 1160{ 1161 AArch64Insn insn; 1162 int lgsz; 1163 1164 switch (type) { 1165 case TCG_TYPE_I32: 1166 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1167 lgsz = 2; 1168 break; 1169 case TCG_TYPE_I64: 1170 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1171 lgsz = 3; 1172 break; 1173 case TCG_TYPE_V64: 1174 insn = I3312_LDRVD; 1175 lgsz = 3; 1176 break; 1177 case TCG_TYPE_V128: 1178 insn = I3312_LDRVQ; 1179 lgsz = 4; 1180 break; 1181 default: 1182 g_assert_not_reached(); 1183 } 1184 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1185} 1186 1187static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1188 TCGReg base, intptr_t ofs) 1189{ 1190 AArch64Insn insn; 1191 int lgsz; 1192 1193 switch (type) { 1194 case TCG_TYPE_I32: 1195 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1196 lgsz = 2; 1197 break; 1198 case TCG_TYPE_I64: 1199 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1200 lgsz = 3; 1201 break; 1202 case TCG_TYPE_V64: 1203 insn = I3312_STRVD; 1204 lgsz = 3; 1205 break; 1206 case TCG_TYPE_V128: 1207 insn = I3312_STRVQ; 1208 lgsz = 4; 1209 break; 1210 default: 1211 g_assert_not_reached(); 1212 } 1213 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1214} 1215 1216static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1217 TCGReg base, intptr_t ofs) 1218{ 1219 if (type <= TCG_TYPE_I64 && val == 0) { 1220 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1221 return true; 1222 } 1223 return false; 1224} 1225 1226static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1227 TCGReg rn, unsigned int a, unsigned int b) 1228{ 1229 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1230} 1231 1232static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1233 TCGReg rn, unsigned int a, unsigned int b) 1234{ 1235 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1236} 1237 1238static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1239 TCGReg rn, unsigned int a, unsigned int b) 1240{ 1241 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1242} 1243 1244static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1245 TCGReg rn, TCGReg rm, unsigned int a) 1246{ 1247 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1248} 1249 1250static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1251 TCGReg rd, TCGReg rn, unsigned int m) 1252{ 1253 int bits = ext ? 
64 : 32; 1254 int max = bits - 1; 1255 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); 1256} 1257 1258static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1259 TCGReg rd, TCGReg rn, unsigned int m) 1260{ 1261 int max = ext ? 63 : 31; 1262 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1263} 1264 1265static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1266 TCGReg rd, TCGReg rn, unsigned int m) 1267{ 1268 int max = ext ? 63 : 31; 1269 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1270} 1271 1272static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1273 TCGReg rd, TCGReg rn, unsigned int m) 1274{ 1275 int max = ext ? 63 : 31; 1276 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1277} 1278 1279static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1280 TCGReg rd, TCGReg rn, unsigned int m) 1281{ 1282 int bits = ext ? 64 : 32; 1283 int max = bits - 1; 1284 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); 1285} 1286 1287static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1288 TCGReg rn, unsigned lsb, unsigned width) 1289{ 1290 unsigned size = ext ? 64 : 32; 1291 unsigned a = (size - lsb) & (size - 1); 1292 unsigned b = width - 1; 1293 tcg_out_bfm(s, ext, rd, rn, a, b); 1294} 1295 1296static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1297 tcg_target_long b, bool const_b) 1298{ 1299 if (const_b) { 1300 /* Using CMP or CMN aliases. */ 1301 if (b >= 0) { 1302 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1303 } else { 1304 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1305 } 1306 } else { 1307 /* Using CMP alias SUBS wzr, Wn, Wm */ 1308 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1309 } 1310} 1311 1312static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) 1313{ 1314 ptrdiff_t offset = target - s->code_ptr; 1315 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1316 tcg_out_insn(s, 3206, B, offset); 1317} 1318 1319static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target) 1320{ 1321 ptrdiff_t offset = target - s->code_ptr; 1322 if (offset == sextract64(offset, 0, 26)) { 1323 tcg_out_insn(s, 3206, BL, offset); 1324 } else { 1325 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1326 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1327 } 1328} 1329 1330static inline void tcg_out_callr(TCGContext *s, TCGReg reg) 1331{ 1332 tcg_out_insn(s, 3207, BLR, reg); 1333} 1334 1335static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) 1336{ 1337 ptrdiff_t offset = target - s->code_ptr; 1338 if (offset == sextract64(offset, 0, 26)) { 1339 tcg_out_insn(s, 3206, BL, offset); 1340 } else { 1341 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1342 tcg_out_callr(s, TCG_REG_TMP); 1343 } 1344} 1345 1346void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, 1347 uintptr_t addr) 1348{ 1349 tcg_insn_unit i1, i2; 1350 TCGType rt = TCG_TYPE_I64; 1351 TCGReg rd = TCG_REG_TMP; 1352 uint64_t pair; 1353 1354 ptrdiff_t offset = addr - jmp_addr; 1355 1356 if (offset == sextract64(offset, 0, 26)) { 1357 i1 = I3206_B | ((offset >> 2) & 0x3ffffff); 1358 i2 = NOP; 1359 } else { 1360 offset = (addr >> 12) - (jmp_addr >> 12); 1361 1362 /* patch ADRP */ 1363 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; 1364 /* patch ADDI */ 1365 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; 1366 } 1367 pair = (uint64_t)i2 << 32 | i1; 1368 qatomic_set((uint64_t *)jmp_addr, pair); 1369 flush_icache_range(jmp_addr, jmp_addr + 8); 1370} 1371 
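/*
 * The goto_tb code below emits an 8-byte aligned ADRP+ADD pair followed
 * by BR TMP. The function above rewrites that aligned pair with one
 * atomic 64-bit store: either B <target>; NOP when the displacement fits
 * a direct branch, or ADRP+ADD forming the absolute target address in
 * TCG_REG_TMP.
 */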
1372static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1373{ 1374 if (!l->has_value) { 1375 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1376 tcg_out_insn(s, 3206, B, 0); 1377 } else { 1378 tcg_out_goto(s, l->u.value_ptr); 1379 } 1380} 1381 1382static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1383 TCGArg b, bool b_const, TCGLabel *l) 1384{ 1385 intptr_t offset; 1386 bool need_cmp; 1387 1388 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1389 need_cmp = false; 1390 } else { 1391 need_cmp = true; 1392 tcg_out_cmp(s, ext, a, b, b_const); 1393 } 1394 1395 if (!l->has_value) { 1396 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1397 offset = tcg_in32(s) >> 5; 1398 } else { 1399 offset = l->u.value_ptr - s->code_ptr; 1400 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1401 } 1402 1403 if (need_cmp) { 1404 tcg_out_insn(s, 3202, B_C, c, offset); 1405 } else if (c == TCG_COND_EQ) { 1406 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1407 } else { 1408 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1409 } 1410} 1411 1412static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) 1413{ 1414 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); 1415} 1416 1417static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) 1418{ 1419 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); 1420} 1421 1422static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) 1423{ 1424 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); 1425} 1426 1427static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1428 TCGReg rd, TCGReg rn) 1429{ 1430 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1431 int bits = (8 << s_bits) - 1; 1432 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1433} 1434 1435static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1436 TCGReg rd, TCGReg rn) 1437{ 1438 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1439 int bits = (8 << s_bits) - 1; 1440 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1441} 1442 1443static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1444 TCGReg rn, int64_t aimm) 1445{ 1446 if (aimm >= 0) { 1447 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1448 } else { 1449 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1450 } 1451} 1452 1453static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1454 TCGReg rh, TCGReg al, TCGReg ah, 1455 tcg_target_long bl, tcg_target_long bh, 1456 bool const_bl, bool const_bh, bool sub) 1457{ 1458 TCGReg orig_rl = rl; 1459 AArch64Insn insn; 1460 1461 if (rl == ah || (!const_bh && rl == bh)) { 1462 rl = TCG_REG_TMP; 1463 } 1464 1465 if (const_bl) { 1466 insn = I3401_ADDSI; 1467 if ((bl < 0) ^ sub) { 1468 insn = I3401_SUBSI; 1469 bl = -bl; 1470 } 1471 if (unlikely(al == TCG_REG_XZR)) { 1472 /* ??? We want to allow al to be zero for the benefit of 1473 negation via subtraction. However, that leaves open the 1474 possibility of adding 0+const in the low part, and the 1475 immediate add instructions encode XSP not XZR. Don't try 1476 anything more elaborate here than loading another zero. */ 1477 al = TCG_REG_TMP; 1478 tcg_out_movi(s, ext, al, 0); 1479 } 1480 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1481 } else { 1482 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1483 } 1484 1485 insn = I3503_ADC; 1486 if (const_bh) { 1487 /* Note that the only two constants we support are 0 and -1, and 1488 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. 
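           E.g. adding a constant high part of -1 becomes SBC rh, ah, xzr,
           which computes ah + ~0 + C, i.e. the required ah - 1 + C.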
*/ 1489 if ((bh != 0) ^ sub) { 1490 insn = I3503_SBC; 1491 } 1492 bh = TCG_REG_XZR; 1493 } else if (sub) { 1494 insn = I3503_SBC; 1495 } 1496 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1497 1498 tcg_out_mov(s, ext, orig_rl, rl); 1499} 1500 1501static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1502{ 1503 static const uint32_t sync[] = { 1504 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1505 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1506 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1507 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1508 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1509 }; 1510 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1511} 1512 1513static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1514 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1515{ 1516 TCGReg a1 = a0; 1517 if (is_ctz) { 1518 a1 = TCG_REG_TMP; 1519 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1520 } 1521 if (const_b && b == (ext ? 64 : 32)) { 1522 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1523 } else { 1524 AArch64Insn sel = I3506_CSEL; 1525 1526 tcg_out_cmp(s, ext, a0, 0, 1); 1527 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1528 1529 if (const_b) { 1530 if (b == -1) { 1531 b = TCG_REG_XZR; 1532 sel = I3506_CSINV; 1533 } else if (b == 0) { 1534 b = TCG_REG_XZR; 1535 } else { 1536 tcg_out_movi(s, ext, d, b); 1537 b = d; 1538 } 1539 } 1540 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1541 } 1542} 1543 1544#ifdef CONFIG_SOFTMMU 1545#include "../tcg-ldst.c.inc" 1546 1547/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1548 * TCGMemOpIdx oi, uintptr_t ra) 1549 */ 1550static void * const qemu_ld_helpers[16] = { 1551 [MO_UB] = helper_ret_ldub_mmu, 1552 [MO_LEUW] = helper_le_lduw_mmu, 1553 [MO_LEUL] = helper_le_ldul_mmu, 1554 [MO_LEQ] = helper_le_ldq_mmu, 1555 [MO_BEUW] = helper_be_lduw_mmu, 1556 [MO_BEUL] = helper_be_ldul_mmu, 1557 [MO_BEQ] = helper_be_ldq_mmu, 1558}; 1559 1560/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1561 * uintxx_t val, TCGMemOpIdx oi, 1562 * uintptr_t ra) 1563 */ 1564static void * const qemu_st_helpers[16] = { 1565 [MO_UB] = helper_ret_stb_mmu, 1566 [MO_LEUW] = helper_le_stw_mmu, 1567 [MO_LEUL] = helper_le_stl_mmu, 1568 [MO_LEQ] = helper_le_stq_mmu, 1569 [MO_BEUW] = helper_be_stw_mmu, 1570 [MO_BEUL] = helper_be_stl_mmu, 1571 [MO_BEQ] = helper_be_stq_mmu, 1572}; 1573 1574static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) 1575{ 1576 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1577 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1578 tcg_out_insn(s, 3406, ADR, rd, offset); 1579} 1580 1581static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1582{ 1583 TCGMemOpIdx oi = lb->oi; 1584 MemOp opc = get_memop(oi); 1585 MemOp size = opc & MO_SIZE; 1586 1587 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { 1588 return false; 1589 } 1590 1591 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1592 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1593 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1594 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1595 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1596 if (opc & MO_SIGN) { 1597 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1598 } else { 1599 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1600 } 1601 1602 tcg_out_goto(s, lb->raddr); 1603 return true; 1604} 1605 1606static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1607{ 1608 TCGMemOpIdx oi = lb->oi; 1609 MemOp opc = 
get_memop(oi); 1610 MemOp size = opc & MO_SIZE; 1611 1612 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { 1613 return false; 1614 } 1615 1616 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1617 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1618 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1619 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1620 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1621 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1622 tcg_out_goto(s, lb->raddr); 1623 return true; 1624} 1625 1626static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, 1627 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1628 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1629{ 1630 TCGLabelQemuLdst *label = new_ldst_label(s); 1631 1632 label->is_ld = is_ld; 1633 label->oi = oi; 1634 label->type = ext; 1635 label->datalo_reg = data_reg; 1636 label->addrlo_reg = addr_reg; 1637 label->raddr = raddr; 1638 label->label_ptr[0] = label_ptr; 1639} 1640 1641/* We expect to use a 7-bit scaled negative offset from ENV. */ 1642QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1643QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1644 1645/* These offsets are built into the LDP below. */ 1646QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1647QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1648 1649/* Load and compare a TLB entry, emitting the conditional jump to the 1650 slow path for the failure case, which will be patched later when finalizing 1651 the slow path. Generated code returns the host addend in X1, 1652 clobbers X0,X2,X3,TMP. */ 1653static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, 1654 tcg_insn_unit **label_ptr, int mem_index, 1655 bool is_read) 1656{ 1657 unsigned a_bits = get_alignment_bits(opc); 1658 unsigned s_bits = opc & MO_SIZE; 1659 unsigned a_mask = (1u << a_bits) - 1; 1660 unsigned s_mask = (1u << s_bits) - 1; 1661 TCGReg x3; 1662 TCGType mask_type; 1663 uint64_t compare_mask; 1664 1665 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1666 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1667 1668 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1669 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1670 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1671 1672 /* Extract the TLB index from the address into X0. */ 1673 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1674 TCG_REG_X0, TCG_REG_X0, addr_reg, 1675 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1676 1677 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ 1678 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1679 1680 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1681 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read 1682 ? offsetof(CPUTLBEntry, addr_read) 1683 : offsetof(CPUTLBEntry, addr_write)); 1684 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1685 offsetof(CPUTLBEntry, addend)); 1686 1687 /* For aligned accesses, we check the first byte and include the alignment 1688 bits within the address. For unaligned access, we check that we don't 1689 cross pages using the address of the last byte of the access. */ 1690 if (a_bits >= s_bits) { 1691 x3 = addr_reg; 1692 } else { 1693 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1694 TCG_REG_X3, addr_reg, s_mask - a_mask); 1695 x3 = TCG_REG_X3; 1696 } 1697 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1698 1699 /* Store the page mask part of the address into X3. 
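       That is, the (possibly alignment-adjusted) address is masked with
       TARGET_PAGE_MASK | a_mask; the fast path is taken only when the
       result equals the comparator loaded from the TLB entry above.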
*/ 1700 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1701 TCG_REG_X3, x3, compare_mask); 1702 1703 /* Perform the address comparison. */ 1704 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1705 1706 /* If not equal, we jump to the slow path. */ 1707 *label_ptr = s->code_ptr; 1708 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1709} 1710 1711#endif /* CONFIG_SOFTMMU */ 1712 1713static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1714 TCGReg data_r, TCGReg addr_r, 1715 TCGType otype, TCGReg off_r) 1716{ 1717 const MemOp bswap = memop & MO_BSWAP; 1718 1719 switch (memop & MO_SSIZE) { 1720 case MO_UB: 1721 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); 1722 break; 1723 case MO_SB: 1724 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1725 data_r, addr_r, otype, off_r); 1726 break; 1727 case MO_UW: 1728 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1729 if (bswap) { 1730 tcg_out_rev16(s, data_r, data_r); 1731 } 1732 break; 1733 case MO_SW: 1734 if (bswap) { 1735 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1736 tcg_out_rev16(s, data_r, data_r); 1737 tcg_out_sxt(s, ext, MO_16, data_r, data_r); 1738 } else { 1739 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), 1740 data_r, addr_r, otype, off_r); 1741 } 1742 break; 1743 case MO_UL: 1744 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1745 if (bswap) { 1746 tcg_out_rev32(s, data_r, data_r); 1747 } 1748 break; 1749 case MO_SL: 1750 if (bswap) { 1751 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1752 tcg_out_rev32(s, data_r, data_r); 1753 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); 1754 } else { 1755 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); 1756 } 1757 break; 1758 case MO_Q: 1759 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); 1760 if (bswap) { 1761 tcg_out_rev64(s, data_r, data_r); 1762 } 1763 break; 1764 default: 1765 tcg_abort(); 1766 } 1767} 1768 1769static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1770 TCGReg data_r, TCGReg addr_r, 1771 TCGType otype, TCGReg off_r) 1772{ 1773 const MemOp bswap = memop & MO_BSWAP; 1774 1775 switch (memop & MO_SIZE) { 1776 case MO_8: 1777 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); 1778 break; 1779 case MO_16: 1780 if (bswap && data_r != TCG_REG_XZR) { 1781 tcg_out_rev16(s, TCG_REG_TMP, data_r); 1782 data_r = TCG_REG_TMP; 1783 } 1784 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); 1785 break; 1786 case MO_32: 1787 if (bswap && data_r != TCG_REG_XZR) { 1788 tcg_out_rev32(s, TCG_REG_TMP, data_r); 1789 data_r = TCG_REG_TMP; 1790 } 1791 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); 1792 break; 1793 case MO_64: 1794 if (bswap && data_r != TCG_REG_XZR) { 1795 tcg_out_rev64(s, TCG_REG_TMP, data_r); 1796 data_r = TCG_REG_TMP; 1797 } 1798 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); 1799 break; 1800 default: 1801 tcg_abort(); 1802 } 1803} 1804 1805static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1806 TCGMemOpIdx oi, TCGType ext) 1807{ 1808 MemOp memop = get_memop(oi); 1809 const TCGType otype = TARGET_LONG_BITS == 64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32; 1810#ifdef CONFIG_SOFTMMU 1811 unsigned mem_index = get_mmuidx(oi); 1812 tcg_insn_unit *label_ptr; 1813 1814 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); 1815 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1816 TCG_REG_X1, otype, addr_reg); 1817 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, 1818 s->code_ptr, label_ptr); 1819#else /* !CONFIG_SOFTMMU */ 1820 if (USE_GUEST_BASE) { 1821 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1822 TCG_REG_GUEST_BASE, otype, addr_reg); 1823 } else { 1824 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1825 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1826 } 1827#endif /* CONFIG_SOFTMMU */ 1828} 1829 1830static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1831 TCGMemOpIdx oi) 1832{ 1833 MemOp memop = get_memop(oi); 1834 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1835#ifdef CONFIG_SOFTMMU 1836 unsigned mem_index = get_mmuidx(oi); 1837 tcg_insn_unit *label_ptr; 1838 1839 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); 1840 tcg_out_qemu_st_direct(s, memop, data_reg, 1841 TCG_REG_X1, otype, addr_reg); 1842 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, 1843 data_reg, addr_reg, s->code_ptr, label_ptr); 1844#else /* !CONFIG_SOFTMMU */ 1845 if (USE_GUEST_BASE) { 1846 tcg_out_qemu_st_direct(s, memop, data_reg, 1847 TCG_REG_GUEST_BASE, otype, addr_reg); 1848 } else { 1849 tcg_out_qemu_st_direct(s, memop, data_reg, 1850 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1851 } 1852#endif /* CONFIG_SOFTMMU */ 1853} 1854 1855static tcg_insn_unit *tb_ret_addr; 1856 1857static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1858 const TCGArg args[TCG_MAX_OP_ARGS], 1859 const int const_args[TCG_MAX_OP_ARGS]) 1860{ 1861 /* 99% of the time, we can signal the use of extension registers 1862 by looking to see if the opcode handles 64-bit data. */ 1863 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 1864 1865 /* Hoist the loads of the most common arguments. */ 1866 TCGArg a0 = args[0]; 1867 TCGArg a1 = args[1]; 1868 TCGArg a2 = args[2]; 1869 int c2 = const_args[2]; 1870 1871 /* Some operands are defined with "rZ" constraint, a register or 1872 the zero register. These need not actually test args[I] == 0. */ 1873#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 1874 1875 switch (opc) { 1876 case INDEX_op_exit_tb: 1877 /* Reuse the zeroing that exists for goto_ptr. */ 1878 if (a0 == 0) { 1879 tcg_out_goto_long(s, s->code_gen_epilogue); 1880 } else { 1881 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 1882 tcg_out_goto_long(s, tb_ret_addr); 1883 } 1884 break; 1885 1886 case INDEX_op_goto_tb: 1887 if (s->tb_jmp_insn_offset != NULL) { 1888 /* TCG_TARGET_HAS_direct_jump */ 1889 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic 1890 write can be used to patch the target address. */ 1891 if ((uintptr_t)s->code_ptr & 7) { 1892 tcg_out32(s, NOP); 1893 } 1894 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); 1895 /* actual branch destination will be patched by 1896 tb_target_set_jmp_target later. 
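               The ADRP+ADD pair emitted below is the 8-byte unit that
               tb_target_set_jmp_target rewrites atomically.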
*/ 1897 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); 1898 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); 1899 } else { 1900 /* !TCG_TARGET_HAS_direct_jump */ 1901 tcg_debug_assert(s->tb_jmp_target_addr != NULL); 1902 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2; 1903 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP); 1904 } 1905 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1906 set_jmp_reset_offset(s, a0); 1907 break; 1908 1909 case INDEX_op_goto_ptr: 1910 tcg_out_insn(s, 3207, BR, a0); 1911 break; 1912 1913 case INDEX_op_br: 1914 tcg_out_goto_label(s, arg_label(a0)); 1915 break; 1916 1917 case INDEX_op_ld8u_i32: 1918 case INDEX_op_ld8u_i64: 1919 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 1920 break; 1921 case INDEX_op_ld8s_i32: 1922 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 1923 break; 1924 case INDEX_op_ld8s_i64: 1925 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 1926 break; 1927 case INDEX_op_ld16u_i32: 1928 case INDEX_op_ld16u_i64: 1929 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 1930 break; 1931 case INDEX_op_ld16s_i32: 1932 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 1933 break; 1934 case INDEX_op_ld16s_i64: 1935 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 1936 break; 1937 case INDEX_op_ld_i32: 1938 case INDEX_op_ld32u_i64: 1939 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 1940 break; 1941 case INDEX_op_ld32s_i64: 1942 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 1943 break; 1944 case INDEX_op_ld_i64: 1945 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 1946 break; 1947 1948 case INDEX_op_st8_i32: 1949 case INDEX_op_st8_i64: 1950 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 1951 break; 1952 case INDEX_op_st16_i32: 1953 case INDEX_op_st16_i64: 1954 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 1955 break; 1956 case INDEX_op_st_i32: 1957 case INDEX_op_st32_i64: 1958 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 1959 break; 1960 case INDEX_op_st_i64: 1961 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 1962 break; 1963 1964 case INDEX_op_add_i32: 1965 a2 = (int32_t)a2; 1966 /* FALLTHRU */ 1967 case INDEX_op_add_i64: 1968 if (c2) { 1969 tcg_out_addsubi(s, ext, a0, a1, a2); 1970 } else { 1971 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 1972 } 1973 break; 1974 1975 case INDEX_op_sub_i32: 1976 a2 = (int32_t)a2; 1977 /* FALLTHRU */ 1978 case INDEX_op_sub_i64: 1979 if (c2) { 1980 tcg_out_addsubi(s, ext, a0, a1, -a2); 1981 } else { 1982 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 1983 } 1984 break; 1985 1986 case INDEX_op_neg_i64: 1987 case INDEX_op_neg_i32: 1988 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 1989 break; 1990 1991 case INDEX_op_and_i32: 1992 a2 = (int32_t)a2; 1993 /* FALLTHRU */ 1994 case INDEX_op_and_i64: 1995 if (c2) { 1996 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); 1997 } else { 1998 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 1999 } 2000 break; 2001 2002 case INDEX_op_andc_i32: 2003 a2 = (int32_t)a2; 2004 /* FALLTHRU */ 2005 case INDEX_op_andc_i64: 2006 if (c2) { 2007 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2008 } else { 2009 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2010 } 2011 break; 2012 2013 case INDEX_op_or_i32: 2014 a2 = (int32_t)a2; 2015 /* FALLTHRU */ 2016 case INDEX_op_or_i64: 2017 if (c2) { 2018 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2019 } else { 2020 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2021 } 2022 break; 2023 2024 case INDEX_op_orc_i32: 2025 a2 = (int32_t)a2; 2026 /* FALLTHRU */ 2027 case INDEX_op_orc_i64: 2028 if (c2) { 2029 tcg_out_logicali(s, 
I3404_ORRI, ext, a0, a1, ~a2); 2030 } else { 2031 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2032 } 2033 break; 2034 2035 case INDEX_op_xor_i32: 2036 a2 = (int32_t)a2; 2037 /* FALLTHRU */ 2038 case INDEX_op_xor_i64: 2039 if (c2) { 2040 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2041 } else { 2042 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2043 } 2044 break; 2045 2046 case INDEX_op_eqv_i32: 2047 a2 = (int32_t)a2; 2048 /* FALLTHRU */ 2049 case INDEX_op_eqv_i64: 2050 if (c2) { 2051 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2052 } else { 2053 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2054 } 2055 break; 2056 2057 case INDEX_op_not_i64: 2058 case INDEX_op_not_i32: 2059 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2060 break; 2061 2062 case INDEX_op_mul_i64: 2063 case INDEX_op_mul_i32: 2064 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2065 break; 2066 2067 case INDEX_op_div_i64: 2068 case INDEX_op_div_i32: 2069 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2070 break; 2071 case INDEX_op_divu_i64: 2072 case INDEX_op_divu_i32: 2073 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2074 break; 2075 2076 case INDEX_op_rem_i64: 2077 case INDEX_op_rem_i32: 2078 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); 2079 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2080 break; 2081 case INDEX_op_remu_i64: 2082 case INDEX_op_remu_i32: 2083 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); 2084 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2085 break; 2086 2087 case INDEX_op_shl_i64: 2088 case INDEX_op_shl_i32: 2089 if (c2) { 2090 tcg_out_shl(s, ext, a0, a1, a2); 2091 } else { 2092 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2093 } 2094 break; 2095 2096 case INDEX_op_shr_i64: 2097 case INDEX_op_shr_i32: 2098 if (c2) { 2099 tcg_out_shr(s, ext, a0, a1, a2); 2100 } else { 2101 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2102 } 2103 break; 2104 2105 case INDEX_op_sar_i64: 2106 case INDEX_op_sar_i32: 2107 if (c2) { 2108 tcg_out_sar(s, ext, a0, a1, a2); 2109 } else { 2110 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2111 } 2112 break; 2113 2114 case INDEX_op_rotr_i64: 2115 case INDEX_op_rotr_i32: 2116 if (c2) { 2117 tcg_out_rotr(s, ext, a0, a1, a2); 2118 } else { 2119 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2120 } 2121 break; 2122 2123 case INDEX_op_rotl_i64: 2124 case INDEX_op_rotl_i32: 2125 if (c2) { 2126 tcg_out_rotl(s, ext, a0, a1, a2); 2127 } else { 2128 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); 2129 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); 2130 } 2131 break; 2132 2133 case INDEX_op_clz_i64: 2134 case INDEX_op_clz_i32: 2135 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2136 break; 2137 case INDEX_op_ctz_i64: 2138 case INDEX_op_ctz_i32: 2139 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2140 break; 2141 2142 case INDEX_op_brcond_i32: 2143 a1 = (int32_t)a1; 2144 /* FALLTHRU */ 2145 case INDEX_op_brcond_i64: 2146 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2147 break; 2148 2149 case INDEX_op_setcond_i32: 2150 a2 = (int32_t)a2; 2151 /* FALLTHRU */ 2152 case INDEX_op_setcond_i64: 2153 tcg_out_cmp(s, ext, a1, a2, c2); 2154 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). 
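       CSINC Wd, Wn, Wm, c yields Wn when c holds and Wm + 1 otherwise, so
       with both sources WZR and the condition inverted this produces 1 when
       the setcond condition is true and 0 when it is false.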
*/ 2155 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2156 TCG_REG_XZR, tcg_invert_cond(args[3])); 2157 break; 2158 2159 case INDEX_op_movcond_i32: 2160 a2 = (int32_t)a2; 2161 /* FALLTHRU */ 2162 case INDEX_op_movcond_i64: 2163 tcg_out_cmp(s, ext, a1, a2, c2); 2164 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2165 break; 2166 2167 case INDEX_op_qemu_ld_i32: 2168 case INDEX_op_qemu_ld_i64: 2169 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2170 break; 2171 case INDEX_op_qemu_st_i32: 2172 case INDEX_op_qemu_st_i64: 2173 tcg_out_qemu_st(s, REG0(0), a1, a2); 2174 break; 2175 2176 case INDEX_op_bswap64_i64: 2177 tcg_out_rev64(s, a0, a1); 2178 break; 2179 case INDEX_op_bswap32_i64: 2180 case INDEX_op_bswap32_i32: 2181 tcg_out_rev32(s, a0, a1); 2182 break; 2183 case INDEX_op_bswap16_i64: 2184 case INDEX_op_bswap16_i32: 2185 tcg_out_rev16(s, a0, a1); 2186 break; 2187 2188 case INDEX_op_ext8s_i64: 2189 case INDEX_op_ext8s_i32: 2190 tcg_out_sxt(s, ext, MO_8, a0, a1); 2191 break; 2192 case INDEX_op_ext16s_i64: 2193 case INDEX_op_ext16s_i32: 2194 tcg_out_sxt(s, ext, MO_16, a0, a1); 2195 break; 2196 case INDEX_op_ext_i32_i64: 2197 case INDEX_op_ext32s_i64: 2198 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); 2199 break; 2200 case INDEX_op_ext8u_i64: 2201 case INDEX_op_ext8u_i32: 2202 tcg_out_uxt(s, MO_8, a0, a1); 2203 break; 2204 case INDEX_op_ext16u_i64: 2205 case INDEX_op_ext16u_i32: 2206 tcg_out_uxt(s, MO_16, a0, a1); 2207 break; 2208 case INDEX_op_extu_i32_i64: 2209 case INDEX_op_ext32u_i64: 2210 tcg_out_movr(s, TCG_TYPE_I32, a0, a1); 2211 break; 2212 2213 case INDEX_op_deposit_i64: 2214 case INDEX_op_deposit_i32: 2215 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2216 break; 2217 2218 case INDEX_op_extract_i64: 2219 case INDEX_op_extract_i32: 2220 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2221 break; 2222 2223 case INDEX_op_sextract_i64: 2224 case INDEX_op_sextract_i32: 2225 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2226 break; 2227 2228 case INDEX_op_extract2_i64: 2229 case INDEX_op_extract2_i32: 2230 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2231 break; 2232 2233 case INDEX_op_add2_i32: 2234 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2235 (int32_t)args[4], args[5], const_args[4], 2236 const_args[5], false); 2237 break; 2238 case INDEX_op_add2_i64: 2239 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2240 args[5], const_args[4], const_args[5], false); 2241 break; 2242 case INDEX_op_sub2_i32: 2243 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2244 (int32_t)args[4], args[5], const_args[4], 2245 const_args[5], true); 2246 break; 2247 case INDEX_op_sub2_i64: 2248 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2249 args[5], const_args[4], const_args[5], true); 2250 break; 2251 2252 case INDEX_op_muluh_i64: 2253 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2254 break; 2255 case INDEX_op_mulsh_i64: 2256 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2257 break; 2258 2259 case INDEX_op_mb: 2260 tcg_out_mb(s, a0); 2261 break; 2262 2263 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2264 case INDEX_op_mov_i64: 2265 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ 2266 case INDEX_op_movi_i64: 2267 case INDEX_op_call: /* Always emitted via tcg_out_call. 
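       These opcodes are handled by common TCG code, which calls the
       tcg_out_mov/tcg_out_movi/tcg_out_call helpers directly and never
       dispatches them to tcg_out_op.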
*/ 2268 default: 2269 g_assert_not_reached(); 2270 } 2271 2272#undef REG0 2273} 2274 2275static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2276 unsigned vecl, unsigned vece, 2277 const TCGArg *args, const int *const_args) 2278{ 2279 static const AArch64Insn cmp_insn[16] = { 2280 [TCG_COND_EQ] = I3616_CMEQ, 2281 [TCG_COND_GT] = I3616_CMGT, 2282 [TCG_COND_GE] = I3616_CMGE, 2283 [TCG_COND_GTU] = I3616_CMHI, 2284 [TCG_COND_GEU] = I3616_CMHS, 2285 }; 2286 static const AArch64Insn cmp0_insn[16] = { 2287 [TCG_COND_EQ] = I3617_CMEQ0, 2288 [TCG_COND_GT] = I3617_CMGT0, 2289 [TCG_COND_GE] = I3617_CMGE0, 2290 [TCG_COND_LT] = I3617_CMLT0, 2291 [TCG_COND_LE] = I3617_CMLE0, 2292 }; 2293 2294 TCGType type = vecl + TCG_TYPE_V64; 2295 unsigned is_q = vecl; 2296 TCGArg a0, a1, a2, a3; 2297 int cmode, imm8; 2298 2299 a0 = args[0]; 2300 a1 = args[1]; 2301 a2 = args[2]; 2302 2303 switch (opc) { 2304 case INDEX_op_ld_vec: 2305 tcg_out_ld(s, type, a0, a1, a2); 2306 break; 2307 case INDEX_op_st_vec: 2308 tcg_out_st(s, type, a0, a1, a2); 2309 break; 2310 case INDEX_op_dupm_vec: 2311 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2312 break; 2313 case INDEX_op_add_vec: 2314 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2315 break; 2316 case INDEX_op_sub_vec: 2317 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2318 break; 2319 case INDEX_op_mul_vec: 2320 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2321 break; 2322 case INDEX_op_neg_vec: 2323 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2324 break; 2325 case INDEX_op_abs_vec: 2326 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2327 break; 2328 case INDEX_op_and_vec: 2329 if (const_args[2]) { 2330 is_shimm1632(~a2, &cmode, &imm8); 2331 if (a0 == a1) { 2332 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2333 return; 2334 } 2335 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2336 a2 = a0; 2337 } 2338 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2339 break; 2340 case INDEX_op_or_vec: 2341 if (const_args[2]) { 2342 is_shimm1632(a2, &cmode, &imm8); 2343 if (a0 == a1) { 2344 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2345 return; 2346 } 2347 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2348 a2 = a0; 2349 } 2350 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2351 break; 2352 case INDEX_op_andc_vec: 2353 if (const_args[2]) { 2354 is_shimm1632(a2, &cmode, &imm8); 2355 if (a0 == a1) { 2356 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2357 return; 2358 } 2359 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2360 a2 = a0; 2361 } 2362 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2363 break; 2364 case INDEX_op_orc_vec: 2365 if (const_args[2]) { 2366 is_shimm1632(~a2, &cmode, &imm8); 2367 if (a0 == a1) { 2368 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2369 return; 2370 } 2371 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2372 a2 = a0; 2373 } 2374 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2375 break; 2376 case INDEX_op_xor_vec: 2377 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2378 break; 2379 case INDEX_op_ssadd_vec: 2380 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2381 break; 2382 case INDEX_op_sssub_vec: 2383 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2384 break; 2385 case INDEX_op_usadd_vec: 2386 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2387 break; 2388 case INDEX_op_ussub_vec: 2389 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2390 break; 2391 case INDEX_op_smax_vec: 2392 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, 
a2); 2393 break; 2394 case INDEX_op_smin_vec: 2395 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2396 break; 2397 case INDEX_op_umax_vec: 2398 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2399 break; 2400 case INDEX_op_umin_vec: 2401 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2402 break; 2403 case INDEX_op_not_vec: 2404 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2405 break; 2406 case INDEX_op_shli_vec: 2407 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2408 break; 2409 case INDEX_op_shri_vec: 2410 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2411 break; 2412 case INDEX_op_sari_vec: 2413 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2414 break; 2415 case INDEX_op_aa64_sli_vec: 2416 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2417 break; 2418 case INDEX_op_shlv_vec: 2419 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2420 break; 2421 case INDEX_op_aa64_sshl_vec: 2422 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2423 break; 2424 case INDEX_op_cmp_vec: 2425 { 2426 TCGCond cond = args[3]; 2427 AArch64Insn insn; 2428 2429 if (cond == TCG_COND_NE) { 2430 if (const_args[2]) { 2431 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2432 } else { 2433 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2434 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2435 } 2436 } else { 2437 if (const_args[2]) { 2438 insn = cmp0_insn[cond]; 2439 if (insn) { 2440 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2441 break; 2442 } 2443 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0); 2444 a2 = TCG_VEC_TMP; 2445 } 2446 insn = cmp_insn[cond]; 2447 if (insn == 0) { 2448 TCGArg t; 2449 t = a1, a1 = a2, a2 = t; 2450 cond = tcg_swap_cond(cond); 2451 insn = cmp_insn[cond]; 2452 tcg_debug_assert(insn != 0); 2453 } 2454 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2455 } 2456 } 2457 break; 2458 2459 case INDEX_op_bitsel_vec: 2460 a3 = args[3]; 2461 if (a0 == a3) { 2462 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2463 } else if (a0 == a2) { 2464 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2465 } else { 2466 if (a0 != a1) { 2467 tcg_out_mov(s, type, a0, a1); 2468 } 2469 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2470 } 2471 break; 2472 2473 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2474 case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ 2475 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. 
*/ 2476 default: 2477 g_assert_not_reached(); 2478 } 2479} 2480 2481int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2482{ 2483 switch (opc) { 2484 case INDEX_op_add_vec: 2485 case INDEX_op_sub_vec: 2486 case INDEX_op_and_vec: 2487 case INDEX_op_or_vec: 2488 case INDEX_op_xor_vec: 2489 case INDEX_op_andc_vec: 2490 case INDEX_op_orc_vec: 2491 case INDEX_op_neg_vec: 2492 case INDEX_op_abs_vec: 2493 case INDEX_op_not_vec: 2494 case INDEX_op_cmp_vec: 2495 case INDEX_op_shli_vec: 2496 case INDEX_op_shri_vec: 2497 case INDEX_op_sari_vec: 2498 case INDEX_op_ssadd_vec: 2499 case INDEX_op_sssub_vec: 2500 case INDEX_op_usadd_vec: 2501 case INDEX_op_ussub_vec: 2502 case INDEX_op_shlv_vec: 2503 case INDEX_op_bitsel_vec: 2504 return 1; 2505 case INDEX_op_rotli_vec: 2506 case INDEX_op_shrv_vec: 2507 case INDEX_op_sarv_vec: 2508 case INDEX_op_rotlv_vec: 2509 case INDEX_op_rotrv_vec: 2510 return -1; 2511 case INDEX_op_mul_vec: 2512 case INDEX_op_smax_vec: 2513 case INDEX_op_smin_vec: 2514 case INDEX_op_umax_vec: 2515 case INDEX_op_umin_vec: 2516 return vece < MO_64; 2517 2518 default: 2519 return 0; 2520 } 2521} 2522 2523void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2524 TCGArg a0, ...) 2525{ 2526 va_list va; 2527 TCGv_vec v0, v1, v2, t1, t2; 2528 TCGArg a2; 2529 2530 va_start(va, a0); 2531 v0 = temp_tcgv_vec(arg_temp(a0)); 2532 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2533 a2 = va_arg(va, TCGArg); 2534 v2 = temp_tcgv_vec(arg_temp(a2)); 2535 2536 switch (opc) { 2537 case INDEX_op_rotli_vec: 2538 t1 = tcg_temp_new_vec(type); 2539 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2540 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2541 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2542 tcg_temp_free_vec(t1); 2543 break; 2544 2545 case INDEX_op_shrv_vec: 2546 case INDEX_op_sarv_vec: 2547 /* Right shifts are negative left shifts for AArch64. */ 2548 t1 = tcg_temp_new_vec(type); 2549 tcg_gen_neg_vec(vece, t1, v2); 2550 opc = (opc == INDEX_op_shrv_vec 2551 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2552 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2553 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2554 tcg_temp_free_vec(t1); 2555 break; 2556 2557 case INDEX_op_rotlv_vec: 2558 t1 = tcg_temp_new_vec(type); 2559 tcg_gen_dupi_vec(vece, t1, 8 << vece); 2560 tcg_gen_sub_vec(vece, t1, v2, t1); 2561 /* Right shifts are negative left shifts for AArch64. */ 2562 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2563 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2564 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2565 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2566 tcg_gen_or_vec(vece, v0, v0, t1); 2567 tcg_temp_free_vec(t1); 2568 break; 2569 2570 case INDEX_op_rotrv_vec: 2571 t1 = tcg_temp_new_vec(type); 2572 t2 = tcg_temp_new_vec(type); 2573 tcg_gen_neg_vec(vece, t1, v2); 2574 tcg_gen_dupi_vec(vece, t2, 8 << vece); 2575 tcg_gen_add_vec(vece, t2, t1, t2); 2576 /* Right shifts are negative left shifts for AArch64. 
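       At this point t1 holds -shift and t2 holds (element width - shift),
       so the two variable left shifts below compute v1 >> shift and
       v1 << (width - shift); ORing them gives the rotate-right result.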
*/ 2577 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2578 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2579 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2580 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2581 tcg_gen_or_vec(vece, v0, t1, t2); 2582 tcg_temp_free_vec(t1); 2583 tcg_temp_free_vec(t2); 2584 break; 2585 2586 default: 2587 g_assert_not_reached(); 2588 } 2589 2590 va_end(va); 2591} 2592 2593static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) 2594{ 2595 static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; 2596 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } }; 2597 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } }; 2598 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } }; 2599 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } }; 2600 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } }; 2601 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } }; 2602 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } }; 2603 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; 2604 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; 2605 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; 2606 static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } }; 2607 static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } }; 2608 static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } }; 2609 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } }; 2610 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; 2611 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } }; 2612 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } }; 2613 static const TCGTargetOpDef r_r_rAL 2614 = { .args_ct_str = { "r", "r", "rAL" } }; 2615 static const TCGTargetOpDef dep 2616 = { .args_ct_str = { "r", "0", "rZ" } }; 2617 static const TCGTargetOpDef ext2 2618 = { .args_ct_str = { "r", "rZ", "rZ" } }; 2619 static const TCGTargetOpDef movc 2620 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } }; 2621 static const TCGTargetOpDef add2 2622 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } }; 2623 static const TCGTargetOpDef w_w_w_w 2624 = { .args_ct_str = { "w", "w", "w", "w" } }; 2625 2626 switch (op) { 2627 case INDEX_op_goto_ptr: 2628 return &r; 2629 2630 case INDEX_op_ld8u_i32: 2631 case INDEX_op_ld8s_i32: 2632 case INDEX_op_ld16u_i32: 2633 case INDEX_op_ld16s_i32: 2634 case INDEX_op_ld_i32: 2635 case INDEX_op_ld8u_i64: 2636 case INDEX_op_ld8s_i64: 2637 case INDEX_op_ld16u_i64: 2638 case INDEX_op_ld16s_i64: 2639 case INDEX_op_ld32u_i64: 2640 case INDEX_op_ld32s_i64: 2641 case INDEX_op_ld_i64: 2642 case INDEX_op_neg_i32: 2643 case INDEX_op_neg_i64: 2644 case INDEX_op_not_i32: 2645 case INDEX_op_not_i64: 2646 case INDEX_op_bswap16_i32: 2647 case INDEX_op_bswap32_i32: 2648 case INDEX_op_bswap16_i64: 2649 case INDEX_op_bswap32_i64: 2650 case INDEX_op_bswap64_i64: 2651 case INDEX_op_ext8s_i32: 2652 case INDEX_op_ext16s_i32: 2653 case INDEX_op_ext8u_i32: 2654 case INDEX_op_ext16u_i32: 2655 case INDEX_op_ext8s_i64: 2656 case INDEX_op_ext16s_i64: 2657 case INDEX_op_ext32s_i64: 2658 case INDEX_op_ext8u_i64: 2659 case INDEX_op_ext16u_i64: 2660 case INDEX_op_ext32u_i64: 2661 case INDEX_op_ext_i32_i64: 2662 case INDEX_op_extu_i32_i64: 2663 case INDEX_op_extract_i32: 2664 case INDEX_op_extract_i64: 2665 case 
INDEX_op_sextract_i32: 2666 case INDEX_op_sextract_i64: 2667 return &r_r; 2668 2669 case INDEX_op_st8_i32: 2670 case INDEX_op_st16_i32: 2671 case INDEX_op_st_i32: 2672 case INDEX_op_st8_i64: 2673 case INDEX_op_st16_i64: 2674 case INDEX_op_st32_i64: 2675 case INDEX_op_st_i64: 2676 return &rZ_r; 2677 2678 case INDEX_op_add_i32: 2679 case INDEX_op_add_i64: 2680 case INDEX_op_sub_i32: 2681 case INDEX_op_sub_i64: 2682 case INDEX_op_setcond_i32: 2683 case INDEX_op_setcond_i64: 2684 return &r_r_rA; 2685 2686 case INDEX_op_mul_i32: 2687 case INDEX_op_mul_i64: 2688 case INDEX_op_div_i32: 2689 case INDEX_op_div_i64: 2690 case INDEX_op_divu_i32: 2691 case INDEX_op_divu_i64: 2692 case INDEX_op_rem_i32: 2693 case INDEX_op_rem_i64: 2694 case INDEX_op_remu_i32: 2695 case INDEX_op_remu_i64: 2696 case INDEX_op_muluh_i64: 2697 case INDEX_op_mulsh_i64: 2698 return &r_r_r; 2699 2700 case INDEX_op_and_i32: 2701 case INDEX_op_and_i64: 2702 case INDEX_op_or_i32: 2703 case INDEX_op_or_i64: 2704 case INDEX_op_xor_i32: 2705 case INDEX_op_xor_i64: 2706 case INDEX_op_andc_i32: 2707 case INDEX_op_andc_i64: 2708 case INDEX_op_orc_i32: 2709 case INDEX_op_orc_i64: 2710 case INDEX_op_eqv_i32: 2711 case INDEX_op_eqv_i64: 2712 return &r_r_rL; 2713 2714 case INDEX_op_shl_i32: 2715 case INDEX_op_shr_i32: 2716 case INDEX_op_sar_i32: 2717 case INDEX_op_rotl_i32: 2718 case INDEX_op_rotr_i32: 2719 case INDEX_op_shl_i64: 2720 case INDEX_op_shr_i64: 2721 case INDEX_op_sar_i64: 2722 case INDEX_op_rotl_i64: 2723 case INDEX_op_rotr_i64: 2724 return &r_r_ri; 2725 2726 case INDEX_op_clz_i32: 2727 case INDEX_op_ctz_i32: 2728 case INDEX_op_clz_i64: 2729 case INDEX_op_ctz_i64: 2730 return &r_r_rAL; 2731 2732 case INDEX_op_brcond_i32: 2733 case INDEX_op_brcond_i64: 2734 return &r_rA; 2735 2736 case INDEX_op_movcond_i32: 2737 case INDEX_op_movcond_i64: 2738 return &movc; 2739 2740 case INDEX_op_qemu_ld_i32: 2741 case INDEX_op_qemu_ld_i64: 2742 return &r_l; 2743 case INDEX_op_qemu_st_i32: 2744 case INDEX_op_qemu_st_i64: 2745 return &lZ_l; 2746 2747 case INDEX_op_deposit_i32: 2748 case INDEX_op_deposit_i64: 2749 return &dep; 2750 2751 case INDEX_op_extract2_i32: 2752 case INDEX_op_extract2_i64: 2753 return &ext2; 2754 2755 case INDEX_op_add2_i32: 2756 case INDEX_op_add2_i64: 2757 case INDEX_op_sub2_i32: 2758 case INDEX_op_sub2_i64: 2759 return &add2; 2760 2761 case INDEX_op_add_vec: 2762 case INDEX_op_sub_vec: 2763 case INDEX_op_mul_vec: 2764 case INDEX_op_xor_vec: 2765 case INDEX_op_ssadd_vec: 2766 case INDEX_op_sssub_vec: 2767 case INDEX_op_usadd_vec: 2768 case INDEX_op_ussub_vec: 2769 case INDEX_op_smax_vec: 2770 case INDEX_op_smin_vec: 2771 case INDEX_op_umax_vec: 2772 case INDEX_op_umin_vec: 2773 case INDEX_op_shlv_vec: 2774 case INDEX_op_shrv_vec: 2775 case INDEX_op_sarv_vec: 2776 case INDEX_op_aa64_sshl_vec: 2777 return &w_w_w; 2778 case INDEX_op_not_vec: 2779 case INDEX_op_neg_vec: 2780 case INDEX_op_abs_vec: 2781 case INDEX_op_shli_vec: 2782 case INDEX_op_shri_vec: 2783 case INDEX_op_sari_vec: 2784 return &w_w; 2785 case INDEX_op_ld_vec: 2786 case INDEX_op_st_vec: 2787 case INDEX_op_dupm_vec: 2788 return &w_r; 2789 case INDEX_op_dup_vec: 2790 return &w_wr; 2791 case INDEX_op_or_vec: 2792 case INDEX_op_andc_vec: 2793 return &w_w_wO; 2794 case INDEX_op_and_vec: 2795 case INDEX_op_orc_vec: 2796 return &w_w_wN; 2797 case INDEX_op_cmp_vec: 2798 return &w_w_wZ; 2799 case INDEX_op_bitsel_vec: 2800 return &w_w_w_w; 2801 case INDEX_op_aa64_sli_vec: 2802 return &w_0_w; 2803 2804 default: 2805 return NULL; 2806 } 2807} 2808 2809static void 
tcg_target_init(TCGContext *s) 2810{ 2811 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2812 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2813 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2814 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2815 2816 tcg_target_call_clobber_regs = -1ull; 2817 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2818 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2819 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 2820 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2821 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2822 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2823 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2824 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2825 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2826 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2827 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2828 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2829 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2830 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2831 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2832 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2833 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 2834 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 2835 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 2836 2837 s->reserved_regs = 0; 2838 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2839 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 2840 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 2841 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 2842 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 2843} 2844 2845/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 2846#define PUSH_SIZE ((30 - 19 + 1) * 8) 2847 2848#define FRAME_SIZE \ 2849 ((PUSH_SIZE \ 2850 + TCG_STATIC_CALL_ARGS_SIZE \ 2851 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 2852 + TCG_TARGET_STACK_ALIGN - 1) \ 2853 & ~(TCG_TARGET_STACK_ALIGN - 1)) 2854 2855/* We're expecting a 2 byte uleb128 encoded value. */ 2856QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 2857 2858/* We're expecting to use a single ADDI insn. */ 2859QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 2860 2861static void tcg_target_qemu_prologue(TCGContext *s) 2862{ 2863 TCGReg r; 2864 2865 /* Push (FP, LR) and allocate space for all saved registers. */ 2866 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 2867 TCG_REG_SP, -PUSH_SIZE, 1, 1); 2868 2869 /* Set up frame pointer for canonical unwinding. */ 2870 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 2871 2872 /* Store callee-preserved regs x19..x28. */ 2873 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2874 int ofs = (r - TCG_REG_X19 + 2) * 8; 2875 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2876 } 2877 2878 /* Make stack space for TCG locals. */ 2879 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2880 FRAME_SIZE - PUSH_SIZE); 2881 2882 /* Inform TCG about how to find TCG locals with register, offset, size. 
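       The temp buffer sits just above the outgoing call-argument area,
       i.e. at sp + TCG_STATIC_CALL_ARGS_SIZE, with room for
       CPU_TEMP_BUF_NLONGS longs inside the space reserved by the SUB above.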
*/ 2883 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 2884 CPU_TEMP_BUF_NLONGS * sizeof(long)); 2885 2886#if !defined(CONFIG_SOFTMMU) 2887 if (USE_GUEST_BASE) { 2888 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 2889 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 2890 } 2891#endif 2892 2893 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2894 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 2895 2896 /* 2897 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 2898 * and fall through to the rest of the epilogue. 2899 */ 2900 s->code_gen_epilogue = s->code_ptr; 2901 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 2902 2903 /* TB epilogue */ 2904 tb_ret_addr = s->code_ptr; 2905 2906 /* Remove TCG locals stack space. */ 2907 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2908 FRAME_SIZE - PUSH_SIZE); 2909 2910 /* Restore registers x19..x28. */ 2911 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2912 int ofs = (r - TCG_REG_X19 + 2) * 8; 2913 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2914 } 2915 2916 /* Pop (FP, LR), restore SP to previous frame. */ 2917 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 2918 TCG_REG_SP, PUSH_SIZE, 0, 1); 2919 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 2920} 2921 2922static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2923{ 2924 int i; 2925 for (i = 0; i < count; ++i) { 2926 p[i] = NOP; 2927 } 2928} 2929 2930typedef struct { 2931 DebugFrameHeader h; 2932 uint8_t fde_def_cfa[4]; 2933 uint8_t fde_reg_ofs[24]; 2934} DebugFrame; 2935 2936#define ELF_HOST_MACHINE EM_AARCH64 2937 2938static const DebugFrame debug_frame = { 2939 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 2940 .h.cie.id = -1, 2941 .h.cie.version = 1, 2942 .h.cie.code_align = 1, 2943 .h.cie.data_align = 0x78, /* sleb128 -8 */ 2944 .h.cie.return_column = TCG_REG_LR, 2945 2946 /* Total FDE size does not include the "len" member. */ 2947 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 2948 2949 .fde_def_cfa = { 2950 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 2951 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 2952 (FRAME_SIZE >> 7) 2953 }, 2954 .fde_reg_ofs = { 2955 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 2956 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 2957 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 2958 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 2959 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 2960 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 2961 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 2962 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 2963 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 2964 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */ 2965 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 2966 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 2967 } 2968}; 2969 2970void tcg_register_jit(void *buf, size_t buf_size) 2971{ 2972 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 2973} 2974