1/* 2 * Initial TCG Implementation for aarch64 3 * 4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH 5 * Written by Claudio Fontana 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or 8 * (at your option) any later version. 9 * 10 * See the COPYING file in the top-level directory for details. 11 */ 12 13#include "../tcg-pool.c.inc" 14#include "qemu/bitops.h" 15 16/* We're going to re-use TCGType in setting of the SF bit, which controls 17 the size of the operation performed. If we know the values match, it 18 makes things much cleaner. */ 19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); 20 21#ifdef CONFIG_DEBUG_TCG 22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { 23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp", 27 28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31", 32}; 33#endif /* CONFIG_DEBUG_TCG */ 34 35static const int tcg_target_reg_alloc_order[] = { 36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, 37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, 38 TCG_REG_X28, /* we will reserve this for guest_base if configured */ 39 40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, 41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, 42 TCG_REG_X16, TCG_REG_X17, 43 44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, 46 47 /* X18 reserved by system */ 48 /* X19 reserved for AREG0 */ 49 /* X29 reserved as fp */ 50 /* X30 reserved as temporary */ 51 52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, 53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, 54 /* V8 - V15 are call-saved, and skipped. */ 55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, 56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, 57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, 58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, 59}; 60 61static const int tcg_target_call_iarg_regs[8] = { 62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, 63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 64}; 65static const int tcg_target_call_oarg_regs[1] = { 66 TCG_REG_X0 67}; 68 69#define TCG_REG_TMP TCG_REG_X30 70#define TCG_VEC_TMP TCG_REG_V31 71 72#ifndef CONFIG_SOFTMMU 73/* Note that XZR cannot be encoded in the address base register slot, 74 as that actaully encodes SP. So if we need to zero-extend the guest 75 address, via the address index register slot, we need to load even 76 a zero guest base into a register. */ 77#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) 78#define TCG_REG_GUEST_BASE TCG_REG_X28 79#endif 80 81static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) 82{ 83 ptrdiff_t offset = target - code_ptr; 84 if (offset == sextract64(offset, 0, 26)) { 85 /* read instruction, mask away previous PC_REL26 parameter contents, 86 set the proper offset, then write back the instruction. */ 87 *code_ptr = deposit32(*code_ptr, 0, 26, offset); 88 return true; 89 } 90 return false; 91} 92 93static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) 94{ 95 ptrdiff_t offset = target - code_ptr; 96 if (offset == sextract64(offset, 0, 19)) { 97 *code_ptr = deposit32(*code_ptr, 5, 19, offset); 98 return true; 99 } 100 return false; 101} 102 103static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, 104 intptr_t value, intptr_t addend) 105{ 106 tcg_debug_assert(addend == 0); 107 switch (type) { 108 case R_AARCH64_JUMP26: 109 case R_AARCH64_CALL26: 110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value); 111 case R_AARCH64_CONDBR19: 112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value); 113 default: 114 g_assert_not_reached(); 115 } 116} 117 118#define TCG_CT_CONST_AIMM 0x100 119#define TCG_CT_CONST_LIMM 0x200 120#define TCG_CT_CONST_ZERO 0x400 121#define TCG_CT_CONST_MONE 0x800 122#define TCG_CT_CONST_ORRI 0x1000 123#define TCG_CT_CONST_ANDI 0x2000 124 125/* parse target specific constraints */ 126static const char *target_parse_constraint(TCGArgConstraint *ct, 127 const char *ct_str, TCGType type) 128{ 129 switch (*ct_str++) { 130 case 'r': /* general registers */ 131 ct->regs |= 0xffffffffu; 132 break; 133 case 'w': /* advsimd registers */ 134 ct->regs |= 0xffffffff00000000ull; 135 break; 136 case 'l': /* qemu_ld / qemu_st address, data_reg */ 137 ct->regs = 0xffffffffu; 138#ifdef CONFIG_SOFTMMU 139 /* x0 and x1 will be overwritten when reading the tlb entry, 140 and x2, and x3 for helper args, better to avoid using them. */ 141 tcg_regset_reset_reg(ct->regs, TCG_REG_X0); 142 tcg_regset_reset_reg(ct->regs, TCG_REG_X1); 143 tcg_regset_reset_reg(ct->regs, TCG_REG_X2); 144 tcg_regset_reset_reg(ct->regs, TCG_REG_X3); 145#endif 146 break; 147 case 'A': /* Valid for arithmetic immediate (positive or negative). */ 148 ct->ct |= TCG_CT_CONST_AIMM; 149 break; 150 case 'L': /* Valid for logical immediate. */ 151 ct->ct |= TCG_CT_CONST_LIMM; 152 break; 153 case 'M': /* minus one */ 154 ct->ct |= TCG_CT_CONST_MONE; 155 break; 156 case 'O': /* vector orr/bic immediate */ 157 ct->ct |= TCG_CT_CONST_ORRI; 158 break; 159 case 'N': /* vector orr/bic immediate, inverted */ 160 ct->ct |= TCG_CT_CONST_ANDI; 161 break; 162 case 'Z': /* zero */ 163 ct->ct |= TCG_CT_CONST_ZERO; 164 break; 165 default: 166 return NULL; 167 } 168 return ct_str; 169} 170 171/* Match a constant valid for addition (12-bit, optionally shifted). */ 172static inline bool is_aimm(uint64_t val) 173{ 174 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 175} 176 177/* Match a constant valid for logical operations. */ 178static inline bool is_limm(uint64_t val) 179{ 180 /* Taking a simplified view of the logical immediates for now, ignoring 181 the replication that can happen across the field. Match bit patterns 182 of the forms 183 0....01....1 184 0..01..10..0 185 and their inverses. */ 186 187 /* Make things easier below, by testing the form with msb clear. */ 188 if ((int64_t)val < 0) { 189 val = ~val; 190 } 191 if (val == 0) { 192 return false; 193 } 194 val += val & -val; 195 return (val & (val - 1)) == 0; 196} 197 198/* Return true if v16 is a valid 16-bit shifted immediate. */ 199static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 200{ 201 if (v16 == (v16 & 0xff)) { 202 *cmode = 0x8; 203 *imm8 = v16 & 0xff; 204 return true; 205 } else if (v16 == (v16 & 0xff00)) { 206 *cmode = 0xa; 207 *imm8 = v16 >> 8; 208 return true; 209 } 210 return false; 211} 212 213/* Return true if v32 is a valid 32-bit shifted immediate. */ 214static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 215{ 216 if (v32 == (v32 & 0xff)) { 217 *cmode = 0x0; 218 *imm8 = v32 & 0xff; 219 return true; 220 } else if (v32 == (v32 & 0xff00)) { 221 *cmode = 0x2; 222 *imm8 = (v32 >> 8) & 0xff; 223 return true; 224 } else if (v32 == (v32 & 0xff0000)) { 225 *cmode = 0x4; 226 *imm8 = (v32 >> 16) & 0xff; 227 return true; 228 } else if (v32 == (v32 & 0xff000000)) { 229 *cmode = 0x6; 230 *imm8 = v32 >> 24; 231 return true; 232 } 233 return false; 234} 235 236/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 237static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 238{ 239 if ((v32 & 0xffff00ff) == 0xff) { 240 *cmode = 0xc; 241 *imm8 = (v32 >> 8) & 0xff; 242 return true; 243 } else if ((v32 & 0xff00ffff) == 0xffff) { 244 *cmode = 0xd; 245 *imm8 = (v32 >> 16) & 0xff; 246 return true; 247 } 248 return false; 249} 250 251/* Return true if v32 is a valid float32 immediate. */ 252static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 253{ 254 if (extract32(v32, 0, 19) == 0 255 && (extract32(v32, 25, 6) == 0x20 256 || extract32(v32, 25, 6) == 0x1f)) { 257 *cmode = 0xf; 258 *imm8 = (extract32(v32, 31, 1) << 7) 259 | (extract32(v32, 25, 1) << 6) 260 | extract32(v32, 19, 6); 261 return true; 262 } 263 return false; 264} 265 266/* Return true if v64 is a valid float64 immediate. */ 267static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 268{ 269 if (extract64(v64, 0, 48) == 0 270 && (extract64(v64, 54, 9) == 0x100 271 || extract64(v64, 54, 9) == 0x0ff)) { 272 *cmode = 0xf; 273 *imm8 = (extract64(v64, 63, 1) << 7) 274 | (extract64(v64, 54, 1) << 6) 275 | extract64(v64, 48, 6); 276 return true; 277 } 278 return false; 279} 280 281/* 282 * Return non-zero if v32 can be formed by MOVI+ORR. 283 * Place the parameters for MOVI in (cmode, imm8). 284 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 285 */ 286static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 287{ 288 int i; 289 290 for (i = 6; i > 0; i -= 2) { 291 /* Mask out one byte we can add with ORR. */ 292 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 293 if (is_shimm32(tmp, cmode, imm8) || 294 is_soimm32(tmp, cmode, imm8)) { 295 break; 296 } 297 } 298 return i; 299} 300 301/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 302static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 303{ 304 if (v32 == deposit32(v32, 16, 16, v32)) { 305 return is_shimm16(v32, cmode, imm8); 306 } else { 307 return is_shimm32(v32, cmode, imm8); 308 } 309} 310 311static int tcg_target_const_match(tcg_target_long val, TCGType type, 312 const TCGArgConstraint *arg_ct) 313{ 314 int ct = arg_ct->ct; 315 316 if (ct & TCG_CT_CONST) { 317 return 1; 318 } 319 if (type == TCG_TYPE_I32) { 320 val = (int32_t)val; 321 } 322 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 323 return 1; 324 } 325 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 326 return 1; 327 } 328 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 329 return 1; 330 } 331 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 332 return 1; 333 } 334 335 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 336 case 0: 337 break; 338 case TCG_CT_CONST_ANDI: 339 val = ~val; 340 /* fallthru */ 341 case TCG_CT_CONST_ORRI: 342 if (val == deposit64(val, 32, 32, val)) { 343 int cmode, imm8; 344 return is_shimm1632(val, &cmode, &imm8); 345 } 346 break; 347 default: 348 /* Both bits should not be set for the same insn. */ 349 g_assert_not_reached(); 350 } 351 352 return 0; 353} 354 355enum aarch64_cond_code { 356 COND_EQ = 0x0, 357 COND_NE = 0x1, 358 COND_CS = 0x2, /* Unsigned greater or equal */ 359 COND_HS = COND_CS, /* ALIAS greater or equal */ 360 COND_CC = 0x3, /* Unsigned less than */ 361 COND_LO = COND_CC, /* ALIAS Lower */ 362 COND_MI = 0x4, /* Negative */ 363 COND_PL = 0x5, /* Zero or greater */ 364 COND_VS = 0x6, /* Overflow */ 365 COND_VC = 0x7, /* No overflow */ 366 COND_HI = 0x8, /* Unsigned greater than */ 367 COND_LS = 0x9, /* Unsigned less or equal */ 368 COND_GE = 0xa, 369 COND_LT = 0xb, 370 COND_GT = 0xc, 371 COND_LE = 0xd, 372 COND_AL = 0xe, 373 COND_NV = 0xf, /* behaves like COND_AL here */ 374}; 375 376static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 377 [TCG_COND_EQ] = COND_EQ, 378 [TCG_COND_NE] = COND_NE, 379 [TCG_COND_LT] = COND_LT, 380 [TCG_COND_GE] = COND_GE, 381 [TCG_COND_LE] = COND_LE, 382 [TCG_COND_GT] = COND_GT, 383 /* unsigned */ 384 [TCG_COND_LTU] = COND_LO, 385 [TCG_COND_GTU] = COND_HI, 386 [TCG_COND_GEU] = COND_HS, 387 [TCG_COND_LEU] = COND_LS, 388}; 389 390typedef enum { 391 LDST_ST = 0, /* store */ 392 LDST_LD = 1, /* load */ 393 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 394 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 395} AArch64LdstType; 396 397/* We encode the format of the insn into the beginning of the name, so that 398 we can have the preprocessor help "typecheck" the insn vs the output 399 function. Arm didn't provide us with nice names for the formats, so we 400 use the section number of the architecture reference manual in which the 401 instruction group is described. */ 402typedef enum { 403 /* Compare and branch (immediate). */ 404 I3201_CBZ = 0x34000000, 405 I3201_CBNZ = 0x35000000, 406 407 /* Conditional branch (immediate). */ 408 I3202_B_C = 0x54000000, 409 410 /* Unconditional branch (immediate). */ 411 I3206_B = 0x14000000, 412 I3206_BL = 0x94000000, 413 414 /* Unconditional branch (register). */ 415 I3207_BR = 0xd61f0000, 416 I3207_BLR = 0xd63f0000, 417 I3207_RET = 0xd65f0000, 418 419 /* AdvSIMD load/store single structure. */ 420 I3303_LD1R = 0x0d40c000, 421 422 /* Load literal for loading the address at pc-relative offset */ 423 I3305_LDR = 0x58000000, 424 I3305_LDR_v64 = 0x5c000000, 425 I3305_LDR_v128 = 0x9c000000, 426 427 /* Load/store register. Described here as 3.3.12, but the helper 428 that emits them can transform to 3.3.10 or 3.3.13. */ 429 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 430 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 431 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 432 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 433 434 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 435 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 436 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 437 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 438 439 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 440 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 441 442 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 443 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 444 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 445 446 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 447 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 448 449 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 450 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 451 452 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 453 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 454 455 I3312_TO_I3310 = 0x00200800, 456 I3312_TO_I3313 = 0x01000000, 457 458 /* Load/store register pair instructions. */ 459 I3314_LDP = 0x28400000, 460 I3314_STP = 0x28000000, 461 462 /* Add/subtract immediate instructions. */ 463 I3401_ADDI = 0x11000000, 464 I3401_ADDSI = 0x31000000, 465 I3401_SUBI = 0x51000000, 466 I3401_SUBSI = 0x71000000, 467 468 /* Bitfield instructions. */ 469 I3402_BFM = 0x33000000, 470 I3402_SBFM = 0x13000000, 471 I3402_UBFM = 0x53000000, 472 473 /* Extract instruction. */ 474 I3403_EXTR = 0x13800000, 475 476 /* Logical immediate instructions. */ 477 I3404_ANDI = 0x12000000, 478 I3404_ORRI = 0x32000000, 479 I3404_EORI = 0x52000000, 480 481 /* Move wide immediate instructions. */ 482 I3405_MOVN = 0x12800000, 483 I3405_MOVZ = 0x52800000, 484 I3405_MOVK = 0x72800000, 485 486 /* PC relative addressing instructions. */ 487 I3406_ADR = 0x10000000, 488 I3406_ADRP = 0x90000000, 489 490 /* Add/subtract shifted register instructions (without a shift). */ 491 I3502_ADD = 0x0b000000, 492 I3502_ADDS = 0x2b000000, 493 I3502_SUB = 0x4b000000, 494 I3502_SUBS = 0x6b000000, 495 496 /* Add/subtract shifted register instructions (with a shift). */ 497 I3502S_ADD_LSL = I3502_ADD, 498 499 /* Add/subtract with carry instructions. */ 500 I3503_ADC = 0x1a000000, 501 I3503_SBC = 0x5a000000, 502 503 /* Conditional select instructions. */ 504 I3506_CSEL = 0x1a800000, 505 I3506_CSINC = 0x1a800400, 506 I3506_CSINV = 0x5a800000, 507 I3506_CSNEG = 0x5a800400, 508 509 /* Data-processing (1 source) instructions. */ 510 I3507_CLZ = 0x5ac01000, 511 I3507_RBIT = 0x5ac00000, 512 I3507_REV16 = 0x5ac00400, 513 I3507_REV32 = 0x5ac00800, 514 I3507_REV64 = 0x5ac00c00, 515 516 /* Data-processing (2 source) instructions. */ 517 I3508_LSLV = 0x1ac02000, 518 I3508_LSRV = 0x1ac02400, 519 I3508_ASRV = 0x1ac02800, 520 I3508_RORV = 0x1ac02c00, 521 I3508_SMULH = 0x9b407c00, 522 I3508_UMULH = 0x9bc07c00, 523 I3508_UDIV = 0x1ac00800, 524 I3508_SDIV = 0x1ac00c00, 525 526 /* Data-processing (3 source) instructions. */ 527 I3509_MADD = 0x1b000000, 528 I3509_MSUB = 0x1b008000, 529 530 /* Logical shifted register instructions (without a shift). */ 531 I3510_AND = 0x0a000000, 532 I3510_BIC = 0x0a200000, 533 I3510_ORR = 0x2a000000, 534 I3510_ORN = 0x2a200000, 535 I3510_EOR = 0x4a000000, 536 I3510_EON = 0x4a200000, 537 I3510_ANDS = 0x6a000000, 538 539 /* Logical shifted register instructions (with a shift). */ 540 I3502S_AND_LSR = I3510_AND | (1 << 22), 541 542 /* AdvSIMD copy */ 543 I3605_DUP = 0x0e000400, 544 I3605_INS = 0x4e001c00, 545 I3605_UMOV = 0x0e003c00, 546 547 /* AdvSIMD modified immediate */ 548 I3606_MOVI = 0x0f000400, 549 I3606_MVNI = 0x2f000400, 550 I3606_BIC = 0x2f001400, 551 I3606_ORR = 0x0f001400, 552 553 /* AdvSIMD shift by immediate */ 554 I3614_SSHR = 0x0f000400, 555 I3614_SSRA = 0x0f001400, 556 I3614_SHL = 0x0f005400, 557 I3614_SLI = 0x2f005400, 558 I3614_USHR = 0x2f000400, 559 I3614_USRA = 0x2f001400, 560 561 /* AdvSIMD three same. */ 562 I3616_ADD = 0x0e208400, 563 I3616_AND = 0x0e201c00, 564 I3616_BIC = 0x0e601c00, 565 I3616_BIF = 0x2ee01c00, 566 I3616_BIT = 0x2ea01c00, 567 I3616_BSL = 0x2e601c00, 568 I3616_EOR = 0x2e201c00, 569 I3616_MUL = 0x0e209c00, 570 I3616_ORR = 0x0ea01c00, 571 I3616_ORN = 0x0ee01c00, 572 I3616_SUB = 0x2e208400, 573 I3616_CMGT = 0x0e203400, 574 I3616_CMGE = 0x0e203c00, 575 I3616_CMTST = 0x0e208c00, 576 I3616_CMHI = 0x2e203400, 577 I3616_CMHS = 0x2e203c00, 578 I3616_CMEQ = 0x2e208c00, 579 I3616_SMAX = 0x0e206400, 580 I3616_SMIN = 0x0e206c00, 581 I3616_SSHL = 0x0e204400, 582 I3616_SQADD = 0x0e200c00, 583 I3616_SQSUB = 0x0e202c00, 584 I3616_UMAX = 0x2e206400, 585 I3616_UMIN = 0x2e206c00, 586 I3616_UQADD = 0x2e200c00, 587 I3616_UQSUB = 0x2e202c00, 588 I3616_USHL = 0x2e204400, 589 590 /* AdvSIMD two-reg misc. */ 591 I3617_CMGT0 = 0x0e208800, 592 I3617_CMEQ0 = 0x0e209800, 593 I3617_CMLT0 = 0x0e20a800, 594 I3617_CMGE0 = 0x2e208800, 595 I3617_CMLE0 = 0x2e20a800, 596 I3617_NOT = 0x2e205800, 597 I3617_ABS = 0x0e20b800, 598 I3617_NEG = 0x2e20b800, 599 600 /* System instructions. */ 601 NOP = 0xd503201f, 602 DMB_ISH = 0xd50338bf, 603 DMB_LD = 0x00000100, 604 DMB_ST = 0x00000200, 605} AArch64Insn; 606 607static inline uint32_t tcg_in32(TCGContext *s) 608{ 609 uint32_t v = *(uint32_t *)s->code_ptr; 610 return v; 611} 612 613/* Emit an opcode with "type-checking" of the format. */ 614#define tcg_out_insn(S, FMT, OP, ...) \ 615 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 616 617static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 618 TCGReg rt, TCGReg rn, unsigned size) 619{ 620 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 621} 622 623static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 624 int imm19, TCGReg rt) 625{ 626 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 627} 628 629static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 630 TCGReg rt, int imm19) 631{ 632 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 633} 634 635static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 636 TCGCond c, int imm19) 637{ 638 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 639} 640 641static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 642{ 643 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 644} 645 646static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 647{ 648 tcg_out32(s, insn | rn << 5); 649} 650 651static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 652 TCGReg r1, TCGReg r2, TCGReg rn, 653 tcg_target_long ofs, bool pre, bool w) 654{ 655 insn |= 1u << 31; /* ext */ 656 insn |= pre << 24; 657 insn |= w << 23; 658 659 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 660 insn |= (ofs & (0x7f << 3)) << (15 - 3); 661 662 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 663} 664 665static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 666 TCGReg rd, TCGReg rn, uint64_t aimm) 667{ 668 if (aimm > 0xfff) { 669 tcg_debug_assert((aimm & 0xfff) == 0); 670 aimm >>= 12; 671 tcg_debug_assert(aimm <= 0xfff); 672 aimm |= 1 << 12; /* apply LSL 12 */ 673 } 674 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 675} 676 677/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 678 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 679 that feed the DecodeBitMasks pseudo function. */ 680static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 681 TCGReg rd, TCGReg rn, int n, int immr, int imms) 682{ 683 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 684 | rn << 5 | rd); 685} 686 687#define tcg_out_insn_3404 tcg_out_insn_3402 688 689static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 690 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 691{ 692 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 693 | rn << 5 | rd); 694} 695 696/* This function is used for the Move (wide immediate) instruction group. 697 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 698static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 699 TCGReg rd, uint16_t half, unsigned shift) 700{ 701 tcg_debug_assert((shift & ~0x30) == 0); 702 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 703} 704 705static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 706 TCGReg rd, int64_t disp) 707{ 708 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 709} 710 711/* This function is for both 3.5.2 (Add/Subtract shifted register), for 712 the rare occasion when we actually want to supply a shift amount. */ 713static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, 714 TCGType ext, TCGReg rd, TCGReg rn, 715 TCGReg rm, int imm6) 716{ 717 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); 718} 719 720/* This function is for 3.5.2 (Add/subtract shifted register), 721 and 3.5.10 (Logical shifted register), for the vast majorty of cases 722 when we don't want to apply a shift. Thus it can also be used for 723 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */ 724static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, 725 TCGReg rd, TCGReg rn, TCGReg rm) 726{ 727 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); 728} 729 730#define tcg_out_insn_3503 tcg_out_insn_3502 731#define tcg_out_insn_3508 tcg_out_insn_3502 732#define tcg_out_insn_3510 tcg_out_insn_3502 733 734static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, 735 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) 736{ 737 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd 738 | tcg_cond_to_aarch64[c] << 12); 739} 740 741static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, 742 TCGReg rd, TCGReg rn) 743{ 744 tcg_out32(s, insn | ext << 31 | rn << 5 | rd); 745} 746 747static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, 748 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) 749{ 750 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); 751} 752 753static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q, 754 TCGReg rd, TCGReg rn, int dst_idx, int src_idx) 755{ 756 /* Note that bit 11 set means general register input. Therefore 757 we can handle both register sets with one function. */ 758 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11) 759 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5); 760} 761 762static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, 763 TCGReg rd, bool op, int cmode, uint8_t imm8) 764{ 765 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f) 766 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); 767} 768 769static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, 770 TCGReg rd, TCGReg rn, unsigned immhb) 771{ 772 tcg_out32(s, insn | q << 30 | immhb << 16 773 | (rn & 0x1f) << 5 | (rd & 0x1f)); 774} 775 776static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q, 777 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) 778{ 779 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16 780 | (rn & 0x1f) << 5 | (rd & 0x1f)); 781} 782 783static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q, 784 unsigned size, TCGReg rd, TCGReg rn) 785{ 786 tcg_out32(s, insn | q << 30 | (size << 22) 787 | (rn & 0x1f) << 5 | (rd & 0x1f)); 788} 789 790static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, 791 TCGReg rd, TCGReg base, TCGType ext, 792 TCGReg regoff) 793{ 794 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 795 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 796 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 797} 798 799static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 800 TCGReg rd, TCGReg rn, intptr_t offset) 801{ 802 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 803} 804 805static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 806 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 807{ 808 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 809 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 810 | rn << 5 | (rd & 0x1f)); 811} 812 813/* Register to register move using ORR (shifted register with no shift). */ 814static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 815{ 816 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 817} 818 819/* Register to register move using ADDI (move to/from SP). */ 820static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 821{ 822 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 823} 824 825/* This function is used for the Logical (immediate) instruction group. 826 The value of LIMM must satisfy IS_LIMM. See the comment above about 827 only supporting simplified logical immediates. */ 828static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 829 TCGReg rd, TCGReg rn, uint64_t limm) 830{ 831 unsigned h, l, r, c; 832 833 tcg_debug_assert(is_limm(limm)); 834 835 h = clz64(limm); 836 l = ctz64(limm); 837 if (l == 0) { 838 r = 0; /* form 0....01....1 */ 839 c = ctz64(~limm) - 1; 840 if (h == 0) { 841 r = clz64(~limm); /* form 1..10..01..1 */ 842 c += r; 843 } 844 } else { 845 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 846 c = r - h - 1; 847 } 848 if (ext == TCG_TYPE_I32) { 849 r &= 31; 850 c &= 31; 851 } 852 853 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 854} 855 856static void tcg_out_dupi_vec(TCGContext *s, TCGType type, 857 TCGReg rd, tcg_target_long v64) 858{ 859 bool q = type == TCG_TYPE_V128; 860 int cmode, imm8, i; 861 862 /* Test all bytes equal first. */ 863 if (v64 == dup_const(MO_8, v64)) { 864 imm8 = (uint8_t)v64; 865 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 866 return; 867 } 868 869 /* 870 * Test all bytes 0x00 or 0xff second. This can match cases that 871 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 872 */ 873 for (i = imm8 = 0; i < 8; i++) { 874 uint8_t byte = v64 >> (i * 8); 875 if (byte == 0xff) { 876 imm8 |= 1 << i; 877 } else if (byte != 0) { 878 goto fail_bytes; 879 } 880 } 881 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 882 return; 883 fail_bytes: 884 885 /* 886 * Tests for various replications. For each element width, if we 887 * cannot find an expansion there's no point checking a larger 888 * width because we already know by replication it cannot match. 889 */ 890 if (v64 == dup_const(MO_16, v64)) { 891 uint16_t v16 = v64; 892 893 if (is_shimm16(v16, &cmode, &imm8)) { 894 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 895 return; 896 } 897 if (is_shimm16(~v16, &cmode, &imm8)) { 898 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 899 return; 900 } 901 902 /* 903 * Otherwise, all remaining constants can be loaded in two insns: 904 * rd = v16 & 0xff, rd |= v16 & 0xff00. 905 */ 906 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 907 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 908 return; 909 } else if (v64 == dup_const(MO_32, v64)) { 910 uint32_t v32 = v64; 911 uint32_t n32 = ~v32; 912 913 if (is_shimm32(v32, &cmode, &imm8) || 914 is_soimm32(v32, &cmode, &imm8) || 915 is_fimm32(v32, &cmode, &imm8)) { 916 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 917 return; 918 } 919 if (is_shimm32(n32, &cmode, &imm8) || 920 is_soimm32(n32, &cmode, &imm8)) { 921 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 922 return; 923 } 924 925 /* 926 * Restrict the set of constants to those we can load with 927 * two instructions. Others we load from the pool. 928 */ 929 i = is_shimm32_pair(v32, &cmode, &imm8); 930 if (i) { 931 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 932 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 933 return; 934 } 935 i = is_shimm32_pair(n32, &cmode, &imm8); 936 if (i) { 937 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 938 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 939 return; 940 } 941 } else if (is_fimm64(v64, &cmode, &imm8)) { 942 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 943 return; 944 } 945 946 /* 947 * As a last resort, load from the constant pool. Sadly there 948 * is no LD1R (literal), so store the full 16-byte vector. 949 */ 950 if (type == TCG_TYPE_V128) { 951 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 952 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 953 } else { 954 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 955 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 956 } 957} 958 959static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 960 TCGReg rd, TCGReg rs) 961{ 962 int is_q = type - TCG_TYPE_V64; 963 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 964 return true; 965} 966 967static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 968 TCGReg r, TCGReg base, intptr_t offset) 969{ 970 TCGReg temp = TCG_REG_TMP; 971 972 if (offset < -0xffffff || offset > 0xffffff) { 973 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 974 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 975 base = temp; 976 } else { 977 AArch64Insn add_insn = I3401_ADDI; 978 979 if (offset < 0) { 980 add_insn = I3401_SUBI; 981 offset = -offset; 982 } 983 if (offset & 0xfff000) { 984 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 985 base = temp; 986 } 987 if (offset & 0xfff) { 988 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 989 base = temp; 990 } 991 } 992 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 993 return true; 994} 995 996static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 997 tcg_target_long value) 998{ 999 tcg_target_long svalue = value; 1000 tcg_target_long ivalue = ~value; 1001 tcg_target_long t0, t1, t2; 1002 int s0, s1; 1003 AArch64Insn opc; 1004 1005 switch (type) { 1006 case TCG_TYPE_I32: 1007 case TCG_TYPE_I64: 1008 tcg_debug_assert(rd < 32); 1009 break; 1010 1011 case TCG_TYPE_V64: 1012 case TCG_TYPE_V128: 1013 tcg_debug_assert(rd >= 32); 1014 tcg_out_dupi_vec(s, type, rd, value); 1015 return; 1016 1017 default: 1018 g_assert_not_reached(); 1019 } 1020 1021 /* For 32-bit values, discard potential garbage in value. For 64-bit 1022 values within [2**31, 2**32-1], we can create smaller sequences by 1023 interpreting this as a negative 32-bit number, while ensuring that 1024 the high 32 bits are cleared by setting SF=0. */ 1025 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1026 svalue = (int32_t)value; 1027 value = (uint32_t)value; 1028 ivalue = (uint32_t)ivalue; 1029 type = TCG_TYPE_I32; 1030 } 1031 1032 /* Speed things up by handling the common case of small positive 1033 and negative values specially. */ 1034 if ((value & ~0xffffull) == 0) { 1035 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1036 return; 1037 } else if ((ivalue & ~0xffffull) == 0) { 1038 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1039 return; 1040 } 1041 1042 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1043 use the sign-extended value. That lets us match rotated values such 1044 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1045 if (is_limm(svalue)) { 1046 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1047 return; 1048 } 1049 1050 /* Look for host pointer values within 4G of the PC. This happens 1051 often when loading pointers to QEMU's own data structures. */ 1052 if (type == TCG_TYPE_I64) { 1053 tcg_target_long disp = value - (intptr_t)s->code_ptr; 1054 if (disp == sextract64(disp, 0, 21)) { 1055 tcg_out_insn(s, 3406, ADR, rd, disp); 1056 return; 1057 } 1058 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); 1059 if (disp == sextract64(disp, 0, 21)) { 1060 tcg_out_insn(s, 3406, ADRP, rd, disp); 1061 if (value & 0xfff) { 1062 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1063 } 1064 return; 1065 } 1066 } 1067 1068 /* Would it take fewer insns to begin with MOVN? */ 1069 if (ctpop64(value) >= 32) { 1070 t0 = ivalue; 1071 opc = I3405_MOVN; 1072 } else { 1073 t0 = value; 1074 opc = I3405_MOVZ; 1075 } 1076 s0 = ctz64(t0) & (63 & -16); 1077 t1 = t0 & ~(0xffffUL << s0); 1078 s1 = ctz64(t1) & (63 & -16); 1079 t2 = t1 & ~(0xffffUL << s1); 1080 if (t2 == 0) { 1081 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1082 if (t1 != 0) { 1083 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1084 } 1085 return; 1086 } 1087 1088 /* For more than 2 insns, dump it into the constant pool. */ 1089 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1090 tcg_out_insn(s, 3305, LDR, 0, rd); 1091} 1092 1093/* Define something more legible for general use. */ 1094#define tcg_out_ldst_r tcg_out_insn_3310 1095 1096static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1097 TCGReg rn, intptr_t offset, int lgsize) 1098{ 1099 /* If the offset is naturally aligned and in range, then we can 1100 use the scaled uimm12 encoding */ 1101 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1102 uintptr_t scaled_uimm = offset >> lgsize; 1103 if (scaled_uimm <= 0xfff) { 1104 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1105 return; 1106 } 1107 } 1108 1109 /* Small signed offsets can use the unscaled encoding. */ 1110 if (offset >= -256 && offset < 256) { 1111 tcg_out_insn_3312(s, insn, rd, rn, offset); 1112 return; 1113 } 1114 1115 /* Worst-case scenario, move offset to temp register, use reg offset. */ 1116 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1117 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1118} 1119 1120static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1121{ 1122 if (ret == arg) { 1123 return true; 1124 } 1125 switch (type) { 1126 case TCG_TYPE_I32: 1127 case TCG_TYPE_I64: 1128 if (ret < 32 && arg < 32) { 1129 tcg_out_movr(s, type, ret, arg); 1130 break; 1131 } else if (ret < 32) { 1132 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1133 break; 1134 } else if (arg < 32) { 1135 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1136 break; 1137 } 1138 /* FALLTHRU */ 1139 1140 case TCG_TYPE_V64: 1141 tcg_debug_assert(ret >= 32 && arg >= 32); 1142 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1143 break; 1144 case TCG_TYPE_V128: 1145 tcg_debug_assert(ret >= 32 && arg >= 32); 1146 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1147 break; 1148 1149 default: 1150 g_assert_not_reached(); 1151 } 1152 return true; 1153} 1154 1155static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1156 TCGReg base, intptr_t ofs) 1157{ 1158 AArch64Insn insn; 1159 int lgsz; 1160 1161 switch (type) { 1162 case TCG_TYPE_I32: 1163 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1164 lgsz = 2; 1165 break; 1166 case TCG_TYPE_I64: 1167 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1168 lgsz = 3; 1169 break; 1170 case TCG_TYPE_V64: 1171 insn = I3312_LDRVD; 1172 lgsz = 3; 1173 break; 1174 case TCG_TYPE_V128: 1175 insn = I3312_LDRVQ; 1176 lgsz = 4; 1177 break; 1178 default: 1179 g_assert_not_reached(); 1180 } 1181 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1182} 1183 1184static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1185 TCGReg base, intptr_t ofs) 1186{ 1187 AArch64Insn insn; 1188 int lgsz; 1189 1190 switch (type) { 1191 case TCG_TYPE_I32: 1192 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1193 lgsz = 2; 1194 break; 1195 case TCG_TYPE_I64: 1196 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1197 lgsz = 3; 1198 break; 1199 case TCG_TYPE_V64: 1200 insn = I3312_STRVD; 1201 lgsz = 3; 1202 break; 1203 case TCG_TYPE_V128: 1204 insn = I3312_STRVQ; 1205 lgsz = 4; 1206 break; 1207 default: 1208 g_assert_not_reached(); 1209 } 1210 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1211} 1212 1213static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1214 TCGReg base, intptr_t ofs) 1215{ 1216 if (type <= TCG_TYPE_I64 && val == 0) { 1217 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1218 return true; 1219 } 1220 return false; 1221} 1222 1223static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1224 TCGReg rn, unsigned int a, unsigned int b) 1225{ 1226 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1227} 1228 1229static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1230 TCGReg rn, unsigned int a, unsigned int b) 1231{ 1232 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1233} 1234 1235static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1236 TCGReg rn, unsigned int a, unsigned int b) 1237{ 1238 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1239} 1240 1241static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1242 TCGReg rn, TCGReg rm, unsigned int a) 1243{ 1244 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1245} 1246 1247static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1248 TCGReg rd, TCGReg rn, unsigned int m) 1249{ 1250 int bits = ext ? 64 : 32; 1251 int max = bits - 1; 1252 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); 1253} 1254 1255static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1256 TCGReg rd, TCGReg rn, unsigned int m) 1257{ 1258 int max = ext ? 63 : 31; 1259 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1260} 1261 1262static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1263 TCGReg rd, TCGReg rn, unsigned int m) 1264{ 1265 int max = ext ? 63 : 31; 1266 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1267} 1268 1269static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1270 TCGReg rd, TCGReg rn, unsigned int m) 1271{ 1272 int max = ext ? 63 : 31; 1273 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1274} 1275 1276static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1277 TCGReg rd, TCGReg rn, unsigned int m) 1278{ 1279 int bits = ext ? 64 : 32; 1280 int max = bits - 1; 1281 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); 1282} 1283 1284static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1285 TCGReg rn, unsigned lsb, unsigned width) 1286{ 1287 unsigned size = ext ? 64 : 32; 1288 unsigned a = (size - lsb) & (size - 1); 1289 unsigned b = width - 1; 1290 tcg_out_bfm(s, ext, rd, rn, a, b); 1291} 1292 1293static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1294 tcg_target_long b, bool const_b) 1295{ 1296 if (const_b) { 1297 /* Using CMP or CMN aliases. */ 1298 if (b >= 0) { 1299 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1300 } else { 1301 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1302 } 1303 } else { 1304 /* Using CMP alias SUBS wzr, Wn, Wm */ 1305 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1306 } 1307} 1308 1309static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) 1310{ 1311 ptrdiff_t offset = target - s->code_ptr; 1312 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1313 tcg_out_insn(s, 3206, B, offset); 1314} 1315 1316static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target) 1317{ 1318 ptrdiff_t offset = target - s->code_ptr; 1319 if (offset == sextract64(offset, 0, 26)) { 1320 tcg_out_insn(s, 3206, BL, offset); 1321 } else { 1322 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1323 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1324 } 1325} 1326 1327static inline void tcg_out_callr(TCGContext *s, TCGReg reg) 1328{ 1329 tcg_out_insn(s, 3207, BLR, reg); 1330} 1331 1332static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) 1333{ 1334 ptrdiff_t offset = target - s->code_ptr; 1335 if (offset == sextract64(offset, 0, 26)) { 1336 tcg_out_insn(s, 3206, BL, offset); 1337 } else { 1338 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1339 tcg_out_callr(s, TCG_REG_TMP); 1340 } 1341} 1342 1343void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, 1344 uintptr_t addr) 1345{ 1346 tcg_insn_unit i1, i2; 1347 TCGType rt = TCG_TYPE_I64; 1348 TCGReg rd = TCG_REG_TMP; 1349 uint64_t pair; 1350 1351 ptrdiff_t offset = addr - jmp_addr; 1352 1353 if (offset == sextract64(offset, 0, 26)) { 1354 i1 = I3206_B | ((offset >> 2) & 0x3ffffff); 1355 i2 = NOP; 1356 } else { 1357 offset = (addr >> 12) - (jmp_addr >> 12); 1358 1359 /* patch ADRP */ 1360 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; 1361 /* patch ADDI */ 1362 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; 1363 } 1364 pair = (uint64_t)i2 << 32 | i1; 1365 qatomic_set((uint64_t *)jmp_addr, pair); 1366 flush_icache_range(jmp_addr, jmp_addr + 8); 1367} 1368 1369static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1370{ 1371 if (!l->has_value) { 1372 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1373 tcg_out_insn(s, 3206, B, 0); 1374 } else { 1375 tcg_out_goto(s, l->u.value_ptr); 1376 } 1377} 1378 1379static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1380 TCGArg b, bool b_const, TCGLabel *l) 1381{ 1382 intptr_t offset; 1383 bool need_cmp; 1384 1385 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1386 need_cmp = false; 1387 } else { 1388 need_cmp = true; 1389 tcg_out_cmp(s, ext, a, b, b_const); 1390 } 1391 1392 if (!l->has_value) { 1393 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1394 offset = tcg_in32(s) >> 5; 1395 } else { 1396 offset = l->u.value_ptr - s->code_ptr; 1397 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1398 } 1399 1400 if (need_cmp) { 1401 tcg_out_insn(s, 3202, B_C, c, offset); 1402 } else if (c == TCG_COND_EQ) { 1403 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1404 } else { 1405 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1406 } 1407} 1408 1409static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) 1410{ 1411 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); 1412} 1413 1414static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) 1415{ 1416 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); 1417} 1418 1419static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) 1420{ 1421 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); 1422} 1423 1424static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1425 TCGReg rd, TCGReg rn) 1426{ 1427 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1428 int bits = (8 << s_bits) - 1; 1429 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1430} 1431 1432static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1433 TCGReg rd, TCGReg rn) 1434{ 1435 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1436 int bits = (8 << s_bits) - 1; 1437 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1438} 1439 1440static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1441 TCGReg rn, int64_t aimm) 1442{ 1443 if (aimm >= 0) { 1444 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1445 } else { 1446 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1447 } 1448} 1449 1450static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1451 TCGReg rh, TCGReg al, TCGReg ah, 1452 tcg_target_long bl, tcg_target_long bh, 1453 bool const_bl, bool const_bh, bool sub) 1454{ 1455 TCGReg orig_rl = rl; 1456 AArch64Insn insn; 1457 1458 if (rl == ah || (!const_bh && rl == bh)) { 1459 rl = TCG_REG_TMP; 1460 } 1461 1462 if (const_bl) { 1463 insn = I3401_ADDSI; 1464 if ((bl < 0) ^ sub) { 1465 insn = I3401_SUBSI; 1466 bl = -bl; 1467 } 1468 if (unlikely(al == TCG_REG_XZR)) { 1469 /* ??? We want to allow al to be zero for the benefit of 1470 negation via subtraction. However, that leaves open the 1471 possibility of adding 0+const in the low part, and the 1472 immediate add instructions encode XSP not XZR. Don't try 1473 anything more elaborate here than loading another zero. */ 1474 al = TCG_REG_TMP; 1475 tcg_out_movi(s, ext, al, 0); 1476 } 1477 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1478 } else { 1479 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1480 } 1481 1482 insn = I3503_ADC; 1483 if (const_bh) { 1484 /* Note that the only two constants we support are 0 and -1, and 1485 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1486 if ((bh != 0) ^ sub) { 1487 insn = I3503_SBC; 1488 } 1489 bh = TCG_REG_XZR; 1490 } else if (sub) { 1491 insn = I3503_SBC; 1492 } 1493 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1494 1495 tcg_out_mov(s, ext, orig_rl, rl); 1496} 1497 1498static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1499{ 1500 static const uint32_t sync[] = { 1501 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1502 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1503 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1504 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1505 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1506 }; 1507 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1508} 1509 1510static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1511 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1512{ 1513 TCGReg a1 = a0; 1514 if (is_ctz) { 1515 a1 = TCG_REG_TMP; 1516 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1517 } 1518 if (const_b && b == (ext ? 64 : 32)) { 1519 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1520 } else { 1521 AArch64Insn sel = I3506_CSEL; 1522 1523 tcg_out_cmp(s, ext, a0, 0, 1); 1524 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1525 1526 if (const_b) { 1527 if (b == -1) { 1528 b = TCG_REG_XZR; 1529 sel = I3506_CSINV; 1530 } else if (b == 0) { 1531 b = TCG_REG_XZR; 1532 } else { 1533 tcg_out_movi(s, ext, d, b); 1534 b = d; 1535 } 1536 } 1537 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1538 } 1539} 1540 1541#ifdef CONFIG_SOFTMMU 1542#include "../tcg-ldst.c.inc" 1543 1544/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, 1545 * TCGMemOpIdx oi, uintptr_t ra) 1546 */ 1547static void * const qemu_ld_helpers[16] = { 1548 [MO_UB] = helper_ret_ldub_mmu, 1549 [MO_LEUW] = helper_le_lduw_mmu, 1550 [MO_LEUL] = helper_le_ldul_mmu, 1551 [MO_LEQ] = helper_le_ldq_mmu, 1552 [MO_BEUW] = helper_be_lduw_mmu, 1553 [MO_BEUL] = helper_be_ldul_mmu, 1554 [MO_BEQ] = helper_be_ldq_mmu, 1555}; 1556 1557/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, 1558 * uintxx_t val, TCGMemOpIdx oi, 1559 * uintptr_t ra) 1560 */ 1561static void * const qemu_st_helpers[16] = { 1562 [MO_UB] = helper_ret_stb_mmu, 1563 [MO_LEUW] = helper_le_stw_mmu, 1564 [MO_LEUL] = helper_le_stl_mmu, 1565 [MO_LEQ] = helper_le_stq_mmu, 1566 [MO_BEUW] = helper_be_stw_mmu, 1567 [MO_BEUL] = helper_be_stl_mmu, 1568 [MO_BEQ] = helper_be_stq_mmu, 1569}; 1570 1571static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) 1572{ 1573 ptrdiff_t offset = tcg_pcrel_diff(s, target); 1574 tcg_debug_assert(offset == sextract64(offset, 0, 21)); 1575 tcg_out_insn(s, 3406, ADR, rd, offset); 1576} 1577 1578static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1579{ 1580 TCGMemOpIdx oi = lb->oi; 1581 MemOp opc = get_memop(oi); 1582 MemOp size = opc & MO_SIZE; 1583 1584 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { 1585 return false; 1586 } 1587 1588 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1589 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1590 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); 1591 tcg_out_adr(s, TCG_REG_X3, lb->raddr); 1592 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1593 if (opc & MO_SIGN) { 1594 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); 1595 } else { 1596 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); 1597 } 1598 1599 tcg_out_goto(s, lb->raddr); 1600 return true; 1601} 1602 1603static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1604{ 1605 TCGMemOpIdx oi = lb->oi; 1606 MemOp opc = get_memop(oi); 1607 MemOp size = opc & MO_SIZE; 1608 1609 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { 1610 return false; 1611 } 1612 1613 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); 1614 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); 1615 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); 1616 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); 1617 tcg_out_adr(s, TCG_REG_X4, lb->raddr); 1618 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 1619 tcg_out_goto(s, lb->raddr); 1620 return true; 1621} 1622 1623static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, 1624 TCGType ext, TCGReg data_reg, TCGReg addr_reg, 1625 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) 1626{ 1627 TCGLabelQemuLdst *label = new_ldst_label(s); 1628 1629 label->is_ld = is_ld; 1630 label->oi = oi; 1631 label->type = ext; 1632 label->datalo_reg = data_reg; 1633 label->addrlo_reg = addr_reg; 1634 label->raddr = raddr; 1635 label->label_ptr[0] = label_ptr; 1636} 1637 1638/* We expect to use a 7-bit scaled negative offset from ENV. */ 1639QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1640QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1641 1642/* These offsets are built into the LDP below. */ 1643QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1644QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1645 1646/* Load and compare a TLB entry, emitting the conditional jump to the 1647 slow path for the failure case, which will be patched later when finalizing 1648 the slow path. Generated code returns the host addend in X1, 1649 clobbers X0,X2,X3,TMP. */ 1650static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, 1651 tcg_insn_unit **label_ptr, int mem_index, 1652 bool is_read) 1653{ 1654 unsigned a_bits = get_alignment_bits(opc); 1655 unsigned s_bits = opc & MO_SIZE; 1656 unsigned a_mask = (1u << a_bits) - 1; 1657 unsigned s_mask = (1u << s_bits) - 1; 1658 TCGReg x3; 1659 TCGType mask_type; 1660 uint64_t compare_mask; 1661 1662 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 1663 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1664 1665 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1666 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1667 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1668 1669 /* Extract the TLB index from the address into X0. */ 1670 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1671 TCG_REG_X0, TCG_REG_X0, addr_reg, 1672 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 1673 1674 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ 1675 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); 1676 1677 /* Load the tlb comparator into X0, and the fast path addend into X1. */ 1678 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read 1679 ? offsetof(CPUTLBEntry, addr_read) 1680 : offsetof(CPUTLBEntry, addr_write)); 1681 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, 1682 offsetof(CPUTLBEntry, addend)); 1683 1684 /* For aligned accesses, we check the first byte and include the alignment 1685 bits within the address. For unaligned access, we check that we don't 1686 cross pages using the address of the last byte of the access. */ 1687 if (a_bits >= s_bits) { 1688 x3 = addr_reg; 1689 } else { 1690 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, 1691 TCG_REG_X3, addr_reg, s_mask - a_mask); 1692 x3 = TCG_REG_X3; 1693 } 1694 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; 1695 1696 /* Store the page mask part of the address into X3. */ 1697 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, 1698 TCG_REG_X3, x3, compare_mask); 1699 1700 /* Perform the address comparison. */ 1701 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); 1702 1703 /* If not equal, we jump to the slow path. */ 1704 *label_ptr = s->code_ptr; 1705 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); 1706} 1707 1708#endif /* CONFIG_SOFTMMU */ 1709 1710static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, 1711 TCGReg data_r, TCGReg addr_r, 1712 TCGType otype, TCGReg off_r) 1713{ 1714 const MemOp bswap = memop & MO_BSWAP; 1715 1716 switch (memop & MO_SSIZE) { 1717 case MO_UB: 1718 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); 1719 break; 1720 case MO_SB: 1721 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, 1722 data_r, addr_r, otype, off_r); 1723 break; 1724 case MO_UW: 1725 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1726 if (bswap) { 1727 tcg_out_rev16(s, data_r, data_r); 1728 } 1729 break; 1730 case MO_SW: 1731 if (bswap) { 1732 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); 1733 tcg_out_rev16(s, data_r, data_r); 1734 tcg_out_sxt(s, ext, MO_16, data_r, data_r); 1735 } else { 1736 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), 1737 data_r, addr_r, otype, off_r); 1738 } 1739 break; 1740 case MO_UL: 1741 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1742 if (bswap) { 1743 tcg_out_rev32(s, data_r, data_r); 1744 } 1745 break; 1746 case MO_SL: 1747 if (bswap) { 1748 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); 1749 tcg_out_rev32(s, data_r, data_r); 1750 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); 1751 } else { 1752 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); 1753 } 1754 break; 1755 case MO_Q: 1756 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); 1757 if (bswap) { 1758 tcg_out_rev64(s, data_r, data_r); 1759 } 1760 break; 1761 default: 1762 tcg_abort(); 1763 } 1764} 1765 1766static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1767 TCGReg data_r, TCGReg addr_r, 1768 TCGType otype, TCGReg off_r) 1769{ 1770 const MemOp bswap = memop & MO_BSWAP; 1771 1772 switch (memop & MO_SIZE) { 1773 case MO_8: 1774 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); 1775 break; 1776 case MO_16: 1777 if (bswap && data_r != TCG_REG_XZR) { 1778 tcg_out_rev16(s, TCG_REG_TMP, data_r); 1779 data_r = TCG_REG_TMP; 1780 } 1781 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); 1782 break; 1783 case MO_32: 1784 if (bswap && data_r != TCG_REG_XZR) { 1785 tcg_out_rev32(s, TCG_REG_TMP, data_r); 1786 data_r = TCG_REG_TMP; 1787 } 1788 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); 1789 break; 1790 case MO_64: 1791 if (bswap && data_r != TCG_REG_XZR) { 1792 tcg_out_rev64(s, TCG_REG_TMP, data_r); 1793 data_r = TCG_REG_TMP; 1794 } 1795 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); 1796 break; 1797 default: 1798 tcg_abort(); 1799 } 1800} 1801 1802static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1803 TCGMemOpIdx oi, TCGType ext) 1804{ 1805 MemOp memop = get_memop(oi); 1806 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1807#ifdef CONFIG_SOFTMMU 1808 unsigned mem_index = get_mmuidx(oi); 1809 tcg_insn_unit *label_ptr; 1810 1811 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); 1812 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1813 TCG_REG_X1, otype, addr_reg); 1814 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, 1815 s->code_ptr, label_ptr); 1816#else /* !CONFIG_SOFTMMU */ 1817 if (USE_GUEST_BASE) { 1818 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1819 TCG_REG_GUEST_BASE, otype, addr_reg); 1820 } else { 1821 tcg_out_qemu_ld_direct(s, memop, ext, data_reg, 1822 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1823 } 1824#endif /* CONFIG_SOFTMMU */ 1825} 1826 1827static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1828 TCGMemOpIdx oi) 1829{ 1830 MemOp memop = get_memop(oi); 1831 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; 1832#ifdef CONFIG_SOFTMMU 1833 unsigned mem_index = get_mmuidx(oi); 1834 tcg_insn_unit *label_ptr; 1835 1836 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); 1837 tcg_out_qemu_st_direct(s, memop, data_reg, 1838 TCG_REG_X1, otype, addr_reg); 1839 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, 1840 data_reg, addr_reg, s->code_ptr, label_ptr); 1841#else /* !CONFIG_SOFTMMU */ 1842 if (USE_GUEST_BASE) { 1843 tcg_out_qemu_st_direct(s, memop, data_reg, 1844 TCG_REG_GUEST_BASE, otype, addr_reg); 1845 } else { 1846 tcg_out_qemu_st_direct(s, memop, data_reg, 1847 addr_reg, TCG_TYPE_I64, TCG_REG_XZR); 1848 } 1849#endif /* CONFIG_SOFTMMU */ 1850} 1851 1852static tcg_insn_unit *tb_ret_addr; 1853 1854static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1855 const TCGArg args[TCG_MAX_OP_ARGS], 1856 const int const_args[TCG_MAX_OP_ARGS]) 1857{ 1858 /* 99% of the time, we can signal the use of extension registers 1859 by looking to see if the opcode handles 64-bit data. */ 1860 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 1861 1862 /* Hoist the loads of the most common arguments. */ 1863 TCGArg a0 = args[0]; 1864 TCGArg a1 = args[1]; 1865 TCGArg a2 = args[2]; 1866 int c2 = const_args[2]; 1867 1868 /* Some operands are defined with "rZ" constraint, a register or 1869 the zero register. These need not actually test args[I] == 0. */ 1870#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 1871 1872 switch (opc) { 1873 case INDEX_op_exit_tb: 1874 /* Reuse the zeroing that exists for goto_ptr. */ 1875 if (a0 == 0) { 1876 tcg_out_goto_long(s, s->code_gen_epilogue); 1877 } else { 1878 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 1879 tcg_out_goto_long(s, tb_ret_addr); 1880 } 1881 break; 1882 1883 case INDEX_op_goto_tb: 1884 if (s->tb_jmp_insn_offset != NULL) { 1885 /* TCG_TARGET_HAS_direct_jump */ 1886 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic 1887 write can be used to patch the target address. */ 1888 if ((uintptr_t)s->code_ptr & 7) { 1889 tcg_out32(s, NOP); 1890 } 1891 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); 1892 /* actual branch destination will be patched by 1893 tb_target_set_jmp_target later. */ 1894 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); 1895 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); 1896 } else { 1897 /* !TCG_TARGET_HAS_direct_jump */ 1898 tcg_debug_assert(s->tb_jmp_target_addr != NULL); 1899 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2; 1900 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP); 1901 } 1902 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1903 set_jmp_reset_offset(s, a0); 1904 break; 1905 1906 case INDEX_op_goto_ptr: 1907 tcg_out_insn(s, 3207, BR, a0); 1908 break; 1909 1910 case INDEX_op_br: 1911 tcg_out_goto_label(s, arg_label(a0)); 1912 break; 1913 1914 case INDEX_op_ld8u_i32: 1915 case INDEX_op_ld8u_i64: 1916 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 1917 break; 1918 case INDEX_op_ld8s_i32: 1919 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 1920 break; 1921 case INDEX_op_ld8s_i64: 1922 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 1923 break; 1924 case INDEX_op_ld16u_i32: 1925 case INDEX_op_ld16u_i64: 1926 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 1927 break; 1928 case INDEX_op_ld16s_i32: 1929 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 1930 break; 1931 case INDEX_op_ld16s_i64: 1932 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 1933 break; 1934 case INDEX_op_ld_i32: 1935 case INDEX_op_ld32u_i64: 1936 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 1937 break; 1938 case INDEX_op_ld32s_i64: 1939 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 1940 break; 1941 case INDEX_op_ld_i64: 1942 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 1943 break; 1944 1945 case INDEX_op_st8_i32: 1946 case INDEX_op_st8_i64: 1947 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 1948 break; 1949 case INDEX_op_st16_i32: 1950 case INDEX_op_st16_i64: 1951 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 1952 break; 1953 case INDEX_op_st_i32: 1954 case INDEX_op_st32_i64: 1955 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 1956 break; 1957 case INDEX_op_st_i64: 1958 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 1959 break; 1960 1961 case INDEX_op_add_i32: 1962 a2 = (int32_t)a2; 1963 /* FALLTHRU */ 1964 case INDEX_op_add_i64: 1965 if (c2) { 1966 tcg_out_addsubi(s, ext, a0, a1, a2); 1967 } else { 1968 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 1969 } 1970 break; 1971 1972 case INDEX_op_sub_i32: 1973 a2 = (int32_t)a2; 1974 /* FALLTHRU */ 1975 case INDEX_op_sub_i64: 1976 if (c2) { 1977 tcg_out_addsubi(s, ext, a0, a1, -a2); 1978 } else { 1979 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 1980 } 1981 break; 1982 1983 case INDEX_op_neg_i64: 1984 case INDEX_op_neg_i32: 1985 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 1986 break; 1987 1988 case INDEX_op_and_i32: 1989 a2 = (int32_t)a2; 1990 /* FALLTHRU */ 1991 case INDEX_op_and_i64: 1992 if (c2) { 1993 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); 1994 } else { 1995 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 1996 } 1997 break; 1998 1999 case INDEX_op_andc_i32: 2000 a2 = (int32_t)a2; 2001 /* FALLTHRU */ 2002 case INDEX_op_andc_i64: 2003 if (c2) { 2004 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 2005 } else { 2006 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 2007 } 2008 break; 2009 2010 case INDEX_op_or_i32: 2011 a2 = (int32_t)a2; 2012 /* FALLTHRU */ 2013 case INDEX_op_or_i64: 2014 if (c2) { 2015 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2016 } else { 2017 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2018 } 2019 break; 2020 2021 case INDEX_op_orc_i32: 2022 a2 = (int32_t)a2; 2023 /* FALLTHRU */ 2024 case INDEX_op_orc_i64: 2025 if (c2) { 2026 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); 2027 } else { 2028 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2029 } 2030 break; 2031 2032 case INDEX_op_xor_i32: 2033 a2 = (int32_t)a2; 2034 /* FALLTHRU */ 2035 case INDEX_op_xor_i64: 2036 if (c2) { 2037 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2038 } else { 2039 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2040 } 2041 break; 2042 2043 case INDEX_op_eqv_i32: 2044 a2 = (int32_t)a2; 2045 /* FALLTHRU */ 2046 case INDEX_op_eqv_i64: 2047 if (c2) { 2048 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2049 } else { 2050 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2051 } 2052 break; 2053 2054 case INDEX_op_not_i64: 2055 case INDEX_op_not_i32: 2056 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2057 break; 2058 2059 case INDEX_op_mul_i64: 2060 case INDEX_op_mul_i32: 2061 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2062 break; 2063 2064 case INDEX_op_div_i64: 2065 case INDEX_op_div_i32: 2066 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2067 break; 2068 case INDEX_op_divu_i64: 2069 case INDEX_op_divu_i32: 2070 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2071 break; 2072 2073 case INDEX_op_rem_i64: 2074 case INDEX_op_rem_i32: 2075 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); 2076 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2077 break; 2078 case INDEX_op_remu_i64: 2079 case INDEX_op_remu_i32: 2080 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); 2081 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2082 break; 2083 2084 case INDEX_op_shl_i64: 2085 case INDEX_op_shl_i32: 2086 if (c2) { 2087 tcg_out_shl(s, ext, a0, a1, a2); 2088 } else { 2089 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2090 } 2091 break; 2092 2093 case INDEX_op_shr_i64: 2094 case INDEX_op_shr_i32: 2095 if (c2) { 2096 tcg_out_shr(s, ext, a0, a1, a2); 2097 } else { 2098 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2099 } 2100 break; 2101 2102 case INDEX_op_sar_i64: 2103 case INDEX_op_sar_i32: 2104 if (c2) { 2105 tcg_out_sar(s, ext, a0, a1, a2); 2106 } else { 2107 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2108 } 2109 break; 2110 2111 case INDEX_op_rotr_i64: 2112 case INDEX_op_rotr_i32: 2113 if (c2) { 2114 tcg_out_rotr(s, ext, a0, a1, a2); 2115 } else { 2116 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2117 } 2118 break; 2119 2120 case INDEX_op_rotl_i64: 2121 case INDEX_op_rotl_i32: 2122 if (c2) { 2123 tcg_out_rotl(s, ext, a0, a1, a2); 2124 } else { 2125 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); 2126 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); 2127 } 2128 break; 2129 2130 case INDEX_op_clz_i64: 2131 case INDEX_op_clz_i32: 2132 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2133 break; 2134 case INDEX_op_ctz_i64: 2135 case INDEX_op_ctz_i32: 2136 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2137 break; 2138 2139 case INDEX_op_brcond_i32: 2140 a1 = (int32_t)a1; 2141 /* FALLTHRU */ 2142 case INDEX_op_brcond_i64: 2143 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2144 break; 2145 2146 case INDEX_op_setcond_i32: 2147 a2 = (int32_t)a2; 2148 /* FALLTHRU */ 2149 case INDEX_op_setcond_i64: 2150 tcg_out_cmp(s, ext, a1, a2, c2); 2151 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2152 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2153 TCG_REG_XZR, tcg_invert_cond(args[3])); 2154 break; 2155 2156 case INDEX_op_movcond_i32: 2157 a2 = (int32_t)a2; 2158 /* FALLTHRU */ 2159 case INDEX_op_movcond_i64: 2160 tcg_out_cmp(s, ext, a1, a2, c2); 2161 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2162 break; 2163 2164 case INDEX_op_qemu_ld_i32: 2165 case INDEX_op_qemu_ld_i64: 2166 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2167 break; 2168 case INDEX_op_qemu_st_i32: 2169 case INDEX_op_qemu_st_i64: 2170 tcg_out_qemu_st(s, REG0(0), a1, a2); 2171 break; 2172 2173 case INDEX_op_bswap64_i64: 2174 tcg_out_rev64(s, a0, a1); 2175 break; 2176 case INDEX_op_bswap32_i64: 2177 case INDEX_op_bswap32_i32: 2178 tcg_out_rev32(s, a0, a1); 2179 break; 2180 case INDEX_op_bswap16_i64: 2181 case INDEX_op_bswap16_i32: 2182 tcg_out_rev16(s, a0, a1); 2183 break; 2184 2185 case INDEX_op_ext8s_i64: 2186 case INDEX_op_ext8s_i32: 2187 tcg_out_sxt(s, ext, MO_8, a0, a1); 2188 break; 2189 case INDEX_op_ext16s_i64: 2190 case INDEX_op_ext16s_i32: 2191 tcg_out_sxt(s, ext, MO_16, a0, a1); 2192 break; 2193 case INDEX_op_ext_i32_i64: 2194 case INDEX_op_ext32s_i64: 2195 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); 2196 break; 2197 case INDEX_op_ext8u_i64: 2198 case INDEX_op_ext8u_i32: 2199 tcg_out_uxt(s, MO_8, a0, a1); 2200 break; 2201 case INDEX_op_ext16u_i64: 2202 case INDEX_op_ext16u_i32: 2203 tcg_out_uxt(s, MO_16, a0, a1); 2204 break; 2205 case INDEX_op_extu_i32_i64: 2206 case INDEX_op_ext32u_i64: 2207 tcg_out_movr(s, TCG_TYPE_I32, a0, a1); 2208 break; 2209 2210 case INDEX_op_deposit_i64: 2211 case INDEX_op_deposit_i32: 2212 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2213 break; 2214 2215 case INDEX_op_extract_i64: 2216 case INDEX_op_extract_i32: 2217 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2218 break; 2219 2220 case INDEX_op_sextract_i64: 2221 case INDEX_op_sextract_i32: 2222 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2223 break; 2224 2225 case INDEX_op_extract2_i64: 2226 case INDEX_op_extract2_i32: 2227 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2228 break; 2229 2230 case INDEX_op_add2_i32: 2231 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2232 (int32_t)args[4], args[5], const_args[4], 2233 const_args[5], false); 2234 break; 2235 case INDEX_op_add2_i64: 2236 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2237 args[5], const_args[4], const_args[5], false); 2238 break; 2239 case INDEX_op_sub2_i32: 2240 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2241 (int32_t)args[4], args[5], const_args[4], 2242 const_args[5], true); 2243 break; 2244 case INDEX_op_sub2_i64: 2245 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2246 args[5], const_args[4], const_args[5], true); 2247 break; 2248 2249 case INDEX_op_muluh_i64: 2250 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2251 break; 2252 case INDEX_op_mulsh_i64: 2253 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2254 break; 2255 2256 case INDEX_op_mb: 2257 tcg_out_mb(s, a0); 2258 break; 2259 2260 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2261 case INDEX_op_mov_i64: 2262 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ 2263 case INDEX_op_movi_i64: 2264 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2265 default: 2266 g_assert_not_reached(); 2267 } 2268 2269#undef REG0 2270} 2271 2272static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2273 unsigned vecl, unsigned vece, 2274 const TCGArg *args, const int *const_args) 2275{ 2276 static const AArch64Insn cmp_insn[16] = { 2277 [TCG_COND_EQ] = I3616_CMEQ, 2278 [TCG_COND_GT] = I3616_CMGT, 2279 [TCG_COND_GE] = I3616_CMGE, 2280 [TCG_COND_GTU] = I3616_CMHI, 2281 [TCG_COND_GEU] = I3616_CMHS, 2282 }; 2283 static const AArch64Insn cmp0_insn[16] = { 2284 [TCG_COND_EQ] = I3617_CMEQ0, 2285 [TCG_COND_GT] = I3617_CMGT0, 2286 [TCG_COND_GE] = I3617_CMGE0, 2287 [TCG_COND_LT] = I3617_CMLT0, 2288 [TCG_COND_LE] = I3617_CMLE0, 2289 }; 2290 2291 TCGType type = vecl + TCG_TYPE_V64; 2292 unsigned is_q = vecl; 2293 TCGArg a0, a1, a2, a3; 2294 int cmode, imm8; 2295 2296 a0 = args[0]; 2297 a1 = args[1]; 2298 a2 = args[2]; 2299 2300 switch (opc) { 2301 case INDEX_op_ld_vec: 2302 tcg_out_ld(s, type, a0, a1, a2); 2303 break; 2304 case INDEX_op_st_vec: 2305 tcg_out_st(s, type, a0, a1, a2); 2306 break; 2307 case INDEX_op_dupm_vec: 2308 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2309 break; 2310 case INDEX_op_add_vec: 2311 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2312 break; 2313 case INDEX_op_sub_vec: 2314 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2315 break; 2316 case INDEX_op_mul_vec: 2317 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2318 break; 2319 case INDEX_op_neg_vec: 2320 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2321 break; 2322 case INDEX_op_abs_vec: 2323 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2324 break; 2325 case INDEX_op_and_vec: 2326 if (const_args[2]) { 2327 is_shimm1632(~a2, &cmode, &imm8); 2328 if (a0 == a1) { 2329 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2330 return; 2331 } 2332 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2333 a2 = a0; 2334 } 2335 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2336 break; 2337 case INDEX_op_or_vec: 2338 if (const_args[2]) { 2339 is_shimm1632(a2, &cmode, &imm8); 2340 if (a0 == a1) { 2341 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2342 return; 2343 } 2344 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2345 a2 = a0; 2346 } 2347 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2348 break; 2349 case INDEX_op_andc_vec: 2350 if (const_args[2]) { 2351 is_shimm1632(a2, &cmode, &imm8); 2352 if (a0 == a1) { 2353 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2354 return; 2355 } 2356 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2357 a2 = a0; 2358 } 2359 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2360 break; 2361 case INDEX_op_orc_vec: 2362 if (const_args[2]) { 2363 is_shimm1632(~a2, &cmode, &imm8); 2364 if (a0 == a1) { 2365 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2366 return; 2367 } 2368 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2369 a2 = a0; 2370 } 2371 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2372 break; 2373 case INDEX_op_xor_vec: 2374 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2375 break; 2376 case INDEX_op_ssadd_vec: 2377 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2378 break; 2379 case INDEX_op_sssub_vec: 2380 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2381 break; 2382 case INDEX_op_usadd_vec: 2383 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2384 break; 2385 case INDEX_op_ussub_vec: 2386 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2387 break; 2388 case INDEX_op_smax_vec: 2389 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2390 break; 2391 case INDEX_op_smin_vec: 2392 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2393 break; 2394 case INDEX_op_umax_vec: 2395 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2396 break; 2397 case INDEX_op_umin_vec: 2398 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2399 break; 2400 case INDEX_op_not_vec: 2401 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2402 break; 2403 case INDEX_op_shli_vec: 2404 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2405 break; 2406 case INDEX_op_shri_vec: 2407 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2408 break; 2409 case INDEX_op_sari_vec: 2410 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2411 break; 2412 case INDEX_op_aa64_sli_vec: 2413 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2414 break; 2415 case INDEX_op_shlv_vec: 2416 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2417 break; 2418 case INDEX_op_aa64_sshl_vec: 2419 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2420 break; 2421 case INDEX_op_cmp_vec: 2422 { 2423 TCGCond cond = args[3]; 2424 AArch64Insn insn; 2425 2426 if (cond == TCG_COND_NE) { 2427 if (const_args[2]) { 2428 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2429 } else { 2430 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2431 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2432 } 2433 } else { 2434 if (const_args[2]) { 2435 insn = cmp0_insn[cond]; 2436 if (insn) { 2437 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2438 break; 2439 } 2440 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0); 2441 a2 = TCG_VEC_TMP; 2442 } 2443 insn = cmp_insn[cond]; 2444 if (insn == 0) { 2445 TCGArg t; 2446 t = a1, a1 = a2, a2 = t; 2447 cond = tcg_swap_cond(cond); 2448 insn = cmp_insn[cond]; 2449 tcg_debug_assert(insn != 0); 2450 } 2451 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2452 } 2453 } 2454 break; 2455 2456 case INDEX_op_bitsel_vec: 2457 a3 = args[3]; 2458 if (a0 == a3) { 2459 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2460 } else if (a0 == a2) { 2461 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2462 } else { 2463 if (a0 != a1) { 2464 tcg_out_mov(s, type, a0, a1); 2465 } 2466 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2467 } 2468 break; 2469 2470 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2471 case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ 2472 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2473 default: 2474 g_assert_not_reached(); 2475 } 2476} 2477 2478int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2479{ 2480 switch (opc) { 2481 case INDEX_op_add_vec: 2482 case INDEX_op_sub_vec: 2483 case INDEX_op_and_vec: 2484 case INDEX_op_or_vec: 2485 case INDEX_op_xor_vec: 2486 case INDEX_op_andc_vec: 2487 case INDEX_op_orc_vec: 2488 case INDEX_op_neg_vec: 2489 case INDEX_op_abs_vec: 2490 case INDEX_op_not_vec: 2491 case INDEX_op_cmp_vec: 2492 case INDEX_op_shli_vec: 2493 case INDEX_op_shri_vec: 2494 case INDEX_op_sari_vec: 2495 case INDEX_op_ssadd_vec: 2496 case INDEX_op_sssub_vec: 2497 case INDEX_op_usadd_vec: 2498 case INDEX_op_ussub_vec: 2499 case INDEX_op_shlv_vec: 2500 case INDEX_op_bitsel_vec: 2501 return 1; 2502 case INDEX_op_rotli_vec: 2503 case INDEX_op_shrv_vec: 2504 case INDEX_op_sarv_vec: 2505 case INDEX_op_rotlv_vec: 2506 case INDEX_op_rotrv_vec: 2507 return -1; 2508 case INDEX_op_mul_vec: 2509 case INDEX_op_smax_vec: 2510 case INDEX_op_smin_vec: 2511 case INDEX_op_umax_vec: 2512 case INDEX_op_umin_vec: 2513 return vece < MO_64; 2514 2515 default: 2516 return 0; 2517 } 2518} 2519 2520void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2521 TCGArg a0, ...) 2522{ 2523 va_list va; 2524 TCGv_vec v0, v1, v2, t1, t2; 2525 TCGArg a2; 2526 2527 va_start(va, a0); 2528 v0 = temp_tcgv_vec(arg_temp(a0)); 2529 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2530 a2 = va_arg(va, TCGArg); 2531 v2 = temp_tcgv_vec(arg_temp(a2)); 2532 2533 switch (opc) { 2534 case INDEX_op_rotli_vec: 2535 t1 = tcg_temp_new_vec(type); 2536 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2537 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2538 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2539 tcg_temp_free_vec(t1); 2540 break; 2541 2542 case INDEX_op_shrv_vec: 2543 case INDEX_op_sarv_vec: 2544 /* Right shifts are negative left shifts for AArch64. */ 2545 t1 = tcg_temp_new_vec(type); 2546 tcg_gen_neg_vec(vece, t1, v2); 2547 opc = (opc == INDEX_op_shrv_vec 2548 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2549 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2550 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2551 tcg_temp_free_vec(t1); 2552 break; 2553 2554 case INDEX_op_rotlv_vec: 2555 t1 = tcg_temp_new_vec(type); 2556 tcg_gen_dupi_vec(vece, t1, 8 << vece); 2557 tcg_gen_sub_vec(vece, t1, v2, t1); 2558 /* Right shifts are negative left shifts for AArch64. */ 2559 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2560 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2561 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2562 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2563 tcg_gen_or_vec(vece, v0, v0, t1); 2564 tcg_temp_free_vec(t1); 2565 break; 2566 2567 case INDEX_op_rotrv_vec: 2568 t1 = tcg_temp_new_vec(type); 2569 t2 = tcg_temp_new_vec(type); 2570 tcg_gen_neg_vec(vece, t1, v2); 2571 tcg_gen_dupi_vec(vece, t2, 8 << vece); 2572 tcg_gen_add_vec(vece, t2, t1, t2); 2573 /* Right shifts are negative left shifts for AArch64. */ 2574 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2575 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2576 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2577 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2578 tcg_gen_or_vec(vece, v0, t1, t2); 2579 tcg_temp_free_vec(t1); 2580 tcg_temp_free_vec(t2); 2581 break; 2582 2583 default: 2584 g_assert_not_reached(); 2585 } 2586 2587 va_end(va); 2588} 2589 2590static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) 2591{ 2592 static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; 2593 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } }; 2594 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } }; 2595 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } }; 2596 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } }; 2597 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } }; 2598 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } }; 2599 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } }; 2600 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; 2601 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; 2602 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; 2603 static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } }; 2604 static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } }; 2605 static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } }; 2606 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } }; 2607 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; 2608 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } }; 2609 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } }; 2610 static const TCGTargetOpDef r_r_rAL 2611 = { .args_ct_str = { "r", "r", "rAL" } }; 2612 static const TCGTargetOpDef dep 2613 = { .args_ct_str = { "r", "0", "rZ" } }; 2614 static const TCGTargetOpDef ext2 2615 = { .args_ct_str = { "r", "rZ", "rZ" } }; 2616 static const TCGTargetOpDef movc 2617 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } }; 2618 static const TCGTargetOpDef add2 2619 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } }; 2620 static const TCGTargetOpDef w_w_w_w 2621 = { .args_ct_str = { "w", "w", "w", "w" } }; 2622 2623 switch (op) { 2624 case INDEX_op_goto_ptr: 2625 return &r; 2626 2627 case INDEX_op_ld8u_i32: 2628 case INDEX_op_ld8s_i32: 2629 case INDEX_op_ld16u_i32: 2630 case INDEX_op_ld16s_i32: 2631 case INDEX_op_ld_i32: 2632 case INDEX_op_ld8u_i64: 2633 case INDEX_op_ld8s_i64: 2634 case INDEX_op_ld16u_i64: 2635 case INDEX_op_ld16s_i64: 2636 case INDEX_op_ld32u_i64: 2637 case INDEX_op_ld32s_i64: 2638 case INDEX_op_ld_i64: 2639 case INDEX_op_neg_i32: 2640 case INDEX_op_neg_i64: 2641 case INDEX_op_not_i32: 2642 case INDEX_op_not_i64: 2643 case INDEX_op_bswap16_i32: 2644 case INDEX_op_bswap32_i32: 2645 case INDEX_op_bswap16_i64: 2646 case INDEX_op_bswap32_i64: 2647 case INDEX_op_bswap64_i64: 2648 case INDEX_op_ext8s_i32: 2649 case INDEX_op_ext16s_i32: 2650 case INDEX_op_ext8u_i32: 2651 case INDEX_op_ext16u_i32: 2652 case INDEX_op_ext8s_i64: 2653 case INDEX_op_ext16s_i64: 2654 case INDEX_op_ext32s_i64: 2655 case INDEX_op_ext8u_i64: 2656 case INDEX_op_ext16u_i64: 2657 case INDEX_op_ext32u_i64: 2658 case INDEX_op_ext_i32_i64: 2659 case INDEX_op_extu_i32_i64: 2660 case INDEX_op_extract_i32: 2661 case INDEX_op_extract_i64: 2662 case INDEX_op_sextract_i32: 2663 case INDEX_op_sextract_i64: 2664 return &r_r; 2665 2666 case INDEX_op_st8_i32: 2667 case INDEX_op_st16_i32: 2668 case INDEX_op_st_i32: 2669 case INDEX_op_st8_i64: 2670 case INDEX_op_st16_i64: 2671 case INDEX_op_st32_i64: 2672 case INDEX_op_st_i64: 2673 return &rZ_r; 2674 2675 case INDEX_op_add_i32: 2676 case INDEX_op_add_i64: 2677 case INDEX_op_sub_i32: 2678 case INDEX_op_sub_i64: 2679 case INDEX_op_setcond_i32: 2680 case INDEX_op_setcond_i64: 2681 return &r_r_rA; 2682 2683 case INDEX_op_mul_i32: 2684 case INDEX_op_mul_i64: 2685 case INDEX_op_div_i32: 2686 case INDEX_op_div_i64: 2687 case INDEX_op_divu_i32: 2688 case INDEX_op_divu_i64: 2689 case INDEX_op_rem_i32: 2690 case INDEX_op_rem_i64: 2691 case INDEX_op_remu_i32: 2692 case INDEX_op_remu_i64: 2693 case INDEX_op_muluh_i64: 2694 case INDEX_op_mulsh_i64: 2695 return &r_r_r; 2696 2697 case INDEX_op_and_i32: 2698 case INDEX_op_and_i64: 2699 case INDEX_op_or_i32: 2700 case INDEX_op_or_i64: 2701 case INDEX_op_xor_i32: 2702 case INDEX_op_xor_i64: 2703 case INDEX_op_andc_i32: 2704 case INDEX_op_andc_i64: 2705 case INDEX_op_orc_i32: 2706 case INDEX_op_orc_i64: 2707 case INDEX_op_eqv_i32: 2708 case INDEX_op_eqv_i64: 2709 return &r_r_rL; 2710 2711 case INDEX_op_shl_i32: 2712 case INDEX_op_shr_i32: 2713 case INDEX_op_sar_i32: 2714 case INDEX_op_rotl_i32: 2715 case INDEX_op_rotr_i32: 2716 case INDEX_op_shl_i64: 2717 case INDEX_op_shr_i64: 2718 case INDEX_op_sar_i64: 2719 case INDEX_op_rotl_i64: 2720 case INDEX_op_rotr_i64: 2721 return &r_r_ri; 2722 2723 case INDEX_op_clz_i32: 2724 case INDEX_op_ctz_i32: 2725 case INDEX_op_clz_i64: 2726 case INDEX_op_ctz_i64: 2727 return &r_r_rAL; 2728 2729 case INDEX_op_brcond_i32: 2730 case INDEX_op_brcond_i64: 2731 return &r_rA; 2732 2733 case INDEX_op_movcond_i32: 2734 case INDEX_op_movcond_i64: 2735 return &movc; 2736 2737 case INDEX_op_qemu_ld_i32: 2738 case INDEX_op_qemu_ld_i64: 2739 return &r_l; 2740 case INDEX_op_qemu_st_i32: 2741 case INDEX_op_qemu_st_i64: 2742 return &lZ_l; 2743 2744 case INDEX_op_deposit_i32: 2745 case INDEX_op_deposit_i64: 2746 return &dep; 2747 2748 case INDEX_op_extract2_i32: 2749 case INDEX_op_extract2_i64: 2750 return &ext2; 2751 2752 case INDEX_op_add2_i32: 2753 case INDEX_op_add2_i64: 2754 case INDEX_op_sub2_i32: 2755 case INDEX_op_sub2_i64: 2756 return &add2; 2757 2758 case INDEX_op_add_vec: 2759 case INDEX_op_sub_vec: 2760 case INDEX_op_mul_vec: 2761 case INDEX_op_xor_vec: 2762 case INDEX_op_ssadd_vec: 2763 case INDEX_op_sssub_vec: 2764 case INDEX_op_usadd_vec: 2765 case INDEX_op_ussub_vec: 2766 case INDEX_op_smax_vec: 2767 case INDEX_op_smin_vec: 2768 case INDEX_op_umax_vec: 2769 case INDEX_op_umin_vec: 2770 case INDEX_op_shlv_vec: 2771 case INDEX_op_shrv_vec: 2772 case INDEX_op_sarv_vec: 2773 case INDEX_op_aa64_sshl_vec: 2774 return &w_w_w; 2775 case INDEX_op_not_vec: 2776 case INDEX_op_neg_vec: 2777 case INDEX_op_abs_vec: 2778 case INDEX_op_shli_vec: 2779 case INDEX_op_shri_vec: 2780 case INDEX_op_sari_vec: 2781 return &w_w; 2782 case INDEX_op_ld_vec: 2783 case INDEX_op_st_vec: 2784 case INDEX_op_dupm_vec: 2785 return &w_r; 2786 case INDEX_op_dup_vec: 2787 return &w_wr; 2788 case INDEX_op_or_vec: 2789 case INDEX_op_andc_vec: 2790 return &w_w_wO; 2791 case INDEX_op_and_vec: 2792 case INDEX_op_orc_vec: 2793 return &w_w_wN; 2794 case INDEX_op_cmp_vec: 2795 return &w_w_wZ; 2796 case INDEX_op_bitsel_vec: 2797 return &w_w_w_w; 2798 case INDEX_op_aa64_sli_vec: 2799 return &w_0_w; 2800 2801 default: 2802 return NULL; 2803 } 2804} 2805 2806static void tcg_target_init(TCGContext *s) 2807{ 2808 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2809 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2810 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2811 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2812 2813 tcg_target_call_clobber_regs = -1ull; 2814 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2815 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2816 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 2817 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2818 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2819 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2820 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2821 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2822 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2823 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2824 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2825 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2826 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2827 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2828 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2829 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2830 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 2831 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 2832 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 2833 2834 s->reserved_regs = 0; 2835 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); 2836 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); 2837 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); 2838 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ 2839 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); 2840} 2841 2842/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ 2843#define PUSH_SIZE ((30 - 19 + 1) * 8) 2844 2845#define FRAME_SIZE \ 2846 ((PUSH_SIZE \ 2847 + TCG_STATIC_CALL_ARGS_SIZE \ 2848 + CPU_TEMP_BUF_NLONGS * sizeof(long) \ 2849 + TCG_TARGET_STACK_ALIGN - 1) \ 2850 & ~(TCG_TARGET_STACK_ALIGN - 1)) 2851 2852/* We're expecting a 2 byte uleb128 encoded value. */ 2853QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 2854 2855/* We're expecting to use a single ADDI insn. */ 2856QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); 2857 2858static void tcg_target_qemu_prologue(TCGContext *s) 2859{ 2860 TCGReg r; 2861 2862 /* Push (FP, LR) and allocate space for all saved registers. */ 2863 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, 2864 TCG_REG_SP, -PUSH_SIZE, 1, 1); 2865 2866 /* Set up frame pointer for canonical unwinding. */ 2867 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); 2868 2869 /* Store callee-preserved regs x19..x28. */ 2870 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2871 int ofs = (r - TCG_REG_X19 + 2) * 8; 2872 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2873 } 2874 2875 /* Make stack space for TCG locals. */ 2876 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2877 FRAME_SIZE - PUSH_SIZE); 2878 2879 /* Inform TCG about how to find TCG locals with register, offset, size. */ 2880 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, 2881 CPU_TEMP_BUF_NLONGS * sizeof(long)); 2882 2883#if !defined(CONFIG_SOFTMMU) 2884 if (USE_GUEST_BASE) { 2885 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); 2886 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); 2887 } 2888#endif 2889 2890 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2891 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); 2892 2893 /* 2894 * Return path for goto_ptr. Set return value to 0, a-la exit_tb, 2895 * and fall through to the rest of the epilogue. 2896 */ 2897 s->code_gen_epilogue = s->code_ptr; 2898 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); 2899 2900 /* TB epilogue */ 2901 tb_ret_addr = s->code_ptr; 2902 2903 /* Remove TCG locals stack space. */ 2904 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, 2905 FRAME_SIZE - PUSH_SIZE); 2906 2907 /* Restore registers x19..x28. */ 2908 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { 2909 int ofs = (r - TCG_REG_X19 + 2) * 8; 2910 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); 2911 } 2912 2913 /* Pop (FP, LR), restore SP to previous frame. */ 2914 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, 2915 TCG_REG_SP, PUSH_SIZE, 0, 1); 2916 tcg_out_insn(s, 3207, RET, TCG_REG_LR); 2917} 2918 2919static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2920{ 2921 int i; 2922 for (i = 0; i < count; ++i) { 2923 p[i] = NOP; 2924 } 2925} 2926 2927typedef struct { 2928 DebugFrameHeader h; 2929 uint8_t fde_def_cfa[4]; 2930 uint8_t fde_reg_ofs[24]; 2931} DebugFrame; 2932 2933#define ELF_HOST_MACHINE EM_AARCH64 2934 2935static const DebugFrame debug_frame = { 2936 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 2937 .h.cie.id = -1, 2938 .h.cie.version = 1, 2939 .h.cie.code_align = 1, 2940 .h.cie.data_align = 0x78, /* sleb128 -8 */ 2941 .h.cie.return_column = TCG_REG_LR, 2942 2943 /* Total FDE size does not include the "len" member. */ 2944 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), 2945 2946 .fde_def_cfa = { 2947 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ 2948 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 2949 (FRAME_SIZE >> 7) 2950 }, 2951 .fde_reg_ofs = { 2952 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ 2953 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ 2954 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ 2955 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ 2956 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ 2957 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ 2958 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ 2959 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ 2960 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ 2961 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ 2962 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ 2963 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ 2964 } 2965}; 2966 2967void tcg_register_jit(void *buf, size_t buf_size) 2968{ 2969 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 2970} 2971