/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#ifndef CONFIG_SOFTMMU
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.  */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}
static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear.  */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
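/*
 * Illustrative examples of values accepted by the helpers above (the
 * cmode/imm8 pairs follow directly from the tests they perform):
 *   is_shimm16(0xab00)     -> cmode 0xa, imm8 0xab  (0xab, LSL #8)
 *   is_shimm32(0x00ab0000) -> cmode 0x4, imm8 0xab  (0xab, LSL #16)
 *   is_soimm32(0x0000abff) -> cmode 0xc, imm8 0xab  (0xab, MSL #8)
 */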
/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf,     /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,      /* store */
    LDST_LD = 1,      /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ = 0x34000000,
    I3201_CBNZ = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B = 0x14000000,
    I3206_BL = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR = 0xd61f0000,
    I3207_BLR = 0xd63f0000,
    I3207_RET = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR = 0x58000000,
    I3305_LDR_v64 = 0x5c000000,
    I3305_LDR_v128 = 0x9c000000,

    /* Load/store exclusive.  */
    I3306_LDXP = 0xc8600000,
    I3306_STXP = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310 = 0x00200800,
    I3312_TO_I3313 = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP = 0x28400000,
    I3314_STP = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI = 0x11000000,
    I3401_ADDSI = 0x31000000,
    I3401_SUBI = 0x51000000,
    I3401_SUBSI = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM = 0x33000000,
    I3402_SBFM = 0x13000000,
    I3402_UBFM = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI = 0x12000000,
    I3404_ORRI = 0x32000000,
    I3404_EORI = 0x52000000,
    I3404_ANDSI = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN = 0x12800000,
    I3405_MOVZ = 0x52800000,
    I3405_MOVK = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR = 0x10000000,
    I3406_ADRP = 0x90000000,

    /* Add/subtract extended register instructions.  */
    I3501_ADD = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD = 0x0b000000,
    I3502_ADDS = 0x2b000000,
    I3502_SUB = 0x4b000000,
    I3502_SUBS = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC = 0x1a000000,
    I3503_SBC = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL = 0x1a800000,
    I3506_CSINC = 0x1a800400,
    I3506_CSINV = 0x5a800000,
    I3506_CSNEG = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ = 0x5ac01000,
    I3507_RBIT = 0x5ac00000,
    I3507_REV = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV = 0x1ac02000,
    I3508_LSRV = 0x1ac02400,
    I3508_ASRV = 0x1ac02800,
    I3508_RORV = 0x1ac02c00,
    I3508_SMULH = 0x9b407c00,
    I3508_UMULH = 0x9bc07c00,
    I3508_UDIV = 0x1ac00800,
    I3508_SDIV = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD = 0x1b000000,
    I3509_MSUB = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND = 0x0a000000,
    I3510_BIC = 0x0a200000,
    I3510_ORR = 0x2a000000,
    I3510_ORN = 0x2a200000,
    I3510_EOR = 0x4a000000,
    I3510_EON = 0x4a200000,
    I3510_ANDS = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP = 0x0e000400,
    I3605_INS = 0x4e001c00,
    I3605_UMOV = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI = 0x0f000400,
    I3606_MVNI = 0x2f000400,
    I3606_BIC = 0x2f001400,
    I3606_ORR = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR = 0x5f000400,
    I3609_SSRA = 0x5f001400,
    I3609_SHL = 0x5f005400,
    I3609_USHR = 0x7f000400,
    I3609_USRA = 0x7f001400,
    I3609_SLI = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD = 0x5e200c00,
    I3611_SQSUB = 0x5e202c00,
    I3611_CMGT = 0x5e203400,
    I3611_CMGE = 0x5e203c00,
    I3611_SSHL = 0x5e204400,
    I3611_ADD = 0x5e208400,
    I3611_CMTST = 0x5e208c00,
    I3611_UQADD = 0x7e200c00,
    I3611_UQSUB = 0x7e202c00,
    I3611_CMHI = 0x7e203400,
    I3611_CMHS = 0x7e203c00,
    I3611_USHL = 0x7e204400,
    I3611_SUB = 0x7e208400,
    I3611_CMEQ = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0 = 0x5e208800,
    I3612_CMEQ0 = 0x5e209800,
    I3612_CMLT0 = 0x5e20a800,
    I3612_ABS = 0x5e20b800,
    I3612_CMGE0 = 0x7e208800,
    I3612_CMLE0 = 0x7e209800,
    I3612_NEG = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR = 0x0f000400,
    I3614_SSRA = 0x0f001400,
    I3614_SHL = 0x0f005400,
    I3614_SLI = 0x2f005400,
    I3614_USHR = 0x2f000400,
    I3614_USRA = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD = 0x0e208400,
    I3616_AND = 0x0e201c00,
    I3616_BIC = 0x0e601c00,
    I3616_BIF = 0x2ee01c00,
    I3616_BIT = 0x2ea01c00,
    I3616_BSL = 0x2e601c00,
    I3616_EOR = 0x2e201c00,
    I3616_MUL = 0x0e209c00,
    I3616_ORR = 0x0ea01c00,
    I3616_ORN = 0x0ee01c00,
    I3616_SUB = 0x2e208400,
    I3616_CMGT = 0x0e203400,
    I3616_CMGE = 0x0e203c00,
    I3616_CMTST = 0x0e208c00,
    I3616_CMHI = 0x2e203400,
    I3616_CMHS = 0x2e203c00,
    I3616_CMEQ = 0x2e208c00,
    I3616_SMAX = 0x0e206400,
    I3616_SMIN = 0x0e206c00,
    I3616_SSHL = 0x0e204400,
    I3616_SQADD = 0x0e200c00,
    I3616_SQSUB = 0x0e202c00,
    I3616_UMAX = 0x2e206400,
    I3616_UMIN = 0x2e206c00,
    I3616_UQADD = 0x2e200c00,
    I3616_UQSUB = 0x2e202c00,
    I3616_USHL = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0 = 0x0e208800,
    I3617_CMEQ0 = 0x0e209800,
    I3617_CMLT0 = 0x0e20a800,
    I3617_CMGE0 = 0x2e208800,
    I3617_CMLE0 = 0x2e209800,
    I3617_NOT = 0x2e205800,
    I3617_ABS = 0x0e20b800,
    I3617_NEG = 0x2e20b800,

    /* System instructions.  */
    NOP = 0xd503201f,
    DMB_ISH = 0xd50338bf,
    DMB_LD = 0x00000100,
    DMB_ST = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
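/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) pastes
 * together into tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm),
 * so a mismatch between format and opcode name fails to compile.
 */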
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift).  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
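/*
 * Worked example (illustrative): limm == 0x0ff0 gives h == 52, l == 4,
 * hence r == 60 and c == 7, i.e. an 8-bit run of ones rotated right by
 * 60, which DecodeBitMasks expands back to 0x0ff0.
 */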
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff.  */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
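    /*
     * (ADR reaches PC +/- 1MiB with a 21-bit byte displacement; ADRP
     * reaches PC +/- 4GiB with a 21-bit displacement in 4KiB pages,
     * hence the two sextract64(disp, 0, 21) checks below.)
     */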
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}
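/*
 * The shift and rotate helpers below use the usual bitfield/extract
 * aliases: e.g. a 32-bit left shift by 3 becomes UBFM Wd, Wn, #29, #28
 * (the LSL alias), and rotates are EXTR with both source registers equal.
 */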
static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}
static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /* Choose X9 as a call-clobbered non-LR temporary.  */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
    }
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
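/*
 * Count leading/trailing zeros.  CTZ is implemented as CLZ(RBIT(x)).
 * When the "value if zero" operand b equals the operation width, CLZ
 * alone already yields that value for a zero input; otherwise compare
 * the input against zero and select between the CLZ result and b.
 */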
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP0;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
    }
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/*
 * For softmmu, perform the TLB load and compare.
 * For useronly, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_mask = (1 << h->aa.align) - 1;

#ifdef CONFIG_SOFTMMU
    unsigned s_mask = (1u << s_bits) - 1;
    unsigned mem_index = get_mmuidx(oi);
    TCGReg addr_adj;
    TCGType mask_type;
    uint64_t compare_mask;

    ldst = new_ldst_label(s);
    ldst->is_ld = is_ld;
    ldst->oi = oi;
    ldst->addrlo_reg = addr_reg;

    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}.  */
    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
    tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into TMP0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                 s->page_bits - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

    /* Load the tlb comparator into TMP0, and the fast path addend into TMP1.  */
    tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
               is_ld ? offsetof(CPUTLBEntry, addr_read)
                     : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
               offsetof(CPUTLBEntry, addend));

    /*
     * For aligned accesses, we check the first byte and include the alignment
     * bits within the address.  For unaligned access, we check that we don't
     * cross pages using the address of the last byte of the access.
     */
    if (a_mask >= s_mask) {
        addr_adj = addr_reg;
    } else {
        addr_adj = TCG_REG_TMP2;
        tcg_out_insn(s, 3401, ADDI, addr_type,
                     addr_adj, addr_reg, s_mask - a_mask);
    }
    compare_mask = (uint64_t)s->page_mask | a_mask;

    /* Store the page mask part of the address into TMP2.  */
    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                     addr_adj, compare_mask);

    /* Perform the address comparison.  */
    tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);

    /* If not equal, we jump to the slow path.  */
    ldst->label_ptr[0] = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    h->base = TCG_REG_TMP1;
    h->index = addr_reg;
    h->index_ext = addr_type;
#else
    if (a_mask) {
        ldst = new_ldst_label(s);

        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        /* tst addr, #mask */
        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

        /* b.ne slow_path */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
    }

    if (guest_base || addr_type == TCG_TYPE_I32) {
        h->base = TCG_REG_GUEST_BASE;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        h->base = addr_reg;
        h->index = TCG_REG_XZR;
        h->index_ext = TCG_TYPE_I64;
    }
#endif

    return ldst;
}
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    TCGReg base;
    bool use_pair;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing.  */
    if (h.index == TCG_REG_XZR) {
        base = h.base;
    } else {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
        } else {
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
        }
    }

    use_pair = h.aa.atom < MO_128 || have_lse2;

    if (!use_pair) {
        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
            branch = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
            use_pair = true;
        }

        if (is_ld) {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             *    ldxp lo, hi, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, .-8
             * Require no overlap between data{lo,hi} and base.
             */
            if (datalo == base || datahi == base) {
                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
                base = TCG_REG_TMP2;
            }
            ll = sl = datalo;
            lh = sh = datahi;
        } else {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             *    1: ldxp t0, t1, [base]
             *       stxp t0, lo, hi, [base]
             *       cbnz t0, 1b
             */
            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
            ll = TCG_REG_TMP0;
            lh = TCG_REG_TMP1;
            sl = datalo;
            sh = datahi;
        }

        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);

        if (use_pair) {
            /* "b .+8", branching across the one insn of use_pair.  */
            tcg_out_insn(s, 3206, B, 2);
            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
        }
    }

    if (use_pair) {
        if (is_ld) {
            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
        } else {
            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    /* Reuse the zeroing that exists for goto_ptr.  */
    if (a0 == 0) {
        tcg_out_goto_long(s, tcg_code_gen_epilogue);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        tcg_out_goto_long(s, tb_ret_addr);
    }
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    set_jmp_reset_offset(s, which);
}
        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
    }
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

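    /* Remainder: a0 = a1 - (a1 / a2) * a2, via [SU]DIV followed by MSUB. */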
    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

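    /* Rotate left by a2 equals rotate right by -a2; RORV uses the count
       modulo the operation width, so negating into a temporary suffices. */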
    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
        break;
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
        break;
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_ext16s(s, ext, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:    /* Always emitted via tcg_out_mov. */
    case INDEX_op_mov_i64:
    case INDEX_op_call:       /* Always emitted via tcg_out_call. */
    case INDEX_op_exit_tb:    /* Always emitted via tcg_out_exit_tb. */
    case INDEX_op_goto_tb:    /* Always emitted via tcg_out_goto_tb. */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op. */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
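                    /* No compare-with-zero form for this condition:
                       materialize zero and use the register-register form. */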
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
                    a2 = TCG_VEC_TMP0;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov. */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec. */
    default:
        g_assert_not_reached();
    }
}

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;

    default:
        return 0;
    }
}

void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
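    /*
     * Rotate left by a2: shift the input right by (-a2 mod width) into a
     * temporary, then use SLI (shift left and insert) to merge in the
     * input shifted left by a2.
     */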
    case INDEX_op_rotli_vec:
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64. */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

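    /* Guest memory accesses take the address in a single host register,
       whether the guest address is 32- or 64-bit; the 128-bit forms add
       a second data register. */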
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        return C_O2_I1(r, r, r);
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return C_O0_I2(rZ, r);
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return C_O0_I3(rZ, rZ, r);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    /*
     * Note that XZR cannot be encoded in the address base register slot,
     * as that actually encodes SP.  Depending on the guest, we may need
     * to zero-extend the guest address via the address index register slot,
     * therefore we need to load even a zero guest base into a register.
     */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr,  -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp,  -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}