/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction.
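           The imm26 field is a signed offset counted in 32-bit insn units,
           so a direct branch can reach +/-128MB from the patched insn.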
*/ 89 *src_rw = deposit32(*src_rw, 0, 26, offset); 90 return true; 91 } 92 return false; 93} 94 95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 96{ 97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 98 ptrdiff_t offset = target - src_rx; 99 100 if (offset == sextract64(offset, 0, 19)) { 101 *src_rw = deposit32(*src_rw, 5, 19, offset); 102 return true; 103 } 104 return false; 105} 106 107static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 108 intptr_t value, intptr_t addend) 109{ 110 tcg_debug_assert(addend == 0); 111 switch (type) { 112 case R_AARCH64_JUMP26: 113 case R_AARCH64_CALL26: 114 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); 115 case R_AARCH64_CONDBR19: 116 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); 117 default: 118 g_assert_not_reached(); 119 } 120} 121 122#define TCG_CT_CONST_AIMM 0x100 123#define TCG_CT_CONST_LIMM 0x200 124#define TCG_CT_CONST_ZERO 0x400 125#define TCG_CT_CONST_MONE 0x800 126#define TCG_CT_CONST_ORRI 0x1000 127#define TCG_CT_CONST_ANDI 0x2000 128 129#define ALL_GENERAL_REGS 0xffffffffu 130#define ALL_VECTOR_REGS 0xffffffff00000000ull 131 132#ifdef CONFIG_SOFTMMU 133#define ALL_QLDST_REGS \ 134 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ 135 (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) 136#else 137#define ALL_QLDST_REGS ALL_GENERAL_REGS 138#endif 139 140/* Match a constant valid for addition (12-bit, optionally shifted). */ 141static inline bool is_aimm(uint64_t val) 142{ 143 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; 144} 145 146/* Match a constant valid for logical operations. */ 147static inline bool is_limm(uint64_t val) 148{ 149 /* Taking a simplified view of the logical immediates for now, ignoring 150 the replication that can happen across the field. Match bit patterns 151 of the forms 152 0....01....1 153 0..01..10..0 154 and their inverses. */ 155 156 /* Make things easier below, by testing the form with msb clear. */ 157 if ((int64_t)val < 0) { 158 val = ~val; 159 } 160 if (val == 0) { 161 return false; 162 } 163 val += val & -val; 164 return (val & (val - 1)) == 0; 165} 166 167/* Return true if v16 is a valid 16-bit shifted immediate. */ 168static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) 169{ 170 if (v16 == (v16 & 0xff)) { 171 *cmode = 0x8; 172 *imm8 = v16 & 0xff; 173 return true; 174 } else if (v16 == (v16 & 0xff00)) { 175 *cmode = 0xa; 176 *imm8 = v16 >> 8; 177 return true; 178 } 179 return false; 180} 181 182/* Return true if v32 is a valid 32-bit shifted immediate. */ 183static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) 184{ 185 if (v32 == (v32 & 0xff)) { 186 *cmode = 0x0; 187 *imm8 = v32 & 0xff; 188 return true; 189 } else if (v32 == (v32 & 0xff00)) { 190 *cmode = 0x2; 191 *imm8 = (v32 >> 8) & 0xff; 192 return true; 193 } else if (v32 == (v32 & 0xff0000)) { 194 *cmode = 0x4; 195 *imm8 = (v32 >> 16) & 0xff; 196 return true; 197 } else if (v32 == (v32 & 0xff000000)) { 198 *cmode = 0x6; 199 *imm8 = v32 >> 24; 200 return true; 201 } 202 return false; 203} 204 205/* Return true if v32 is a valid 32-bit shifting ones immediate. */ 206static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) 207{ 208 if ((v32 & 0xffff00ff) == 0xff) { 209 *cmode = 0xc; 210 *imm8 = (v32 >> 8) & 0xff; 211 return true; 212 } else if ((v32 & 0xff00ffff) == 0xffff) { 213 *cmode = 0xd; 214 *imm8 = (v32 >> 16) & 0xff; 215 return true; 216 } 217 return false; 218} 219 220/* Return true if v32 is a valid float32 immediate. 
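   That is, one expressible as an AdvSIMD FMOV immediate (a sign bit, a
   3-bit exponent and a 4-bit fraction), with all other bits zero.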
*/ 221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) 222{ 223 if (extract32(v32, 0, 19) == 0 224 && (extract32(v32, 25, 6) == 0x20 225 || extract32(v32, 25, 6) == 0x1f)) { 226 *cmode = 0xf; 227 *imm8 = (extract32(v32, 31, 1) << 7) 228 | (extract32(v32, 25, 1) << 6) 229 | extract32(v32, 19, 6); 230 return true; 231 } 232 return false; 233} 234 235/* Return true if v64 is a valid float64 immediate. */ 236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) 237{ 238 if (extract64(v64, 0, 48) == 0 239 && (extract64(v64, 54, 9) == 0x100 240 || extract64(v64, 54, 9) == 0x0ff)) { 241 *cmode = 0xf; 242 *imm8 = (extract64(v64, 63, 1) << 7) 243 | (extract64(v64, 54, 1) << 6) 244 | extract64(v64, 48, 6); 245 return true; 246 } 247 return false; 248} 249 250/* 251 * Return non-zero if v32 can be formed by MOVI+ORR. 252 * Place the parameters for MOVI in (cmode, imm8). 253 * Return the cmode for ORR; the imm8 can be had via extraction from v32. 254 */ 255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) 256{ 257 int i; 258 259 for (i = 6; i > 0; i -= 2) { 260 /* Mask out one byte we can add with ORR. */ 261 uint32_t tmp = v32 & ~(0xffu << (i * 4)); 262 if (is_shimm32(tmp, cmode, imm8) || 263 is_soimm32(tmp, cmode, imm8)) { 264 break; 265 } 266 } 267 return i; 268} 269 270/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ 271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) 272{ 273 if (v32 == deposit32(v32, 16, 16, v32)) { 274 return is_shimm16(v32, cmode, imm8); 275 } else { 276 return is_shimm32(v32, cmode, imm8); 277 } 278} 279 280static bool tcg_target_const_match(int64_t val, TCGType type, int ct) 281{ 282 if (ct & TCG_CT_CONST) { 283 return 1; 284 } 285 if (type == TCG_TYPE_I32) { 286 val = (int32_t)val; 287 } 288 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { 289 return 1; 290 } 291 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { 292 return 1; 293 } 294 if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 295 return 1; 296 } 297 if ((ct & TCG_CT_CONST_MONE) && val == -1) { 298 return 1; 299 } 300 301 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { 302 case 0: 303 break; 304 case TCG_CT_CONST_ANDI: 305 val = ~val; 306 /* fallthru */ 307 case TCG_CT_CONST_ORRI: 308 if (val == deposit64(val, 32, 32, val)) { 309 int cmode, imm8; 310 return is_shimm1632(val, &cmode, &imm8); 311 } 312 break; 313 default: 314 /* Both bits should not be set for the same insn. 
*/ 315 g_assert_not_reached(); 316 } 317 318 return 0; 319} 320 321enum aarch64_cond_code { 322 COND_EQ = 0x0, 323 COND_NE = 0x1, 324 COND_CS = 0x2, /* Unsigned greater or equal */ 325 COND_HS = COND_CS, /* ALIAS greater or equal */ 326 COND_CC = 0x3, /* Unsigned less than */ 327 COND_LO = COND_CC, /* ALIAS Lower */ 328 COND_MI = 0x4, /* Negative */ 329 COND_PL = 0x5, /* Zero or greater */ 330 COND_VS = 0x6, /* Overflow */ 331 COND_VC = 0x7, /* No overflow */ 332 COND_HI = 0x8, /* Unsigned greater than */ 333 COND_LS = 0x9, /* Unsigned less or equal */ 334 COND_GE = 0xa, 335 COND_LT = 0xb, 336 COND_GT = 0xc, 337 COND_LE = 0xd, 338 COND_AL = 0xe, 339 COND_NV = 0xf, /* behaves like COND_AL here */ 340}; 341 342static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { 343 [TCG_COND_EQ] = COND_EQ, 344 [TCG_COND_NE] = COND_NE, 345 [TCG_COND_LT] = COND_LT, 346 [TCG_COND_GE] = COND_GE, 347 [TCG_COND_LE] = COND_LE, 348 [TCG_COND_GT] = COND_GT, 349 /* unsigned */ 350 [TCG_COND_LTU] = COND_LO, 351 [TCG_COND_GTU] = COND_HI, 352 [TCG_COND_GEU] = COND_HS, 353 [TCG_COND_LEU] = COND_LS, 354}; 355 356typedef enum { 357 LDST_ST = 0, /* store */ 358 LDST_LD = 1, /* load */ 359 LDST_LD_S_X = 2, /* load and sign-extend into Xt */ 360 LDST_LD_S_W = 3, /* load and sign-extend into Wt */ 361} AArch64LdstType; 362 363/* We encode the format of the insn into the beginning of the name, so that 364 we can have the preprocessor help "typecheck" the insn vs the output 365 function. Arm didn't provide us with nice names for the formats, so we 366 use the section number of the architecture reference manual in which the 367 instruction group is described. */ 368typedef enum { 369 /* Compare and branch (immediate). */ 370 I3201_CBZ = 0x34000000, 371 I3201_CBNZ = 0x35000000, 372 373 /* Conditional branch (immediate). */ 374 I3202_B_C = 0x54000000, 375 376 /* Unconditional branch (immediate). */ 377 I3206_B = 0x14000000, 378 I3206_BL = 0x94000000, 379 380 /* Unconditional branch (register). */ 381 I3207_BR = 0xd61f0000, 382 I3207_BLR = 0xd63f0000, 383 I3207_RET = 0xd65f0000, 384 385 /* AdvSIMD load/store single structure. */ 386 I3303_LD1R = 0x0d40c000, 387 388 /* Load literal for loading the address at pc-relative offset */ 389 I3305_LDR = 0x58000000, 390 I3305_LDR_v64 = 0x5c000000, 391 I3305_LDR_v128 = 0x9c000000, 392 393 /* Load/store register. Described here as 3.3.12, but the helper 394 that emits them can transform to 3.3.10 or 3.3.13. 
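       (Per the helpers below, 3.3.10 is the register-offset form and 3.3.13
       the scaled unsigned-immediate form; 3.3.12 itself takes a 9-bit signed
       unscaled offset.)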
*/ 395 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, 396 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, 397 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, 398 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, 399 400 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, 401 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, 402 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, 403 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, 404 405 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, 406 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, 407 408 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, 409 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, 410 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, 411 412 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30, 413 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30, 414 415 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30, 416 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30, 417 418 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30, 419 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30, 420 421 I3312_TO_I3310 = 0x00200800, 422 I3312_TO_I3313 = 0x01000000, 423 424 /* Load/store register pair instructions. */ 425 I3314_LDP = 0x28400000, 426 I3314_STP = 0x28000000, 427 428 /* Add/subtract immediate instructions. */ 429 I3401_ADDI = 0x11000000, 430 I3401_ADDSI = 0x31000000, 431 I3401_SUBI = 0x51000000, 432 I3401_SUBSI = 0x71000000, 433 434 /* Bitfield instructions. */ 435 I3402_BFM = 0x33000000, 436 I3402_SBFM = 0x13000000, 437 I3402_UBFM = 0x53000000, 438 439 /* Extract instruction. */ 440 I3403_EXTR = 0x13800000, 441 442 /* Logical immediate instructions. */ 443 I3404_ANDI = 0x12000000, 444 I3404_ORRI = 0x32000000, 445 I3404_EORI = 0x52000000, 446 I3404_ANDSI = 0x72000000, 447 448 /* Move wide immediate instructions. */ 449 I3405_MOVN = 0x12800000, 450 I3405_MOVZ = 0x52800000, 451 I3405_MOVK = 0x72800000, 452 453 /* PC relative addressing instructions. */ 454 I3406_ADR = 0x10000000, 455 I3406_ADRP = 0x90000000, 456 457 /* Add/subtract shifted register instructions (without a shift). */ 458 I3502_ADD = 0x0b000000, 459 I3502_ADDS = 0x2b000000, 460 I3502_SUB = 0x4b000000, 461 I3502_SUBS = 0x6b000000, 462 463 /* Add/subtract shifted register instructions (with a shift). */ 464 I3502S_ADD_LSL = I3502_ADD, 465 466 /* Add/subtract with carry instructions. */ 467 I3503_ADC = 0x1a000000, 468 I3503_SBC = 0x5a000000, 469 470 /* Conditional select instructions. */ 471 I3506_CSEL = 0x1a800000, 472 I3506_CSINC = 0x1a800400, 473 I3506_CSINV = 0x5a800000, 474 I3506_CSNEG = 0x5a800400, 475 476 /* Data-processing (1 source) instructions. */ 477 I3507_CLZ = 0x5ac01000, 478 I3507_RBIT = 0x5ac00000, 479 I3507_REV = 0x5ac00000, /* + size << 10 */ 480 481 /* Data-processing (2 source) instructions. */ 482 I3508_LSLV = 0x1ac02000, 483 I3508_LSRV = 0x1ac02400, 484 I3508_ASRV = 0x1ac02800, 485 I3508_RORV = 0x1ac02c00, 486 I3508_SMULH = 0x9b407c00, 487 I3508_UMULH = 0x9bc07c00, 488 I3508_UDIV = 0x1ac00800, 489 I3508_SDIV = 0x1ac00c00, 490 491 /* Data-processing (3 source) instructions. */ 492 I3509_MADD = 0x1b000000, 493 I3509_MSUB = 0x1b008000, 494 495 /* Logical shifted register instructions (without a shift). 
*/ 496 I3510_AND = 0x0a000000, 497 I3510_BIC = 0x0a200000, 498 I3510_ORR = 0x2a000000, 499 I3510_ORN = 0x2a200000, 500 I3510_EOR = 0x4a000000, 501 I3510_EON = 0x4a200000, 502 I3510_ANDS = 0x6a000000, 503 504 /* Logical shifted register instructions (with a shift). */ 505 I3502S_AND_LSR = I3510_AND | (1 << 22), 506 507 /* AdvSIMD copy */ 508 I3605_DUP = 0x0e000400, 509 I3605_INS = 0x4e001c00, 510 I3605_UMOV = 0x0e003c00, 511 512 /* AdvSIMD modified immediate */ 513 I3606_MOVI = 0x0f000400, 514 I3606_MVNI = 0x2f000400, 515 I3606_BIC = 0x2f001400, 516 I3606_ORR = 0x0f001400, 517 518 /* AdvSIMD scalar shift by immediate */ 519 I3609_SSHR = 0x5f000400, 520 I3609_SSRA = 0x5f001400, 521 I3609_SHL = 0x5f005400, 522 I3609_USHR = 0x7f000400, 523 I3609_USRA = 0x7f001400, 524 I3609_SLI = 0x7f005400, 525 526 /* AdvSIMD scalar three same */ 527 I3611_SQADD = 0x5e200c00, 528 I3611_SQSUB = 0x5e202c00, 529 I3611_CMGT = 0x5e203400, 530 I3611_CMGE = 0x5e203c00, 531 I3611_SSHL = 0x5e204400, 532 I3611_ADD = 0x5e208400, 533 I3611_CMTST = 0x5e208c00, 534 I3611_UQADD = 0x7e200c00, 535 I3611_UQSUB = 0x7e202c00, 536 I3611_CMHI = 0x7e203400, 537 I3611_CMHS = 0x7e203c00, 538 I3611_USHL = 0x7e204400, 539 I3611_SUB = 0x7e208400, 540 I3611_CMEQ = 0x7e208c00, 541 542 /* AdvSIMD scalar two-reg misc */ 543 I3612_CMGT0 = 0x5e208800, 544 I3612_CMEQ0 = 0x5e209800, 545 I3612_CMLT0 = 0x5e20a800, 546 I3612_ABS = 0x5e20b800, 547 I3612_CMGE0 = 0x7e208800, 548 I3612_CMLE0 = 0x7e209800, 549 I3612_NEG = 0x7e20b800, 550 551 /* AdvSIMD shift by immediate */ 552 I3614_SSHR = 0x0f000400, 553 I3614_SSRA = 0x0f001400, 554 I3614_SHL = 0x0f005400, 555 I3614_SLI = 0x2f005400, 556 I3614_USHR = 0x2f000400, 557 I3614_USRA = 0x2f001400, 558 559 /* AdvSIMD three same. */ 560 I3616_ADD = 0x0e208400, 561 I3616_AND = 0x0e201c00, 562 I3616_BIC = 0x0e601c00, 563 I3616_BIF = 0x2ee01c00, 564 I3616_BIT = 0x2ea01c00, 565 I3616_BSL = 0x2e601c00, 566 I3616_EOR = 0x2e201c00, 567 I3616_MUL = 0x0e209c00, 568 I3616_ORR = 0x0ea01c00, 569 I3616_ORN = 0x0ee01c00, 570 I3616_SUB = 0x2e208400, 571 I3616_CMGT = 0x0e203400, 572 I3616_CMGE = 0x0e203c00, 573 I3616_CMTST = 0x0e208c00, 574 I3616_CMHI = 0x2e203400, 575 I3616_CMHS = 0x2e203c00, 576 I3616_CMEQ = 0x2e208c00, 577 I3616_SMAX = 0x0e206400, 578 I3616_SMIN = 0x0e206c00, 579 I3616_SSHL = 0x0e204400, 580 I3616_SQADD = 0x0e200c00, 581 I3616_SQSUB = 0x0e202c00, 582 I3616_UMAX = 0x2e206400, 583 I3616_UMIN = 0x2e206c00, 584 I3616_UQADD = 0x2e200c00, 585 I3616_UQSUB = 0x2e202c00, 586 I3616_USHL = 0x2e204400, 587 588 /* AdvSIMD two-reg misc. */ 589 I3617_CMGT0 = 0x0e208800, 590 I3617_CMEQ0 = 0x0e209800, 591 I3617_CMLT0 = 0x0e20a800, 592 I3617_CMGE0 = 0x2e208800, 593 I3617_CMLE0 = 0x2e209800, 594 I3617_NOT = 0x2e205800, 595 I3617_ABS = 0x0e20b800, 596 I3617_NEG = 0x2e20b800, 597 598 /* System instructions. */ 599 NOP = 0xd503201f, 600 DMB_ISH = 0xd50338bf, 601 DMB_LD = 0x00000100, 602 DMB_ST = 0x00000200, 603} AArch64Insn; 604 605static inline uint32_t tcg_in32(TCGContext *s) 606{ 607 uint32_t v = *(uint32_t *)s->code_ptr; 608 return v; 609} 610 611/* Emit an opcode with "type-checking" of the format. */ 612#define tcg_out_insn(S, FMT, OP, ...) 
\ 613 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) 614 615static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, 616 TCGReg rt, TCGReg rn, unsigned size) 617{ 618 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); 619} 620 621static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, 622 int imm19, TCGReg rt) 623{ 624 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); 625} 626 627static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, 628 TCGReg rt, int imm19) 629{ 630 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); 631} 632 633static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, 634 TCGCond c, int imm19) 635{ 636 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); 637} 638 639static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) 640{ 641 tcg_out32(s, insn | (imm26 & 0x03ffffff)); 642} 643 644static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) 645{ 646 tcg_out32(s, insn | rn << 5); 647} 648 649static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, 650 TCGReg r1, TCGReg r2, TCGReg rn, 651 tcg_target_long ofs, bool pre, bool w) 652{ 653 insn |= 1u << 31; /* ext */ 654 insn |= pre << 24; 655 insn |= w << 23; 656 657 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); 658 insn |= (ofs & (0x7f << 3)) << (15 - 3); 659 660 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); 661} 662 663static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, 664 TCGReg rd, TCGReg rn, uint64_t aimm) 665{ 666 if (aimm > 0xfff) { 667 tcg_debug_assert((aimm & 0xfff) == 0); 668 aimm >>= 12; 669 tcg_debug_assert(aimm <= 0xfff); 670 aimm |= 1 << 12; /* apply LSL 12 */ 671 } 672 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); 673} 674 675/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 676 (Logical immediate). Both insn groups have N, IMMR and IMMS fields 677 that feed the DecodeBitMasks pseudo function. */ 678static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, 679 TCGReg rd, TCGReg rn, int n, int immr, int imms) 680{ 681 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 682 | rn << 5 | rd); 683} 684 685#define tcg_out_insn_3404 tcg_out_insn_3402 686 687static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, 688 TCGReg rd, TCGReg rn, TCGReg rm, int imms) 689{ 690 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 691 | rn << 5 | rd); 692} 693 694/* This function is used for the Move (wide immediate) instruction group. 695 Note that SHIFT is a full shift count, not the 2 bit HW field. */ 696static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, 697 TCGReg rd, uint16_t half, unsigned shift) 698{ 699 tcg_debug_assert((shift & ~0x30) == 0); 700 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); 701} 702 703static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, 704 TCGReg rd, int64_t disp) 705{ 706 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); 707} 708 709/* This function is for both 3.5.2 (Add/Subtract shifted register), for 710 the rare occasion when we actually want to supply a shift amount. 
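   The shift type itself is part of the opcode constant (LSL for
   I3502S_ADD_LSL, LSR for I3502S_AND_LSR); only the 6-bit shift amount is
   supplied here.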
 */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
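    /* The "0x4000 | ext << 13" term below selects the index extend option:
       UXTW (zero-extend a 32-bit index) for TCG_TYPE_I32, LSL of the full
       64-bit index for TCG_TYPE_I64; S is left 0, so no scaling.  */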
811 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 812 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | 813 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); 814} 815 816static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, 817 TCGReg rd, TCGReg rn, intptr_t offset) 818{ 819 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f)); 820} 821 822static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, 823 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) 824{ 825 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ 826 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 827 | rn << 5 | (rd & 0x1f)); 828} 829 830/* Register to register move using ORR (shifted register with no shift). */ 831static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) 832{ 833 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); 834} 835 836/* Register to register move using ADDI (move to/from SP). */ 837static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) 838{ 839 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); 840} 841 842/* This function is used for the Logical (immediate) instruction group. 843 The value of LIMM must satisfy IS_LIMM. See the comment above about 844 only supporting simplified logical immediates. */ 845static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, 846 TCGReg rd, TCGReg rn, uint64_t limm) 847{ 848 unsigned h, l, r, c; 849 850 tcg_debug_assert(is_limm(limm)); 851 852 h = clz64(limm); 853 l = ctz64(limm); 854 if (l == 0) { 855 r = 0; /* form 0....01....1 */ 856 c = ctz64(~limm) - 1; 857 if (h == 0) { 858 r = clz64(~limm); /* form 1..10..01..1 */ 859 c += r; 860 } 861 } else { 862 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ 863 c = r - h - 1; 864 } 865 if (ext == TCG_TYPE_I32) { 866 r &= 31; 867 c &= 31; 868 } 869 870 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); 871} 872 873static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 874 TCGReg rd, int64_t v64) 875{ 876 bool q = type == TCG_TYPE_V128; 877 int cmode, imm8, i; 878 879 /* Test all bytes equal first. */ 880 if (vece == MO_8) { 881 imm8 = (uint8_t)v64; 882 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); 883 return; 884 } 885 886 /* 887 * Test all bytes 0x00 or 0xff second. This can match cases that 888 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. 889 */ 890 for (i = imm8 = 0; i < 8; i++) { 891 uint8_t byte = v64 >> (i * 8); 892 if (byte == 0xff) { 893 imm8 |= 1 << i; 894 } else if (byte != 0) { 895 goto fail_bytes; 896 } 897 } 898 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); 899 return; 900 fail_bytes: 901 902 /* 903 * Tests for various replications. For each element width, if we 904 * cannot find an expansion there's no point checking a larger 905 * width because we already know by replication it cannot match. 906 */ 907 if (vece == MO_16) { 908 uint16_t v16 = v64; 909 910 if (is_shimm16(v16, &cmode, &imm8)) { 911 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 912 return; 913 } 914 if (is_shimm16(~v16, &cmode, &imm8)) { 915 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 916 return; 917 } 918 919 /* 920 * Otherwise, all remaining constants can be loaded in two insns: 921 * rd = v16 & 0xff, rd |= v16 & 0xff00. 
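         * (MOVI with cmode 0x8 writes the low byte of each 16-bit lane;
         * ORR with cmode 0xa then merges in the high byte.)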
922 */ 923 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); 924 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); 925 return; 926 } else if (vece == MO_32) { 927 uint32_t v32 = v64; 928 uint32_t n32 = ~v32; 929 930 if (is_shimm32(v32, &cmode, &imm8) || 931 is_soimm32(v32, &cmode, &imm8) || 932 is_fimm32(v32, &cmode, &imm8)) { 933 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 934 return; 935 } 936 if (is_shimm32(n32, &cmode, &imm8) || 937 is_soimm32(n32, &cmode, &imm8)) { 938 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 939 return; 940 } 941 942 /* 943 * Restrict the set of constants to those we can load with 944 * two instructions. Others we load from the pool. 945 */ 946 i = is_shimm32_pair(v32, &cmode, &imm8); 947 if (i) { 948 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); 949 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); 950 return; 951 } 952 i = is_shimm32_pair(n32, &cmode, &imm8); 953 if (i) { 954 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); 955 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); 956 return; 957 } 958 } else if (is_fimm64(v64, &cmode, &imm8)) { 959 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); 960 return; 961 } 962 963 /* 964 * As a last resort, load from the constant pool. Sadly there 965 * is no LD1R (literal), so store the full 16-byte vector. 966 */ 967 if (type == TCG_TYPE_V128) { 968 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); 969 tcg_out_insn(s, 3305, LDR_v128, 0, rd); 970 } else { 971 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0); 972 tcg_out_insn(s, 3305, LDR_v64, 0, rd); 973 } 974} 975 976static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 977 TCGReg rd, TCGReg rs) 978{ 979 int is_q = type - TCG_TYPE_V64; 980 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); 981 return true; 982} 983 984static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 985 TCGReg r, TCGReg base, intptr_t offset) 986{ 987 TCGReg temp = TCG_REG_TMP; 988 989 if (offset < -0xffffff || offset > 0xffffff) { 990 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); 991 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); 992 base = temp; 993 } else { 994 AArch64Insn add_insn = I3401_ADDI; 995 996 if (offset < 0) { 997 add_insn = I3401_SUBI; 998 offset = -offset; 999 } 1000 if (offset & 0xfff000) { 1001 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); 1002 base = temp; 1003 } 1004 if (offset & 0xfff) { 1005 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); 1006 base = temp; 1007 } 1008 } 1009 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); 1010 return true; 1011} 1012 1013static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, 1014 tcg_target_long value) 1015{ 1016 tcg_target_long svalue = value; 1017 tcg_target_long ivalue = ~value; 1018 tcg_target_long t0, t1, t2; 1019 int s0, s1; 1020 AArch64Insn opc; 1021 1022 switch (type) { 1023 case TCG_TYPE_I32: 1024 case TCG_TYPE_I64: 1025 tcg_debug_assert(rd < 32); 1026 break; 1027 default: 1028 g_assert_not_reached(); 1029 } 1030 1031 /* For 32-bit values, discard potential garbage in value. For 64-bit 1032 values within [2**31, 2**32-1], we can create smaller sequences by 1033 interpreting this as a negative 32-bit number, while ensuring that 1034 the high 32 bits are cleared by setting SF=0. 
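       For example, 0x00000000fffffffc can then be loaded with a single MOVN
       of the 32-bit register (writing Wd clears the high half), instead of
       a longer 64-bit sequence.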
*/ 1035 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { 1036 svalue = (int32_t)value; 1037 value = (uint32_t)value; 1038 ivalue = (uint32_t)ivalue; 1039 type = TCG_TYPE_I32; 1040 } 1041 1042 /* Speed things up by handling the common case of small positive 1043 and negative values specially. */ 1044 if ((value & ~0xffffull) == 0) { 1045 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); 1046 return; 1047 } else if ((ivalue & ~0xffffull) == 0) { 1048 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); 1049 return; 1050 } 1051 1052 /* Check for bitfield immediates. For the benefit of 32-bit quantities, 1053 use the sign-extended value. That lets us match rotated values such 1054 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ 1055 if (is_limm(svalue)) { 1056 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); 1057 return; 1058 } 1059 1060 /* Look for host pointer values within 4G of the PC. This happens 1061 often when loading pointers to QEMU's own data structures. */ 1062 if (type == TCG_TYPE_I64) { 1063 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); 1064 tcg_target_long disp = value - src_rx; 1065 if (disp == sextract64(disp, 0, 21)) { 1066 tcg_out_insn(s, 3406, ADR, rd, disp); 1067 return; 1068 } 1069 disp = (value >> 12) - (src_rx >> 12); 1070 if (disp == sextract64(disp, 0, 21)) { 1071 tcg_out_insn(s, 3406, ADRP, rd, disp); 1072 if (value & 0xfff) { 1073 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); 1074 } 1075 return; 1076 } 1077 } 1078 1079 /* Would it take fewer insns to begin with MOVN? */ 1080 if (ctpop64(value) >= 32) { 1081 t0 = ivalue; 1082 opc = I3405_MOVN; 1083 } else { 1084 t0 = value; 1085 opc = I3405_MOVZ; 1086 } 1087 s0 = ctz64(t0) & (63 & -16); 1088 t1 = t0 & ~(0xffffull << s0); 1089 s1 = ctz64(t1) & (63 & -16); 1090 t2 = t1 & ~(0xffffull << s1); 1091 if (t2 == 0) { 1092 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0); 1093 if (t1 != 0) { 1094 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1); 1095 } 1096 return; 1097 } 1098 1099 /* For more than 2 insns, dump it into the constant pool. */ 1100 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0); 1101 tcg_out_insn(s, 3305, LDR, 0, rd); 1102} 1103 1104static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1105{ 1106 return false; 1107} 1108 1109static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1110 tcg_target_long imm) 1111{ 1112 /* This function is only used for passing structs by reference. */ 1113 g_assert_not_reached(); 1114} 1115 1116/* Define something more legible for general use. */ 1117#define tcg_out_ldst_r tcg_out_insn_3310 1118 1119static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, 1120 TCGReg rn, intptr_t offset, int lgsize) 1121{ 1122 /* If the offset is naturally aligned and in range, then we can 1123 use the scaled uimm12 encoding */ 1124 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) { 1125 uintptr_t scaled_uimm = offset >> lgsize; 1126 if (scaled_uimm <= 0xfff) { 1127 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); 1128 return; 1129 } 1130 } 1131 1132 /* Small signed offsets can use the unscaled encoding. */ 1133 if (offset >= -256 && offset < 256) { 1134 tcg_out_insn_3312(s, insn, rd, rn, offset); 1135 return; 1136 } 1137 1138 /* Worst-case scenario, move offset to temp register, use reg offset. 
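       TCG_REG_TMP is x30, which is kept out of the register allocator, so
       it is always free for use as scratch here.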
*/ 1139 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); 1140 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); 1141} 1142 1143static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 1144{ 1145 if (ret == arg) { 1146 return true; 1147 } 1148 switch (type) { 1149 case TCG_TYPE_I32: 1150 case TCG_TYPE_I64: 1151 if (ret < 32 && arg < 32) { 1152 tcg_out_movr(s, type, ret, arg); 1153 break; 1154 } else if (ret < 32) { 1155 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0); 1156 break; 1157 } else if (arg < 32) { 1158 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0); 1159 break; 1160 } 1161 /* FALLTHRU */ 1162 1163 case TCG_TYPE_V64: 1164 tcg_debug_assert(ret >= 32 && arg >= 32); 1165 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg); 1166 break; 1167 case TCG_TYPE_V128: 1168 tcg_debug_assert(ret >= 32 && arg >= 32); 1169 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg); 1170 break; 1171 1172 default: 1173 g_assert_not_reached(); 1174 } 1175 return true; 1176} 1177 1178static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1179 TCGReg base, intptr_t ofs) 1180{ 1181 AArch64Insn insn; 1182 int lgsz; 1183 1184 switch (type) { 1185 case TCG_TYPE_I32: 1186 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS); 1187 lgsz = 2; 1188 break; 1189 case TCG_TYPE_I64: 1190 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD); 1191 lgsz = 3; 1192 break; 1193 case TCG_TYPE_V64: 1194 insn = I3312_LDRVD; 1195 lgsz = 3; 1196 break; 1197 case TCG_TYPE_V128: 1198 insn = I3312_LDRVQ; 1199 lgsz = 4; 1200 break; 1201 default: 1202 g_assert_not_reached(); 1203 } 1204 tcg_out_ldst(s, insn, ret, base, ofs, lgsz); 1205} 1206 1207static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src, 1208 TCGReg base, intptr_t ofs) 1209{ 1210 AArch64Insn insn; 1211 int lgsz; 1212 1213 switch (type) { 1214 case TCG_TYPE_I32: 1215 insn = (src < 32 ? I3312_STRW : I3312_STRVS); 1216 lgsz = 2; 1217 break; 1218 case TCG_TYPE_I64: 1219 insn = (src < 32 ? I3312_STRX : I3312_STRVD); 1220 lgsz = 3; 1221 break; 1222 case TCG_TYPE_V64: 1223 insn = I3312_STRVD; 1224 lgsz = 3; 1225 break; 1226 case TCG_TYPE_V128: 1227 insn = I3312_STRVQ; 1228 lgsz = 4; 1229 break; 1230 default: 1231 g_assert_not_reached(); 1232 } 1233 tcg_out_ldst(s, insn, src, base, ofs, lgsz); 1234} 1235 1236static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1237 TCGReg base, intptr_t ofs) 1238{ 1239 if (type <= TCG_TYPE_I64 && val == 0) { 1240 tcg_out_st(s, type, TCG_REG_XZR, base, ofs); 1241 return true; 1242 } 1243 return false; 1244} 1245 1246static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, 1247 TCGReg rn, unsigned int a, unsigned int b) 1248{ 1249 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); 1250} 1251 1252static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, 1253 TCGReg rn, unsigned int a, unsigned int b) 1254{ 1255 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); 1256} 1257 1258static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, 1259 TCGReg rn, unsigned int a, unsigned int b) 1260{ 1261 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); 1262} 1263 1264static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, 1265 TCGReg rn, TCGReg rm, unsigned int a) 1266{ 1267 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); 1268} 1269 1270static inline void tcg_out_shl(TCGContext *s, TCGType ext, 1271 TCGReg rd, TCGReg rn, unsigned int m) 1272{ 1273 int bits = ext ? 
64 : 32; 1274 int max = bits - 1; 1275 tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max); 1276} 1277 1278static inline void tcg_out_shr(TCGContext *s, TCGType ext, 1279 TCGReg rd, TCGReg rn, unsigned int m) 1280{ 1281 int max = ext ? 63 : 31; 1282 tcg_out_ubfm(s, ext, rd, rn, m & max, max); 1283} 1284 1285static inline void tcg_out_sar(TCGContext *s, TCGType ext, 1286 TCGReg rd, TCGReg rn, unsigned int m) 1287{ 1288 int max = ext ? 63 : 31; 1289 tcg_out_sbfm(s, ext, rd, rn, m & max, max); 1290} 1291 1292static inline void tcg_out_rotr(TCGContext *s, TCGType ext, 1293 TCGReg rd, TCGReg rn, unsigned int m) 1294{ 1295 int max = ext ? 63 : 31; 1296 tcg_out_extr(s, ext, rd, rn, rn, m & max); 1297} 1298 1299static inline void tcg_out_rotl(TCGContext *s, TCGType ext, 1300 TCGReg rd, TCGReg rn, unsigned int m) 1301{ 1302 int max = ext ? 63 : 31; 1303 tcg_out_extr(s, ext, rd, rn, rn, -m & max); 1304} 1305 1306static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, 1307 TCGReg rn, unsigned lsb, unsigned width) 1308{ 1309 unsigned size = ext ? 64 : 32; 1310 unsigned a = (size - lsb) & (size - 1); 1311 unsigned b = width - 1; 1312 tcg_out_bfm(s, ext, rd, rn, a, b); 1313} 1314 1315static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, 1316 tcg_target_long b, bool const_b) 1317{ 1318 if (const_b) { 1319 /* Using CMP or CMN aliases. */ 1320 if (b >= 0) { 1321 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); 1322 } else { 1323 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); 1324 } 1325 } else { 1326 /* Using CMP alias SUBS wzr, Wn, Wm */ 1327 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); 1328 } 1329} 1330 1331static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) 1332{ 1333 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1334 tcg_debug_assert(offset == sextract64(offset, 0, 26)); 1335 tcg_out_insn(s, 3206, B, offset); 1336} 1337 1338static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) 1339{ 1340 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1341 if (offset == sextract64(offset, 0, 26)) { 1342 tcg_out_insn(s, 3206, B, offset); 1343 } else { 1344 /* Choose X9 as a call-clobbered non-LR temporary. 
*/ 1345 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target); 1346 tcg_out_insn(s, 3207, BR, TCG_REG_X9); 1347 } 1348} 1349 1350static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) 1351{ 1352 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; 1353 if (offset == sextract64(offset, 0, 26)) { 1354 tcg_out_insn(s, 3206, BL, offset); 1355 } else { 1356 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); 1357 tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); 1358 } 1359} 1360 1361static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 1362 const TCGHelperInfo *info) 1363{ 1364 tcg_out_call_int(s, target); 1365} 1366 1367static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) 1368{ 1369 if (!l->has_value) { 1370 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); 1371 tcg_out_insn(s, 3206, B, 0); 1372 } else { 1373 tcg_out_goto(s, l->u.value_ptr); 1374 } 1375} 1376 1377static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, 1378 TCGArg b, bool b_const, TCGLabel *l) 1379{ 1380 intptr_t offset; 1381 bool need_cmp; 1382 1383 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { 1384 need_cmp = false; 1385 } else { 1386 need_cmp = true; 1387 tcg_out_cmp(s, ext, a, b, b_const); 1388 } 1389 1390 if (!l->has_value) { 1391 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); 1392 offset = tcg_in32(s) >> 5; 1393 } else { 1394 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; 1395 tcg_debug_assert(offset == sextract64(offset, 0, 19)); 1396 } 1397 1398 if (need_cmp) { 1399 tcg_out_insn(s, 3202, B_C, c, offset); 1400 } else if (c == TCG_COND_EQ) { 1401 tcg_out_insn(s, 3201, CBZ, ext, a, offset); 1402 } else { 1403 tcg_out_insn(s, 3201, CBNZ, ext, a, offset); 1404 } 1405} 1406 1407static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, 1408 TCGReg rd, TCGReg rn) 1409{ 1410 /* REV, REV16, REV32 */ 1411 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); 1412} 1413 1414static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, 1415 TCGReg rd, TCGReg rn) 1416{ 1417 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ 1418 int bits = (8 << s_bits) - 1; 1419 tcg_out_sbfm(s, ext, rd, rn, 0, bits); 1420} 1421 1422static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn) 1423{ 1424 tcg_out_sxt(s, type, MO_8, rd, rn); 1425} 1426 1427static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn) 1428{ 1429 tcg_out_sxt(s, type, MO_16, rd, rn); 1430} 1431 1432static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn) 1433{ 1434 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn); 1435} 1436 1437static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn) 1438{ 1439 tcg_out_ext32s(s, rd, rn); 1440} 1441 1442static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits, 1443 TCGReg rd, TCGReg rn) 1444{ 1445 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ 1446 int bits = (8 << s_bits) - 1; 1447 tcg_out_ubfm(s, 0, rd, rn, 0, bits); 1448} 1449 1450static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn) 1451{ 1452 tcg_out_uxt(s, MO_8, rd, rn); 1453} 1454 1455static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn) 1456{ 1457 tcg_out_uxt(s, MO_16, rd, rn); 1458} 1459 1460static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn) 1461{ 1462 tcg_out_movr(s, TCG_TYPE_I32, rd, rn); 1463} 1464 1465static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn) 1466{ 1467 tcg_out_ext32u(s, rd, rn); 1468} 1469 1470static void 
tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) 1471{ 1472 tcg_out_mov(s, TCG_TYPE_I32, rd, rn); 1473} 1474 1475static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, 1476 TCGReg rn, int64_t aimm) 1477{ 1478 if (aimm >= 0) { 1479 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); 1480 } else { 1481 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); 1482 } 1483} 1484 1485static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, 1486 TCGReg rh, TCGReg al, TCGReg ah, 1487 tcg_target_long bl, tcg_target_long bh, 1488 bool const_bl, bool const_bh, bool sub) 1489{ 1490 TCGReg orig_rl = rl; 1491 AArch64Insn insn; 1492 1493 if (rl == ah || (!const_bh && rl == bh)) { 1494 rl = TCG_REG_TMP; 1495 } 1496 1497 if (const_bl) { 1498 if (bl < 0) { 1499 bl = -bl; 1500 insn = sub ? I3401_ADDSI : I3401_SUBSI; 1501 } else { 1502 insn = sub ? I3401_SUBSI : I3401_ADDSI; 1503 } 1504 1505 if (unlikely(al == TCG_REG_XZR)) { 1506 /* ??? We want to allow al to be zero for the benefit of 1507 negation via subtraction. However, that leaves open the 1508 possibility of adding 0+const in the low part, and the 1509 immediate add instructions encode XSP not XZR. Don't try 1510 anything more elaborate here than loading another zero. */ 1511 al = TCG_REG_TMP; 1512 tcg_out_movi(s, ext, al, 0); 1513 } 1514 tcg_out_insn_3401(s, insn, ext, rl, al, bl); 1515 } else { 1516 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); 1517 } 1518 1519 insn = I3503_ADC; 1520 if (const_bh) { 1521 /* Note that the only two constants we support are 0 and -1, and 1522 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ 1523 if ((bh != 0) ^ sub) { 1524 insn = I3503_SBC; 1525 } 1526 bh = TCG_REG_XZR; 1527 } else if (sub) { 1528 insn = I3503_SBC; 1529 } 1530 tcg_out_insn_3503(s, insn, ext, rh, ah, bh); 1531 1532 tcg_out_mov(s, ext, orig_rl, rl); 1533} 1534 1535static inline void tcg_out_mb(TCGContext *s, TCGArg a0) 1536{ 1537 static const uint32_t sync[] = { 1538 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, 1539 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, 1540 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1541 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, 1542 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, 1543 }; 1544 tcg_out32(s, sync[a0 & TCG_MO_ALL]); 1545} 1546 1547static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, 1548 TCGReg a0, TCGArg b, bool const_b, bool is_ctz) 1549{ 1550 TCGReg a1 = a0; 1551 if (is_ctz) { 1552 a1 = TCG_REG_TMP; 1553 tcg_out_insn(s, 3507, RBIT, ext, a1, a0); 1554 } 1555 if (const_b && b == (ext ? 
64 : 32)) { 1556 tcg_out_insn(s, 3507, CLZ, ext, d, a1); 1557 } else { 1558 AArch64Insn sel = I3506_CSEL; 1559 1560 tcg_out_cmp(s, ext, a0, 0, 1); 1561 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); 1562 1563 if (const_b) { 1564 if (b == -1) { 1565 b = TCG_REG_XZR; 1566 sel = I3506_CSINV; 1567 } else if (b == 0) { 1568 b = TCG_REG_XZR; 1569 } else { 1570 tcg_out_movi(s, ext, d, b); 1571 b = d; 1572 } 1573 } 1574 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); 1575 } 1576} 1577 1578typedef struct { 1579 TCGReg base; 1580 TCGReg index; 1581 TCGType index_ext; 1582 TCGAtomAlign aa; 1583} HostAddress; 1584 1585bool tcg_target_has_memory_bswap(MemOp memop) 1586{ 1587 return false; 1588} 1589 1590static const TCGLdstHelperParam ldst_helper_param = { 1591 .ntmp = 1, .tmp = { TCG_REG_TMP } 1592}; 1593 1594static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1595{ 1596 MemOp opc = get_memop(lb->oi); 1597 1598 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1599 return false; 1600 } 1601 1602 tcg_out_ld_helper_args(s, lb, &ldst_helper_param); 1603 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); 1604 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); 1605 tcg_out_goto(s, lb->raddr); 1606 return true; 1607} 1608 1609static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 1610{ 1611 MemOp opc = get_memop(lb->oi); 1612 1613 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 1614 return false; 1615 } 1616 1617 tcg_out_st_helper_args(s, lb, &ldst_helper_param); 1618 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); 1619 tcg_out_goto(s, lb->raddr); 1620 return true; 1621} 1622 1623/* 1624 * For softmmu, perform the TLB load and compare. 1625 * For useronly, perform any required alignment tests. 1626 * In both cases, return a TCGLabelQemuLdst structure if the slow path 1627 * is required and fill in @h with the host address for the fast path. 1628 */ 1629static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 1630 TCGReg addr_reg, MemOpIdx oi, 1631 bool is_ld) 1632{ 1633 TCGType addr_type = s->addr_type; 1634 TCGLabelQemuLdst *ldst = NULL; 1635 MemOp opc = get_memop(oi); 1636 unsigned a_mask; 1637 1638 h->aa = atom_and_align_for_opc(s, opc, 1639 have_lse2 ? MO_ATOM_WITHIN16 1640 : MO_ATOM_IFALIGN, 1641 false); 1642 a_mask = (1 << h->aa.align) - 1; 1643 1644#ifdef CONFIG_SOFTMMU 1645 unsigned s_bits = opc & MO_SIZE; 1646 unsigned s_mask = (1u << s_bits) - 1; 1647 unsigned mem_index = get_mmuidx(oi); 1648 TCGReg x3; 1649 TCGType mask_type; 1650 uint64_t compare_mask; 1651 1652 ldst = new_ldst_label(s); 1653 ldst->is_ld = is_ld; 1654 ldst->oi = oi; 1655 ldst->addrlo_reg = addr_reg; 1656 1657 mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32 1658 ? TCG_TYPE_I64 : TCG_TYPE_I32); 1659 1660 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ 1661 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 1662 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); 1663 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); 1664 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); 1665 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, 1666 TLB_MASK_TABLE_OFS(mem_index), 1, 0); 1667 1668 /* Extract the TLB index from the address into X0. */ 1669 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, 1670 TCG_REG_X0, TCG_REG_X0, addr_reg, 1671 s->page_bits - CPU_TLB_ENTRY_BITS); 1672 1673 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. 
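       At this point X0 holds the byte offset of the TLB entry: the
       page-index bits of the address ANDed with the pre-scaled mask loaded
       above.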
 */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
               is_ld ? offsetof(CPUTLBEntry, addr_read)
                     : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /*
     * For aligned accesses, we check the first byte and include the alignment
     * bits within the address.  For unaligned access, we check that we don't
     * cross pages using the address of the last byte of the access.
     */
    if (a_mask >= s_mask) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, addr_type,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)s->page_mask | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    ldst->label_ptr[0] = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    h->base = TCG_REG_X1;
    h->index = addr_reg;
    h->index_ext = addr_type;
#else
    if (a_mask) {
        ldst = new_ldst_label(s);

        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        /* tst addr, #mask */
        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

        /* b.ne slow_path */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
    }

    if (guest_base || addr_type == TCG_TYPE_I32) {
        h->base = TCG_REG_GUEST_BASE;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        h->base = addr_reg;
        h->index = TCG_REG_XZR;
        h->index_ext = TCG_TYPE_I64;
    }
#endif

    return ldst;
}

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ?
I3312_LDRSHX : I3312_LDRSHW), 1756 data_r, h.base, h.index_ext, h.index); 1757 break; 1758 case MO_UL: 1759 tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index); 1760 break; 1761 case MO_SL: 1762 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index); 1763 break; 1764 case MO_UQ: 1765 tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index); 1766 break; 1767 default: 1768 g_assert_not_reached(); 1769 } 1770} 1771 1772static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, 1773 TCGReg data_r, HostAddress h) 1774{ 1775 switch (memop & MO_SIZE) { 1776 case MO_8: 1777 tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index); 1778 break; 1779 case MO_16: 1780 tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index); 1781 break; 1782 case MO_32: 1783 tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index); 1784 break; 1785 case MO_64: 1786 tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index); 1787 break; 1788 default: 1789 g_assert_not_reached(); 1790 } 1791} 1792 1793static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1794 MemOpIdx oi, TCGType data_type) 1795{ 1796 TCGLabelQemuLdst *ldst; 1797 HostAddress h; 1798 1799 ldst = prepare_host_addr(s, &h, addr_reg, oi, true); 1800 tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h); 1801 1802 if (ldst) { 1803 ldst->type = data_type; 1804 ldst->datalo_reg = data_reg; 1805 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1806 } 1807} 1808 1809static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, 1810 MemOpIdx oi, TCGType data_type) 1811{ 1812 TCGLabelQemuLdst *ldst; 1813 HostAddress h; 1814 1815 ldst = prepare_host_addr(s, &h, addr_reg, oi, false); 1816 tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); 1817 1818 if (ldst) { 1819 ldst->type = data_type; 1820 ldst->datalo_reg = data_reg; 1821 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 1822 } 1823} 1824 1825static const tcg_insn_unit *tb_ret_addr; 1826 1827static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) 1828{ 1829 /* Reuse the zeroing that exists for goto_ptr. */ 1830 if (a0 == 0) { 1831 tcg_out_goto_long(s, tcg_code_gen_epilogue); 1832 } else { 1833 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); 1834 tcg_out_goto_long(s, tb_ret_addr); 1835 } 1836} 1837 1838static void tcg_out_goto_tb(TCGContext *s, int which) 1839{ 1840 /* 1841 * Direct branch, or indirect address load, will be patched 1842 * by tb_target_set_jmp_target. Assert indirect load offset 1843 * in range early, regardless of direct branch distance. 1844 */ 1845 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); 1846 tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); 1847 1848 set_jmp_insn_offset(s, which); 1849 tcg_out32(s, I3206_B); 1850 tcg_out_insn(s, 3207, BR, TCG_REG_TMP); 1851 set_jmp_reset_offset(s, which); 1852} 1853 1854void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 1855 uintptr_t jmp_rx, uintptr_t jmp_rw) 1856{ 1857 uintptr_t d_addr = tb->jmp_target_addr[n]; 1858 ptrdiff_t d_offset = d_addr - jmp_rx; 1859 tcg_insn_unit insn; 1860 1861 /* Either directly branch, or indirect branch load. */ 1862 if (d_offset == sextract64(d_offset, 0, 28)) { 1863 insn = deposit32(I3206_B, 0, 26, d_offset >> 2); 1864 } else { 1865 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; 1866 ptrdiff_t i_offset = i_addr - jmp_rx; 1867 1868 /* Note that we asserted this in range in tcg_out_goto_tb. 
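           The LDR (literal) loads the destination into TCG_REG_TMP, which
           the BR emitted right after the branch slot in tcg_out_goto_tb
           then consumes.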
*/ 1869 insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2); 1870 } 1871 qatomic_set((uint32_t *)jmp_rw, insn); 1872 flush_idcache_range(jmp_rx, jmp_rw, 4); 1873} 1874 1875static void tcg_out_op(TCGContext *s, TCGOpcode opc, 1876 const TCGArg args[TCG_MAX_OP_ARGS], 1877 const int const_args[TCG_MAX_OP_ARGS]) 1878{ 1879 /* 99% of the time, we can signal the use of extension registers 1880 by looking to see if the opcode handles 64-bit data. */ 1881 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; 1882 1883 /* Hoist the loads of the most common arguments. */ 1884 TCGArg a0 = args[0]; 1885 TCGArg a1 = args[1]; 1886 TCGArg a2 = args[2]; 1887 int c2 = const_args[2]; 1888 1889 /* Some operands are defined with "rZ" constraint, a register or 1890 the zero register. These need not actually test args[I] == 0. */ 1891#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) 1892 1893 switch (opc) { 1894 case INDEX_op_goto_ptr: 1895 tcg_out_insn(s, 3207, BR, a0); 1896 break; 1897 1898 case INDEX_op_br: 1899 tcg_out_goto_label(s, arg_label(a0)); 1900 break; 1901 1902 case INDEX_op_ld8u_i32: 1903 case INDEX_op_ld8u_i64: 1904 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); 1905 break; 1906 case INDEX_op_ld8s_i32: 1907 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); 1908 break; 1909 case INDEX_op_ld8s_i64: 1910 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); 1911 break; 1912 case INDEX_op_ld16u_i32: 1913 case INDEX_op_ld16u_i64: 1914 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); 1915 break; 1916 case INDEX_op_ld16s_i32: 1917 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); 1918 break; 1919 case INDEX_op_ld16s_i64: 1920 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); 1921 break; 1922 case INDEX_op_ld_i32: 1923 case INDEX_op_ld32u_i64: 1924 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); 1925 break; 1926 case INDEX_op_ld32s_i64: 1927 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); 1928 break; 1929 case INDEX_op_ld_i64: 1930 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); 1931 break; 1932 1933 case INDEX_op_st8_i32: 1934 case INDEX_op_st8_i64: 1935 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0); 1936 break; 1937 case INDEX_op_st16_i32: 1938 case INDEX_op_st16_i64: 1939 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1); 1940 break; 1941 case INDEX_op_st_i32: 1942 case INDEX_op_st32_i64: 1943 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2); 1944 break; 1945 case INDEX_op_st_i64: 1946 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3); 1947 break; 1948 1949 case INDEX_op_add_i32: 1950 a2 = (int32_t)a2; 1951 /* FALLTHRU */ 1952 case INDEX_op_add_i64: 1953 if (c2) { 1954 tcg_out_addsubi(s, ext, a0, a1, a2); 1955 } else { 1956 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); 1957 } 1958 break; 1959 1960 case INDEX_op_sub_i32: 1961 a2 = (int32_t)a2; 1962 /* FALLTHRU */ 1963 case INDEX_op_sub_i64: 1964 if (c2) { 1965 tcg_out_addsubi(s, ext, a0, a1, -a2); 1966 } else { 1967 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); 1968 } 1969 break; 1970 1971 case INDEX_op_neg_i64: 1972 case INDEX_op_neg_i32: 1973 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); 1974 break; 1975 1976 case INDEX_op_and_i32: 1977 a2 = (int32_t)a2; 1978 /* FALLTHRU */ 1979 case INDEX_op_and_i64: 1980 if (c2) { 1981 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); 1982 } else { 1983 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); 1984 } 1985 break; 1986 1987 case INDEX_op_andc_i32: 1988 a2 = (int32_t)a2; 1989 /* FALLTHRU */ 1990 case INDEX_op_andc_i64: 1991 if (c2) { 1992 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); 1993 } else { 1994 
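            /* andc: AND with the complement of the register operand,
               i.e. BIC Xd, Xn, Xm.  */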
tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); 1995 } 1996 break; 1997 1998 case INDEX_op_or_i32: 1999 a2 = (int32_t)a2; 2000 /* FALLTHRU */ 2001 case INDEX_op_or_i64: 2002 if (c2) { 2003 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); 2004 } else { 2005 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); 2006 } 2007 break; 2008 2009 case INDEX_op_orc_i32: 2010 a2 = (int32_t)a2; 2011 /* FALLTHRU */ 2012 case INDEX_op_orc_i64: 2013 if (c2) { 2014 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); 2015 } else { 2016 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); 2017 } 2018 break; 2019 2020 case INDEX_op_xor_i32: 2021 a2 = (int32_t)a2; 2022 /* FALLTHRU */ 2023 case INDEX_op_xor_i64: 2024 if (c2) { 2025 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); 2026 } else { 2027 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); 2028 } 2029 break; 2030 2031 case INDEX_op_eqv_i32: 2032 a2 = (int32_t)a2; 2033 /* FALLTHRU */ 2034 case INDEX_op_eqv_i64: 2035 if (c2) { 2036 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); 2037 } else { 2038 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); 2039 } 2040 break; 2041 2042 case INDEX_op_not_i64: 2043 case INDEX_op_not_i32: 2044 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); 2045 break; 2046 2047 case INDEX_op_mul_i64: 2048 case INDEX_op_mul_i32: 2049 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); 2050 break; 2051 2052 case INDEX_op_div_i64: 2053 case INDEX_op_div_i32: 2054 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); 2055 break; 2056 case INDEX_op_divu_i64: 2057 case INDEX_op_divu_i32: 2058 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); 2059 break; 2060 2061 case INDEX_op_rem_i64: 2062 case INDEX_op_rem_i32: 2063 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); 2064 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2065 break; 2066 case INDEX_op_remu_i64: 2067 case INDEX_op_remu_i32: 2068 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); 2069 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); 2070 break; 2071 2072 case INDEX_op_shl_i64: 2073 case INDEX_op_shl_i32: 2074 if (c2) { 2075 tcg_out_shl(s, ext, a0, a1, a2); 2076 } else { 2077 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); 2078 } 2079 break; 2080 2081 case INDEX_op_shr_i64: 2082 case INDEX_op_shr_i32: 2083 if (c2) { 2084 tcg_out_shr(s, ext, a0, a1, a2); 2085 } else { 2086 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); 2087 } 2088 break; 2089 2090 case INDEX_op_sar_i64: 2091 case INDEX_op_sar_i32: 2092 if (c2) { 2093 tcg_out_sar(s, ext, a0, a1, a2); 2094 } else { 2095 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); 2096 } 2097 break; 2098 2099 case INDEX_op_rotr_i64: 2100 case INDEX_op_rotr_i32: 2101 if (c2) { 2102 tcg_out_rotr(s, ext, a0, a1, a2); 2103 } else { 2104 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); 2105 } 2106 break; 2107 2108 case INDEX_op_rotl_i64: 2109 case INDEX_op_rotl_i32: 2110 if (c2) { 2111 tcg_out_rotl(s, ext, a0, a1, a2); 2112 } else { 2113 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); 2114 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); 2115 } 2116 break; 2117 2118 case INDEX_op_clz_i64: 2119 case INDEX_op_clz_i32: 2120 tcg_out_cltz(s, ext, a0, a1, a2, c2, false); 2121 break; 2122 case INDEX_op_ctz_i64: 2123 case INDEX_op_ctz_i32: 2124 tcg_out_cltz(s, ext, a0, a1, a2, c2, true); 2125 break; 2126 2127 case INDEX_op_brcond_i32: 2128 a1 = (int32_t)a1; 2129 /* FALLTHRU */ 2130 case INDEX_op_brcond_i64: 2131 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); 2132 break; 2133 2134 case INDEX_op_setcond_i32: 2135 a2 = 
(int32_t)a2; 2136 /* FALLTHRU */ 2137 case INDEX_op_setcond_i64: 2138 tcg_out_cmp(s, ext, a1, a2, c2); 2139 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ 2140 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, 2141 TCG_REG_XZR, tcg_invert_cond(args[3])); 2142 break; 2143 2144 case INDEX_op_movcond_i32: 2145 a2 = (int32_t)a2; 2146 /* FALLTHRU */ 2147 case INDEX_op_movcond_i64: 2148 tcg_out_cmp(s, ext, a1, a2, c2); 2149 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); 2150 break; 2151 2152 case INDEX_op_qemu_ld_a32_i32: 2153 case INDEX_op_qemu_ld_a64_i32: 2154 case INDEX_op_qemu_ld_a32_i64: 2155 case INDEX_op_qemu_ld_a64_i64: 2156 tcg_out_qemu_ld(s, a0, a1, a2, ext); 2157 break; 2158 case INDEX_op_qemu_st_a32_i32: 2159 case INDEX_op_qemu_st_a64_i32: 2160 case INDEX_op_qemu_st_a32_i64: 2161 case INDEX_op_qemu_st_a64_i64: 2162 tcg_out_qemu_st(s, REG0(0), a1, a2, ext); 2163 break; 2164 2165 case INDEX_op_bswap64_i64: 2166 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); 2167 break; 2168 case INDEX_op_bswap32_i64: 2169 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2170 if (a2 & TCG_BSWAP_OS) { 2171 tcg_out_ext32s(s, a0, a0); 2172 } 2173 break; 2174 case INDEX_op_bswap32_i32: 2175 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); 2176 break; 2177 case INDEX_op_bswap16_i64: 2178 case INDEX_op_bswap16_i32: 2179 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); 2180 if (a2 & TCG_BSWAP_OS) { 2181 /* Output must be sign-extended. */ 2182 tcg_out_ext16s(s, ext, a0, a0); 2183 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 2184 /* Output must be zero-extended, but input isn't. */ 2185 tcg_out_ext16u(s, a0, a0); 2186 } 2187 break; 2188 2189 case INDEX_op_deposit_i64: 2190 case INDEX_op_deposit_i32: 2191 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); 2192 break; 2193 2194 case INDEX_op_extract_i64: 2195 case INDEX_op_extract_i32: 2196 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2197 break; 2198 2199 case INDEX_op_sextract_i64: 2200 case INDEX_op_sextract_i32: 2201 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); 2202 break; 2203 2204 case INDEX_op_extract2_i64: 2205 case INDEX_op_extract2_i32: 2206 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); 2207 break; 2208 2209 case INDEX_op_add2_i32: 2210 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2211 (int32_t)args[4], args[5], const_args[4], 2212 const_args[5], false); 2213 break; 2214 case INDEX_op_add2_i64: 2215 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2216 args[5], const_args[4], const_args[5], false); 2217 break; 2218 case INDEX_op_sub2_i32: 2219 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), 2220 (int32_t)args[4], args[5], const_args[4], 2221 const_args[5], true); 2222 break; 2223 case INDEX_op_sub2_i64: 2224 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], 2225 args[5], const_args[4], const_args[5], true); 2226 break; 2227 2228 case INDEX_op_muluh_i64: 2229 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); 2230 break; 2231 case INDEX_op_mulsh_i64: 2232 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); 2233 break; 2234 2235 case INDEX_op_mb: 2236 tcg_out_mb(s, a0); 2237 break; 2238 2239 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 2240 case INDEX_op_mov_i64: 2241 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 2242 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 2243 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. 
*/ 2244 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 2245 case INDEX_op_ext8s_i64: 2246 case INDEX_op_ext8u_i32: 2247 case INDEX_op_ext8u_i64: 2248 case INDEX_op_ext16s_i64: 2249 case INDEX_op_ext16s_i32: 2250 case INDEX_op_ext16u_i64: 2251 case INDEX_op_ext16u_i32: 2252 case INDEX_op_ext32s_i64: 2253 case INDEX_op_ext32u_i64: 2254 case INDEX_op_ext_i32_i64: 2255 case INDEX_op_extu_i32_i64: 2256 case INDEX_op_extrl_i64_i32: 2257 default: 2258 g_assert_not_reached(); 2259 } 2260 2261#undef REG0 2262} 2263 2264static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 2265 unsigned vecl, unsigned vece, 2266 const TCGArg args[TCG_MAX_OP_ARGS], 2267 const int const_args[TCG_MAX_OP_ARGS]) 2268{ 2269 static const AArch64Insn cmp_vec_insn[16] = { 2270 [TCG_COND_EQ] = I3616_CMEQ, 2271 [TCG_COND_GT] = I3616_CMGT, 2272 [TCG_COND_GE] = I3616_CMGE, 2273 [TCG_COND_GTU] = I3616_CMHI, 2274 [TCG_COND_GEU] = I3616_CMHS, 2275 }; 2276 static const AArch64Insn cmp_scalar_insn[16] = { 2277 [TCG_COND_EQ] = I3611_CMEQ, 2278 [TCG_COND_GT] = I3611_CMGT, 2279 [TCG_COND_GE] = I3611_CMGE, 2280 [TCG_COND_GTU] = I3611_CMHI, 2281 [TCG_COND_GEU] = I3611_CMHS, 2282 }; 2283 static const AArch64Insn cmp0_vec_insn[16] = { 2284 [TCG_COND_EQ] = I3617_CMEQ0, 2285 [TCG_COND_GT] = I3617_CMGT0, 2286 [TCG_COND_GE] = I3617_CMGE0, 2287 [TCG_COND_LT] = I3617_CMLT0, 2288 [TCG_COND_LE] = I3617_CMLE0, 2289 }; 2290 static const AArch64Insn cmp0_scalar_insn[16] = { 2291 [TCG_COND_EQ] = I3612_CMEQ0, 2292 [TCG_COND_GT] = I3612_CMGT0, 2293 [TCG_COND_GE] = I3612_CMGE0, 2294 [TCG_COND_LT] = I3612_CMLT0, 2295 [TCG_COND_LE] = I3612_CMLE0, 2296 }; 2297 2298 TCGType type = vecl + TCG_TYPE_V64; 2299 unsigned is_q = vecl; 2300 bool is_scalar = !is_q && vece == MO_64; 2301 TCGArg a0, a1, a2, a3; 2302 int cmode, imm8; 2303 2304 a0 = args[0]; 2305 a1 = args[1]; 2306 a2 = args[2]; 2307 2308 switch (opc) { 2309 case INDEX_op_ld_vec: 2310 tcg_out_ld(s, type, a0, a1, a2); 2311 break; 2312 case INDEX_op_st_vec: 2313 tcg_out_st(s, type, a0, a1, a2); 2314 break; 2315 case INDEX_op_dupm_vec: 2316 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 2317 break; 2318 case INDEX_op_add_vec: 2319 if (is_scalar) { 2320 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); 2321 } else { 2322 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); 2323 } 2324 break; 2325 case INDEX_op_sub_vec: 2326 if (is_scalar) { 2327 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); 2328 } else { 2329 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); 2330 } 2331 break; 2332 case INDEX_op_mul_vec: 2333 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); 2334 break; 2335 case INDEX_op_neg_vec: 2336 if (is_scalar) { 2337 tcg_out_insn(s, 3612, NEG, vece, a0, a1); 2338 } else { 2339 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); 2340 } 2341 break; 2342 case INDEX_op_abs_vec: 2343 if (is_scalar) { 2344 tcg_out_insn(s, 3612, ABS, vece, a0, a1); 2345 } else { 2346 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); 2347 } 2348 break; 2349 case INDEX_op_and_vec: 2350 if (const_args[2]) { 2351 is_shimm1632(~a2, &cmode, &imm8); 2352 if (a0 == a1) { 2353 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2354 return; 2355 } 2356 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2357 a2 = a0; 2358 } 2359 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); 2360 break; 2361 case INDEX_op_or_vec: 2362 if (const_args[2]) { 2363 is_shimm1632(a2, &cmode, &imm8); 2364 if (a0 == a1) { 2365 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2366 return; 2367 } 2368 tcg_out_insn(s, 3606, MOVI, is_q, 
a0, 0, cmode, imm8); 2369 a2 = a0; 2370 } 2371 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); 2372 break; 2373 case INDEX_op_andc_vec: 2374 if (const_args[2]) { 2375 is_shimm1632(a2, &cmode, &imm8); 2376 if (a0 == a1) { 2377 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); 2378 return; 2379 } 2380 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); 2381 a2 = a0; 2382 } 2383 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); 2384 break; 2385 case INDEX_op_orc_vec: 2386 if (const_args[2]) { 2387 is_shimm1632(~a2, &cmode, &imm8); 2388 if (a0 == a1) { 2389 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); 2390 return; 2391 } 2392 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); 2393 a2 = a0; 2394 } 2395 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); 2396 break; 2397 case INDEX_op_xor_vec: 2398 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); 2399 break; 2400 case INDEX_op_ssadd_vec: 2401 if (is_scalar) { 2402 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); 2403 } else { 2404 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); 2405 } 2406 break; 2407 case INDEX_op_sssub_vec: 2408 if (is_scalar) { 2409 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); 2410 } else { 2411 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); 2412 } 2413 break; 2414 case INDEX_op_usadd_vec: 2415 if (is_scalar) { 2416 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); 2417 } else { 2418 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); 2419 } 2420 break; 2421 case INDEX_op_ussub_vec: 2422 if (is_scalar) { 2423 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); 2424 } else { 2425 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); 2426 } 2427 break; 2428 case INDEX_op_smax_vec: 2429 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); 2430 break; 2431 case INDEX_op_smin_vec: 2432 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2); 2433 break; 2434 case INDEX_op_umax_vec: 2435 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2); 2436 break; 2437 case INDEX_op_umin_vec: 2438 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2); 2439 break; 2440 case INDEX_op_not_vec: 2441 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); 2442 break; 2443 case INDEX_op_shli_vec: 2444 if (is_scalar) { 2445 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); 2446 } else { 2447 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); 2448 } 2449 break; 2450 case INDEX_op_shri_vec: 2451 if (is_scalar) { 2452 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); 2453 } else { 2454 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); 2455 } 2456 break; 2457 case INDEX_op_sari_vec: 2458 if (is_scalar) { 2459 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); 2460 } else { 2461 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); 2462 } 2463 break; 2464 case INDEX_op_aa64_sli_vec: 2465 if (is_scalar) { 2466 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); 2467 } else { 2468 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); 2469 } 2470 break; 2471 case INDEX_op_shlv_vec: 2472 if (is_scalar) { 2473 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); 2474 } else { 2475 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); 2476 } 2477 break; 2478 case INDEX_op_aa64_sshl_vec: 2479 if (is_scalar) { 2480 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); 2481 } else { 2482 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); 2483 } 2484 break; 2485 case INDEX_op_cmp_vec: 2486 { 2487 TCGCond cond = args[3]; 2488 AArch64Insn insn; 2489 2490 if (cond == TCG_COND_NE) { 2491 if (const_args[2]) { 
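                /*
                 * Note (added for clarity): the only constant accepted
                 * here is zero (the "wZ" constraint used for cmp_vec
                 * below), so per-lane "a1 != 0" can be computed directly
                 * as CMTST Vd, Vn, Vn: all-ones where any bit of the
                 * lane is set, zero otherwise.
                 */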
2492 if (is_scalar) { 2493 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); 2494 } else { 2495 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); 2496 } 2497 } else { 2498 if (is_scalar) { 2499 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); 2500 } else { 2501 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); 2502 } 2503 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); 2504 } 2505 } else { 2506 if (const_args[2]) { 2507 if (is_scalar) { 2508 insn = cmp0_scalar_insn[cond]; 2509 if (insn) { 2510 tcg_out_insn_3612(s, insn, vece, a0, a1); 2511 break; 2512 } 2513 } else { 2514 insn = cmp0_vec_insn[cond]; 2515 if (insn) { 2516 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); 2517 break; 2518 } 2519 } 2520 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); 2521 a2 = TCG_VEC_TMP; 2522 } 2523 if (is_scalar) { 2524 insn = cmp_scalar_insn[cond]; 2525 if (insn == 0) { 2526 TCGArg t; 2527 t = a1, a1 = a2, a2 = t; 2528 cond = tcg_swap_cond(cond); 2529 insn = cmp_scalar_insn[cond]; 2530 tcg_debug_assert(insn != 0); 2531 } 2532 tcg_out_insn_3611(s, insn, vece, a0, a1, a2); 2533 } else { 2534 insn = cmp_vec_insn[cond]; 2535 if (insn == 0) { 2536 TCGArg t; 2537 t = a1, a1 = a2, a2 = t; 2538 cond = tcg_swap_cond(cond); 2539 insn = cmp_vec_insn[cond]; 2540 tcg_debug_assert(insn != 0); 2541 } 2542 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); 2543 } 2544 } 2545 } 2546 break; 2547 2548 case INDEX_op_bitsel_vec: 2549 a3 = args[3]; 2550 if (a0 == a3) { 2551 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); 2552 } else if (a0 == a2) { 2553 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); 2554 } else { 2555 if (a0 != a1) { 2556 tcg_out_mov(s, type, a0, a1); 2557 } 2558 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); 2559 } 2560 break; 2561 2562 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 2563 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 2564 default: 2565 g_assert_not_reached(); 2566 } 2567} 2568 2569int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 2570{ 2571 switch (opc) { 2572 case INDEX_op_add_vec: 2573 case INDEX_op_sub_vec: 2574 case INDEX_op_and_vec: 2575 case INDEX_op_or_vec: 2576 case INDEX_op_xor_vec: 2577 case INDEX_op_andc_vec: 2578 case INDEX_op_orc_vec: 2579 case INDEX_op_neg_vec: 2580 case INDEX_op_abs_vec: 2581 case INDEX_op_not_vec: 2582 case INDEX_op_cmp_vec: 2583 case INDEX_op_shli_vec: 2584 case INDEX_op_shri_vec: 2585 case INDEX_op_sari_vec: 2586 case INDEX_op_ssadd_vec: 2587 case INDEX_op_sssub_vec: 2588 case INDEX_op_usadd_vec: 2589 case INDEX_op_ussub_vec: 2590 case INDEX_op_shlv_vec: 2591 case INDEX_op_bitsel_vec: 2592 return 1; 2593 case INDEX_op_rotli_vec: 2594 case INDEX_op_shrv_vec: 2595 case INDEX_op_sarv_vec: 2596 case INDEX_op_rotlv_vec: 2597 case INDEX_op_rotrv_vec: 2598 return -1; 2599 case INDEX_op_mul_vec: 2600 case INDEX_op_smax_vec: 2601 case INDEX_op_smin_vec: 2602 case INDEX_op_umax_vec: 2603 case INDEX_op_umin_vec: 2604 return vece < MO_64; 2605 2606 default: 2607 return 0; 2608 } 2609} 2610 2611void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 2612 TCGArg a0, ...) 
2613{ 2614 va_list va; 2615 TCGv_vec v0, v1, v2, t1, t2, c1; 2616 TCGArg a2; 2617 2618 va_start(va, a0); 2619 v0 = temp_tcgv_vec(arg_temp(a0)); 2620 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 2621 a2 = va_arg(va, TCGArg); 2622 va_end(va); 2623 2624 switch (opc) { 2625 case INDEX_op_rotli_vec: 2626 t1 = tcg_temp_new_vec(type); 2627 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1)); 2628 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece, 2629 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2); 2630 tcg_temp_free_vec(t1); 2631 break; 2632 2633 case INDEX_op_shrv_vec: 2634 case INDEX_op_sarv_vec: 2635 /* Right shifts are negative left shifts for AArch64. */ 2636 v2 = temp_tcgv_vec(arg_temp(a2)); 2637 t1 = tcg_temp_new_vec(type); 2638 tcg_gen_neg_vec(vece, t1, v2); 2639 opc = (opc == INDEX_op_shrv_vec 2640 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); 2641 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), 2642 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2643 tcg_temp_free_vec(t1); 2644 break; 2645 2646 case INDEX_op_rotlv_vec: 2647 v2 = temp_tcgv_vec(arg_temp(a2)); 2648 t1 = tcg_temp_new_vec(type); 2649 c1 = tcg_constant_vec(type, vece, 8 << vece); 2650 tcg_gen_sub_vec(vece, t1, v2, c1); 2651 /* Right shifts are negative left shifts for AArch64. */ 2652 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2653 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2654 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0), 2655 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 2656 tcg_gen_or_vec(vece, v0, v0, t1); 2657 tcg_temp_free_vec(t1); 2658 break; 2659 2660 case INDEX_op_rotrv_vec: 2661 v2 = temp_tcgv_vec(arg_temp(a2)); 2662 t1 = tcg_temp_new_vec(type); 2663 t2 = tcg_temp_new_vec(type); 2664 c1 = tcg_constant_vec(type, vece, 8 << vece); 2665 tcg_gen_neg_vec(vece, t1, v2); 2666 tcg_gen_sub_vec(vece, t2, c1, v2); 2667 /* Right shifts are negative left shifts for AArch64. 
*/ 2668 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1), 2669 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 2670 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2), 2671 tcgv_vec_arg(v1), tcgv_vec_arg(t2)); 2672 tcg_gen_or_vec(vece, v0, t1, t2); 2673 tcg_temp_free_vec(t1); 2674 tcg_temp_free_vec(t2); 2675 break; 2676 2677 default: 2678 g_assert_not_reached(); 2679 } 2680} 2681 2682static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 2683{ 2684 switch (op) { 2685 case INDEX_op_goto_ptr: 2686 return C_O0_I1(r); 2687 2688 case INDEX_op_ld8u_i32: 2689 case INDEX_op_ld8s_i32: 2690 case INDEX_op_ld16u_i32: 2691 case INDEX_op_ld16s_i32: 2692 case INDEX_op_ld_i32: 2693 case INDEX_op_ld8u_i64: 2694 case INDEX_op_ld8s_i64: 2695 case INDEX_op_ld16u_i64: 2696 case INDEX_op_ld16s_i64: 2697 case INDEX_op_ld32u_i64: 2698 case INDEX_op_ld32s_i64: 2699 case INDEX_op_ld_i64: 2700 case INDEX_op_neg_i32: 2701 case INDEX_op_neg_i64: 2702 case INDEX_op_not_i32: 2703 case INDEX_op_not_i64: 2704 case INDEX_op_bswap16_i32: 2705 case INDEX_op_bswap32_i32: 2706 case INDEX_op_bswap16_i64: 2707 case INDEX_op_bswap32_i64: 2708 case INDEX_op_bswap64_i64: 2709 case INDEX_op_ext8s_i32: 2710 case INDEX_op_ext16s_i32: 2711 case INDEX_op_ext8u_i32: 2712 case INDEX_op_ext16u_i32: 2713 case INDEX_op_ext8s_i64: 2714 case INDEX_op_ext16s_i64: 2715 case INDEX_op_ext32s_i64: 2716 case INDEX_op_ext8u_i64: 2717 case INDEX_op_ext16u_i64: 2718 case INDEX_op_ext32u_i64: 2719 case INDEX_op_ext_i32_i64: 2720 case INDEX_op_extu_i32_i64: 2721 case INDEX_op_extract_i32: 2722 case INDEX_op_extract_i64: 2723 case INDEX_op_sextract_i32: 2724 case INDEX_op_sextract_i64: 2725 return C_O1_I1(r, r); 2726 2727 case INDEX_op_st8_i32: 2728 case INDEX_op_st16_i32: 2729 case INDEX_op_st_i32: 2730 case INDEX_op_st8_i64: 2731 case INDEX_op_st16_i64: 2732 case INDEX_op_st32_i64: 2733 case INDEX_op_st_i64: 2734 return C_O0_I2(rZ, r); 2735 2736 case INDEX_op_add_i32: 2737 case INDEX_op_add_i64: 2738 case INDEX_op_sub_i32: 2739 case INDEX_op_sub_i64: 2740 case INDEX_op_setcond_i32: 2741 case INDEX_op_setcond_i64: 2742 return C_O1_I2(r, r, rA); 2743 2744 case INDEX_op_mul_i32: 2745 case INDEX_op_mul_i64: 2746 case INDEX_op_div_i32: 2747 case INDEX_op_div_i64: 2748 case INDEX_op_divu_i32: 2749 case INDEX_op_divu_i64: 2750 case INDEX_op_rem_i32: 2751 case INDEX_op_rem_i64: 2752 case INDEX_op_remu_i32: 2753 case INDEX_op_remu_i64: 2754 case INDEX_op_muluh_i64: 2755 case INDEX_op_mulsh_i64: 2756 return C_O1_I2(r, r, r); 2757 2758 case INDEX_op_and_i32: 2759 case INDEX_op_and_i64: 2760 case INDEX_op_or_i32: 2761 case INDEX_op_or_i64: 2762 case INDEX_op_xor_i32: 2763 case INDEX_op_xor_i64: 2764 case INDEX_op_andc_i32: 2765 case INDEX_op_andc_i64: 2766 case INDEX_op_orc_i32: 2767 case INDEX_op_orc_i64: 2768 case INDEX_op_eqv_i32: 2769 case INDEX_op_eqv_i64: 2770 return C_O1_I2(r, r, rL); 2771 2772 case INDEX_op_shl_i32: 2773 case INDEX_op_shr_i32: 2774 case INDEX_op_sar_i32: 2775 case INDEX_op_rotl_i32: 2776 case INDEX_op_rotr_i32: 2777 case INDEX_op_shl_i64: 2778 case INDEX_op_shr_i64: 2779 case INDEX_op_sar_i64: 2780 case INDEX_op_rotl_i64: 2781 case INDEX_op_rotr_i64: 2782 return C_O1_I2(r, r, ri); 2783 2784 case INDEX_op_clz_i32: 2785 case INDEX_op_ctz_i32: 2786 case INDEX_op_clz_i64: 2787 case INDEX_op_ctz_i64: 2788 return C_O1_I2(r, r, rAL); 2789 2790 case INDEX_op_brcond_i32: 2791 case INDEX_op_brcond_i64: 2792 return C_O0_I2(r, rA); 2793 2794 case INDEX_op_movcond_i32: 2795 case INDEX_op_movcond_i64: 2796 return C_O1_I4(r, r, rA, rZ, rZ); 
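    /*
     * Note (added for clarity): the "l"/"lZ" constraints below select the
     * guest load/store register class, presumably ALL_QLDST_REGS, so that
     * softmmu builds keep the address and data operands clear of the
     * registers needed to set up the slow-path helper call.
     */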
2797 2798 case INDEX_op_qemu_ld_a32_i32: 2799 case INDEX_op_qemu_ld_a64_i32: 2800 case INDEX_op_qemu_ld_a32_i64: 2801 case INDEX_op_qemu_ld_a64_i64: 2802 return C_O1_I1(r, l); 2803 case INDEX_op_qemu_st_a32_i32: 2804 case INDEX_op_qemu_st_a64_i32: 2805 case INDEX_op_qemu_st_a32_i64: 2806 case INDEX_op_qemu_st_a64_i64: 2807 return C_O0_I2(lZ, l); 2808 2809 case INDEX_op_deposit_i32: 2810 case INDEX_op_deposit_i64: 2811 return C_O1_I2(r, 0, rZ); 2812 2813 case INDEX_op_extract2_i32: 2814 case INDEX_op_extract2_i64: 2815 return C_O1_I2(r, rZ, rZ); 2816 2817 case INDEX_op_add2_i32: 2818 case INDEX_op_add2_i64: 2819 case INDEX_op_sub2_i32: 2820 case INDEX_op_sub2_i64: 2821 return C_O2_I4(r, r, rZ, rZ, rA, rMZ); 2822 2823 case INDEX_op_add_vec: 2824 case INDEX_op_sub_vec: 2825 case INDEX_op_mul_vec: 2826 case INDEX_op_xor_vec: 2827 case INDEX_op_ssadd_vec: 2828 case INDEX_op_sssub_vec: 2829 case INDEX_op_usadd_vec: 2830 case INDEX_op_ussub_vec: 2831 case INDEX_op_smax_vec: 2832 case INDEX_op_smin_vec: 2833 case INDEX_op_umax_vec: 2834 case INDEX_op_umin_vec: 2835 case INDEX_op_shlv_vec: 2836 case INDEX_op_shrv_vec: 2837 case INDEX_op_sarv_vec: 2838 case INDEX_op_aa64_sshl_vec: 2839 return C_O1_I2(w, w, w); 2840 case INDEX_op_not_vec: 2841 case INDEX_op_neg_vec: 2842 case INDEX_op_abs_vec: 2843 case INDEX_op_shli_vec: 2844 case INDEX_op_shri_vec: 2845 case INDEX_op_sari_vec: 2846 return C_O1_I1(w, w); 2847 case INDEX_op_ld_vec: 2848 case INDEX_op_dupm_vec: 2849 return C_O1_I1(w, r); 2850 case INDEX_op_st_vec: 2851 return C_O0_I2(w, r); 2852 case INDEX_op_dup_vec: 2853 return C_O1_I1(w, wr); 2854 case INDEX_op_or_vec: 2855 case INDEX_op_andc_vec: 2856 return C_O1_I2(w, w, wO); 2857 case INDEX_op_and_vec: 2858 case INDEX_op_orc_vec: 2859 return C_O1_I2(w, w, wN); 2860 case INDEX_op_cmp_vec: 2861 return C_O1_I2(w, w, wZ); 2862 case INDEX_op_bitsel_vec: 2863 return C_O1_I3(w, w, w, w); 2864 case INDEX_op_aa64_sli_vec: 2865 return C_O1_I2(w, 0, w); 2866 2867 default: 2868 g_assert_not_reached(); 2869 } 2870} 2871 2872static void tcg_target_init(TCGContext *s) 2873{ 2874 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu; 2875 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu; 2876 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 2877 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 2878 2879 tcg_target_call_clobber_regs = -1ull; 2880 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19); 2881 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20); 2882 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21); 2883 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22); 2884 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23); 2885 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24); 2886 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25); 2887 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26); 2888 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27); 2889 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28); 2890 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29); 2891 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 2892 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 2893 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 2894 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 2895 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 2896 
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn. */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers. */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding. */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals. */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    /*
     * Note that XZR cannot be encoded in the address base register slot,
     * as that actually encodes SP.  Depending on the guest, we may need
     * to zero-extend the guest address via the address index register slot,
     * therefore we need to load even a zero guest base into a register.
     */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space. */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }
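
    /*
     * Note (added for clarity): the epilogue mirrors the prologue.  The
     * TCG locals area (FRAME_SIZE - PUSH_SIZE bytes) has been dropped and
     * x19..x28 reloaded above; all that remains is to pop FP/LR with a
     * writeback LDP that also moves SP back to the caller's frame.
     */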
    /* Pop (FP, LR), restore SP to previous frame. */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr,  -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp,  -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
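
/*
 * Worked example (added for clarity) of the frame bookkeeping above.  The
 * concrete values of TCG_STATIC_CALL_ARGS_SIZE (128), CPU_TEMP_BUF_NLONGS
 * (128), TCG_TARGET_STACK_ALIGN (16) and sizeof(long) (8 on an LP64 host)
 * are assumptions taken from elsewhere in the tree, used purely for
 * illustration:
 *
 *   PUSH_SIZE  = (30 - 19 + 1) * 8               =   96
 *   FRAME_SIZE = (96 + 128 + 128 * 8 + 15) & ~15 = 1248
 *
 * which satisfies both build-time checks above (1248 < 1 << 14, and
 * 1248 - 96 = 1152 fits an unshifted 12-bit ADDI/SUBI immediate).  The
 * .fde_def_cfa bytes then encode 1248 as the two-byte uleb128
 * { (1248 & 0x7f) | 0x80, 1248 >> 7 } = { 0xe0, 0x09 }, matching the
 * "2 byte uleb128" expectation asserted with QEMU_BUILD_BUG_ON.
 */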