/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# ifdef _CALL_AIX
    /* ok */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* ok */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* ok */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB      TCG_REG_R31
#define USE_REG_TB      (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_U16   0x200
#define TCG_CT_CONST_S32   0x400
#define TCG_CT_CONST_U32   0x800
#define TCG_CT_CONST_ZERO  0x1000
#define TCG_CT_CONST_MONE  0x2000
#define TCG_CT_CONST_WSZ   0x4000
#define TCG_CT_CONST_CMP   0x8000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifndef R_PPC64_PCREL34
#define R_PPC64_PCREL34  132
#endif

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,  /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
/* For PPC, we use TB+4 instead of TB as the base.  */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
{
    return tcg_tbrel_diff(s, target) - 4;
}

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t)disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t)disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
        return true;
    }
    return false;
}

static bool mask_operand(uint32_t c, int *mb, int *me);
static bool mask64_operand(uint64_t c, int *mb, int *me);

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = sval;
    int mb, me;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    if (ct & TCG_CT_CONST_CMP) {
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            ct |= TCG_CT_CONST_S16;
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            ct |= TCG_CT_CONST_U16;
            break;
        case TCG_COND_TSTEQ:
        case TCG_COND_TSTNE:
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
                return 1;
            }
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
                return 1;
            }
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
                return 1;
            }
            return 0;
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}
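/*
 * Illustrative note (not from the original source): for TSTEQ/TSTNE the
 * set accepted above mirrors exactly what tcg_out_test() below can emit
 * in one insn.  E.g. 0x00ffff00 is accepted because it is a contiguous
 * mask (testable with a single rlwinm.), while 0x00ff00ff is rejected:
 * it fits neither andi./andis. nor a contiguous 32- or 64-bit mask.
 */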
#define OPCD(opc)   ((opc)<<26)
#define XO19(opc)   (OPCD(19)|((opc)<<1))
#define MD30(opc)   (OPCD(30)|((opc)<<2))
#define MDS30(opc)  (OPCD(30)|((opc)<<1))
#define XO31(opc)   (OPCD(31)|((opc)<<1))
#define XO58(opc)   (OPCD(58)|(opc))
#define XO62(opc)   (OPCD(62)|(opc))
#define VX4(opc)    (OPCD(4)|(opc))

#define B        OPCD( 18)
#define BC       OPCD( 16)

#define LBZ      OPCD( 34)
#define LHZ      OPCD( 40)
#define LHA      OPCD( 42)
#define LWZ      OPCD( 32)
#define LWZUX    XO31( 55)
#define LD       XO58(  0)
#define LDX      XO31( 21)
#define LDU      XO58(  1)
#define LDUX     XO31( 53)
#define LWA      XO58(  2)
#define LWAX     XO31(341)
#define LQ       OPCD( 56)

#define STB      OPCD( 38)
#define STH      OPCD( 44)
#define STW      OPCD( 36)
#define STD      XO62(  0)
#define STDU     XO62(  1)
#define STDX     XO31(149)
#define STQ      XO62(  2)

#define PLWA     OPCD( 41)
#define PLD      OPCD( 57)
#define PLXSD    OPCD( 42)
#define PLXV     OPCD(25 * 2 + 1)  /* force tx=1 */

#define PSTD     OPCD( 61)
#define PSTXSD   OPCD( 46)
#define PSTXV    OPCD(27 * 2 + 1)  /* force sx=1 */

#define ADDIC    OPCD( 12)
#define ADDI     OPCD( 14)
#define ADDIS    OPCD( 15)
#define ORI      OPCD( 24)
#define ORIS     OPCD( 25)
#define XORI     OPCD( 26)
#define XORIS    OPCD( 27)
#define ANDI     OPCD( 28)
#define ANDIS    OPCD( 29)
#define MULLI    OPCD(  7)
#define CMPLI    OPCD( 10)
#define CMPI     OPCD( 11)
#define SUBFIC   OPCD(  8)

#define LWZU     OPCD( 33)
#define STWU     OPCD( 37)

#define RLWIMI   OPCD( 20)
#define RLWINM   OPCD( 21)
#define RLWNM    OPCD( 23)

#define RLDICL   MD30(  0)
#define RLDICR   MD30(  1)
#define RLDIMI   MD30(  3)
#define RLDCL    MDS30( 8)

#define BCLR     XO19( 16)
#define BCCTR    XO19(528)
#define CRAND    XO19(257)
#define CRANDC   XO19(129)
#define CRNAND   XO19(225)
#define CROR     XO19(449)
#define CRNOR    XO19( 33)
#define ADDPCIS  XO19(  2)

#define EXTSB    XO31(954)
#define EXTSH    XO31(922)
#define EXTSW    XO31(986)
#define ADD      XO31(266)
#define ADDE     XO31(138)
#define ADDME    XO31(234)
#define ADDZE    XO31(202)
#define ADDC     XO31( 10)
#define AND      XO31( 28)
#define SUBF     XO31( 40)
#define SUBFC    XO31(  8)
#define SUBFE    XO31(136)
#define SUBFME   XO31(232)
#define SUBFZE   XO31(200)
#define OR       XO31(444)
#define XOR      XO31(316)
#define MULLW    XO31(235)
#define MULHW    XO31( 75)
#define MULHWU   XO31( 11)
#define DIVW     XO31(491)
#define DIVWU    XO31(459)
#define MODSW    XO31(779)
#define MODUW    XO31(267)
#define CMP      XO31(  0)
#define CMPL     XO31( 32)
#define LHBRX    XO31(790)
#define LWBRX    XO31(534)
#define LDBRX    XO31(532)
#define STHBRX   XO31(918)
#define STWBRX   XO31(662)
#define STDBRX   XO31(660)
#define MFSPR    XO31(339)
#define MTSPR    XO31(467)
#define SRAWI    XO31(824)
#define NEG      XO31(104)
#define MFCR     XO31( 19)
#define MFOCRF   (MFCR | (1u << 20))
#define NOR      XO31(124)
#define CNTLZW   XO31( 26)
#define CNTLZD   XO31( 58)
#define CNTTZW   XO31(538)
#define CNTTZD   XO31(570)
#define CNTPOPW  XO31(378)
#define CNTPOPD  XO31(506)
#define ANDC     XO31( 60)
#define ORC      XO31(412)
#define EQV      XO31(284)
#define NAND     XO31(476)
#define ISEL     XO31( 15)

#define MULLD    XO31(233)
#define MULHD    XO31( 73)
#define MULHDU   XO31(  9)
#define DIVD     XO31(489)
#define DIVDU    XO31(457)
#define MODSD    XO31(777)
#define MODUD    XO31(265)

#define LBZX     XO31( 87)
#define LHZX     XO31(279)
#define LHAX     XO31(343)
#define LWZX     XO31( 23)
#define STBX     XO31(215)
#define STHX     XO31(407)
#define STWX     XO31(151)

#define EIEIO    XO31(854)
#define HWSYNC   XO31(598)
#define LWSYNC   (HWSYNC | (1u << 21))

#define SPR(a, b)  ((((a)<<5)|(b))<<11)
#define LR         SPR(8, 0)
#define CTR        SPR(9, 0)

#define SLW      XO31( 24)
#define SRW      XO31(536)
#define SRAW     XO31(792)

#define SLD      XO31( 27)
#define SRD      XO31(539)
#define SRAD     XO31(794)
#define SRADI    XO31(413<<1)

#define BRH      XO31(219)
#define BRW      XO31(155)
#define BRD      XO31(187)

#define TW       XO31( 4)
#define TRAP     (TW | TO(31))

#define SETBC    XO31(384)  /* v3.10 */
#define SETBCR   XO31(416)  /* v3.10 */
#define SETNBC   XO31(448)  /* v3.10 */
#define SETNBCR  XO31(480)  /* v3.10 */

#define NOP      ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5)  /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)  /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)  /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)  /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)  /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)  /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)  /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)  /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)  /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)  /* v2.07 */
#define VCMPNEB    VX4(7)    /* v3.00 */
#define VCMPNEH    VX4(71)   /* v3.00 */
#define VCMPNEW    VX4(135)  /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)  /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)  /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)  /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)  /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)  /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)  /* v2.07 */
#define VMULUWM    VX4(137)  /* v2.07 */
#define VMULLD     VX4(457)  /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)  /* v2.07 */
#define VNAND      VX4(1412)  /* v2.07 */
#define VORC       VX4(1348)  /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)   /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf)  /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1)  /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r)      ((r)<<21)
#define RS(r)      ((r)<<21)
#define RA(r)      ((r)<<16)
#define RB(r)      ((r)<<11)
#define TO(t)      ((t)<<21)
#define SH(s)      ((s)<<11)
#define MB(b)      ((b)<<6)
#define ME(e)      ((e)<<1)
#define BO(o)      ((o)<<21)
#define MB64(b)    ((b)<<5)
#define FXM(b)     (1 << (19 - (b)))

#define VRT(r)     (((r) & 31) << 21)
#define VRA(r)     (((r) & 31) << 16)
#define VRB(r)     (((r) & 31) << 11)
#define VRC(r)     (((r) & 31) <<  6)

#define LK  1

#define TAB(t, a, b)  (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b)  (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i)  (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i)  (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)       ((n)<<23)
#define BI(n, c)    (((c)+((n)*4))<<16)
#define BT(n, c)    (((c)+((n)*4))<<21)
#define BA(n, c)    (((c)+((n)*4))<<16)
#define BB(n, c)    (((c)+((n)*4))<<11)
#define BC_(n, c)   (((c)+((n)*4))<<6)

#define BO_COND_TRUE   BO(12)
#define BO_COND_FALSE  BO( 4)
#define BO_ALWAYS      BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
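/*
 * Worked encoding examples (illustrative, not from the original source):
 *   ADD | TAB(3, 4, 5)               = 0x7c642a14   "add  r3,r4,r5"
 *   BC | BI(0, CR_EQ) | BO_COND_TRUE = 0x41820000   "beq  cr0,<disp>"
 * The field macros above simply place register and field numbers into
 * the fixed bit positions of a 32-bit instruction word.
 */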
static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]    = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]    = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ] = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE] = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]    = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]    = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]    = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]    = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU]   = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU]   = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU]   = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU]   = BC | BI(0, CR_GT) | BO_COND_TRUE,
};

/* The low bit here is set if the RA and RB fields must be inverted. */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]    = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]    = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]    = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]    = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]    = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]    = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU]   = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU]   = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU]   = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU]   = ISEL | BC_(0, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
{
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
}

static void tcg_out_prefix_align(TCGContext *s)
{
    if (tcg_out_need_prefix_align(s)) {
        tcg_out32(s, NOP);
    }
}

static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}
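/*
 * Illustrative note (not from the original source): a prefixed insn is a
 * pair of 32-bit words that must not cross a 64-byte boundary.  Since
 * insns are 4-byte aligned, only code_ptr & 0x3f == 0x3c would straddle
 * it, so a single NOP of padding always suffices; the "- 4" above
 * accounts for that NOP when pre-computing a pc-relative displacement.
 */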
/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves. */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves. */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, bool rc)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
}

static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb)
{
    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
}

static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, int me, bool rc)
{
    tcg_debug_assert((mb & 0x1f) == mb);
    tcg_debug_assert((me & 0x1f) == me);
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
}

static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb, int me)
{
    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}
static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn. */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}

/* Emit a move into ret of arg, if it can be done in one insn. */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn. */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn. */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }
    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required. */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value. */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns. */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }
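    /*
     * Illustrative note (not from the original source): only constants
     * needing a full 64-bit synthesis reach this point; anything
     * narrower was handled above with at most two insns, e.g.
     * 0x12345678 as "lis ret,0x1234; ori ret,ret,0x5678".  The
     * fallbacks below trade a load from the constant pool against an
     * up-to-5-insn synthesis (32-bit movi + sldi + oris + ori).
     */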
    /* Use the constant pool, if possible. */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }
    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}
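/*
 * Worked examples (illustrative, not from the original source), with
 * bit 0 being the MSB per the PPC mask convention:
 *   mask_operand(0x00ffff00)           -> mb = 8,  me = 23
 *   mask64_operand(0xffffffff00000000) -> mb = 0,  me = 31  (rldicr)
 *   mask64_operand(0x00000000ffffffff) -> mb = 32, me = 63  (rldicl)
 */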
static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);

    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form. */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
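/*
 * Illustrative example (not from the original source): without prefixed
 * insns, an aligned load at offset 0x12345678 from `base` splits as
 *     addis tmp, base, 0x1234
 *     lwz   rt, 0x5678(tmp)
 * per the l0/l1 computation above; the extra 0x4000 step covers the
 * case where the high part would otherwise be sign-extended negative.
 */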
static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/*
 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
 * If RC, then also set RC0.
 */
static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
                         bool const_arg2, TCGType type, bool rc)
{
    int mb, me;

    if (!const_arg2) {
        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
        return;
    }

    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    if ((arg2 & ~0xffff) == 0) {
        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
        return;
    }
    if ((arg2 & ~0xffff0000ull) == 0) {
        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
        return;
    }
    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
        return;
    }
    if (TCG_TARGET_REG_BITS == 64) {
        int sh = clz64(arg2);
        if (mask64_operand(arg2 << sh, &mb, &me)) {
            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
            return;
        }
    }
    /* Constraints should satisfy this. */
    g_assert_not_reached();
}
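/*
 * Illustrative examples for the constant cases above (not from the
 * original source):
 *   arg2 = 0x000000ff  ->  andi.   dest,arg1,0xff
 *   arg2 = 0x00ff0000  ->  andis.  dest,arg1,0xff
 *   arg2 = 0xff000000  ->  rlwinm. dest,arg1,0,0,7   (mask bits 0..7)
 * The TCG_CT_CONST_CMP constraint guarantees that one case matches.
 */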
static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        bool const_arg2, int cr, TCGType type)
{
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /*
     * Simplify the comparisons below wrt CMPI.
     * All of the tests are 16-bit, so a 32-bit sign extend always works.
     */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t)arg2 == arg2) {
                op = CMPI;
                break;
            }
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        tcg_debug_assert(cr == 0);
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
        return;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            tcg_debug_assert((int16_t)arg2 == arg2);
            op = CMPI;
            break;
        }
        op = CMP;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    default:
        g_assert_not_reached();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
    op |= RA(arg1);
    op |= const_arg2 ? arg2 & 0xffff : RB(arg2);
    tcg_out32(s, op);
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, src, src));
        return;
    }

    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
    if (neg) {
        tcg_out32(s, NEG | RT(dst) | RA(dst));
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.  Extra addition
         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
        return;
    }
    tcg_out_setcond_eq0(s, type, dst, src, false);
    if (neg) {
        tcg_out32(s, ADDI | TAI(dst, dst, -1));
    } else {
        tcg_out_xori32(s, dst, dst, 1);
    }
}

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg arg0, TCGReg arg1, TCGArg arg2,
                            bool const_arg2, bool neg)
{
    int sh;
    bool inv;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2. */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* With SETBC/SETBCR, we can always implement with 2 insns. */
    if (have_isa_3_10) {
        tcg_insn_unit bi, opc;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
        bi = tcg_to_bc[cond] & (0x1f << 16);
        if (tcg_to_bc[cond] & BO(8)) {
            opc = neg ? SETNBC : SETBC;
        } else {
            opc = neg ? SETNBCR : SETBCR;
        }
        tcg_out32(s, opc | RT(arg0) | bi);
        return;
    }

    /* Handle common and trivial cases before handling anything else. */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_NE:
            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit. */
            if (type == TCG_TYPE_I32) {
                if (neg) {
                    tcg_out_sari32(s, arg0, arg1, 31);
                } else {
                    tcg_out_shri32(s, arg0, arg1, 31);
                }
            } else {
                if (neg) {
                    tcg_out_sari64(s, arg0, arg1, 63);
                } else {
                    tcg_out_shri64(s, arg0, arg1, 63);
                }
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL. */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    inv = false;
    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_TSTEQ:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_TSTNE:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        inv = true;
        /* fall through */
    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30; /* CR7 CR_GT */
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        inv = true;
        /* fall through */
    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29; /* CR7 CR_LT */
        goto crtest;

    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        if (neg && inv) {
            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
        } else if (neg) {
            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
        } else if (inv) {
            tcg_out_xori32(s, arg0, arg0, 1);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false);
}

static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false);
}

static const TCGOutOpSetcond outop_setcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_setcond,
    .out_rri = tgen_setcondi,
};

static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true);
}

static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true);
}

static const TCGOutOpSetcond outop_negsetcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_negsetcond,
    .out_rri = tgen_negsetcondi,
};

static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
{
    tcg_out32(s, tcg_to_bc[cond] | bd);
}

static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
{
    int bd = 0;

    if (l->has_value) {
        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out_bc(s, cond, bd);
}

static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, false, 0, type);
    tcg_out_bc_lab(s, cond, l);
}

static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg arg1, tcg_target_long arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, true, 0, type);
    tcg_out_bc_lab(s, cond, l);
}

static const TCGOutOpBrcond outop_brcond = {
    .base.static_constraint = C_O0_I2(r, rC),
    .out_rr = tgen_brcond,
    .out_ri = tgen_brcondi,
};

static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2,
                         TCGArg v1, bool const_v1, TCGArg v2, bool const_v2)
{
    /* If for some reason both inputs are zero, don't produce bad code. */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion. */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out_bc(s, cond, 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

static const TCGOutOpMovcond outop_movcond = {
    .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ),
    .out = tgen_movcond,
};

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
        /* Note that the only other valid constant for a2 is 0. */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out_bc(s, TCG_COND_EQ, 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out_bc(s, TCG_COND_NE, 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}
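/*
 * Illustrative note (not from the original source): the double-word
 * compare below evaluates, e.g. for LTU,
 *     (ah == bh && al <u bl) || ah <u bh
 * entirely in condition-register bits: cr6 holds the high-part compare,
 * cr7 the (unsigned) low-part compare, and CRAND/CRANDC plus CROR fold
 * them into CR0[EQ] for the branch or setcond that follows.
 */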
CRANDC : CRAND); 2159 cond2 = tcg_unsigned_cond(cond); 2160 2161 tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); 2162 tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); 2163 tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); 2164 tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ)); 2165 break; 2166 2167 default: 2168 g_assert_not_reached(); 2169 } 2170} 2171 2172static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, 2173 TCGReg al, TCGReg ah, 2174 TCGArg bl, bool const_bl, 2175 TCGArg bh, bool const_bh) 2176{ 2177 tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); 2178 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0)); 2179 tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); 2180} 2181 2182#if TCG_TARGET_REG_BITS != 32 2183__attribute__((unused)) 2184#endif 2185static const TCGOutOpSetcond2 outop_setcond2 = { 2186 .base.static_constraint = C_O1_I4(r, r, r, rU, rC), 2187 .out = tgen_setcond2, 2188}; 2189 2190static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, 2191 TCGArg bl, bool const_bl, 2192 TCGArg bh, bool const_bh, TCGLabel *l) 2193{ 2194 assert(TCG_TARGET_REG_BITS == 32); 2195 tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); 2196 tcg_out_bc_lab(s, TCG_COND_EQ, l); 2197} 2198 2199#if TCG_TARGET_REG_BITS != 32 2200__attribute__((unused)) 2201#endif 2202static const TCGOutOpBrcond2 outop_brcond2 = { 2203 .base.static_constraint = C_O0_I4(r, r, rU, rC), 2204 .out = tgen_brcond2, 2205}; 2206 2207static void tcg_out_mb(TCGContext *s, TCGArg a0) 2208{ 2209 uint32_t insn; 2210 2211 if (a0 & TCG_MO_ST_LD) { 2212 insn = HWSYNC; 2213 } else { 2214 insn = LWSYNC; 2215 } 2216 2217 tcg_out32(s, insn); 2218} 2219 2220static void tcg_out_call_int(TCGContext *s, int lk, 2221 const tcg_insn_unit *target) 2222{ 2223#ifdef _CALL_AIX 2224 /* Look through the function descriptor. If the branch is in range and 2225 the TOC value is cheap to build, load the TOC and branch directly. */ 2226 const void *tgt = ((const void * const *)target)[0]; 2227 uintptr_t toc = ((const uintptr_t *)target)[1]; 2228 intptr_t diff = tcg_pcrel_diff(s, tgt); 2229 2230 if (in_range_b(diff) && toc == (uint32_t)toc) { 2231 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); 2232 tcg_out_b(s, lk, tgt); 2233 } else { 2234 /* Fold the low bits of the constant into the addresses below. */ 2235 intptr_t arg = (intptr_t)target; 2236 int ofs = (int16_t)arg; 2237 2238 if (ofs + 8 < 0x8000) { 2239 arg -= ofs; 2240 } else { 2241 ofs = 0; 2242 } 2243 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); 2244 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); 2245 tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); 2246 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); 2247 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2248 } 2249#elif defined(_CALL_ELF) && _CALL_ELF == 2 2250 intptr_t diff; 2251 2252 /* In the ELFv2 ABI, we have to set up r12 to contain the destination 2253 address, which the callee uses to compute its TOC address. */ 2254 /* FIXME: when the branch is in range, we could avoid r12 load if we 2255 knew that the destination uses the same TOC, and what its local 2256 entry point offset is. 
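   (Under ELFv2, a function has a global entry point that computes r2 from r12 and, typically eight bytes in, a local entry point that assumes r2 is already correct; with that knowledge we could branch to the local entry and skip the r12 setup.)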
*/ 2257 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); 2258 2259 diff = tcg_pcrel_diff(s, target); 2260 if (in_range_b(diff)) { 2261 tcg_out_b(s, lk, target); 2262 } else { 2263 tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); 2264 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2265 } 2266#else 2267 tcg_out_b(s, lk, target); 2268#endif 2269} 2270 2271static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 2272 const TCGHelperInfo *info) 2273{ 2274 tcg_out_call_int(s, LK, target); 2275} 2276 2277static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = { 2278 [MO_UB] = LBZX, 2279 [MO_UW] = LHZX, 2280 [MO_UL] = LWZX, 2281 [MO_UQ] = LDX, 2282 [MO_SW] = LHAX, 2283 [MO_SL] = LWAX, 2284 [MO_BSWAP | MO_UB] = LBZX, 2285 [MO_BSWAP | MO_UW] = LHBRX, 2286 [MO_BSWAP | MO_UL] = LWBRX, 2287 [MO_BSWAP | MO_UQ] = LDBRX, 2288}; 2289 2290static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = { 2291 [MO_UB] = STBX, 2292 [MO_UW] = STHX, 2293 [MO_UL] = STWX, 2294 [MO_UQ] = STDX, 2295 [MO_BSWAP | MO_UB] = STBX, 2296 [MO_BSWAP | MO_UW] = STHBRX, 2297 [MO_BSWAP | MO_UL] = STWBRX, 2298 [MO_BSWAP | MO_UQ] = STDBRX, 2299}; 2300 2301static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) 2302{ 2303 if (arg < 0) { 2304 arg = TCG_REG_TMP1; 2305 } 2306 tcg_out32(s, MFSPR | RT(arg) | LR); 2307 return arg; 2308} 2309 2310/* 2311 * For the purposes of ppc32 sorting 4 input registers into 4 argument 2312 * registers, there is an outside chance we would require 3 temps. 2313 */ 2314static const TCGLdstHelperParam ldst_helper_param = { 2315 .ra_gen = ldst_ra_gen, 2316 .ntmp = 3, 2317 .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 } 2318}; 2319 2320static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2321{ 2322 MemOp opc = get_memop(lb->oi); 2323 2324 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2325 return false; 2326 } 2327 2328 tcg_out_ld_helper_args(s, lb, &ldst_helper_param); 2329 tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]); 2330 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); 2331 2332 tcg_out_b(s, 0, lb->raddr); 2333 return true; 2334} 2335 2336static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2337{ 2338 MemOp opc = get_memop(lb->oi); 2339 2340 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2341 return false; 2342 } 2343 2344 tcg_out_st_helper_args(s, lb, &ldst_helper_param); 2345 tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]); 2346 2347 tcg_out_b(s, 0, lb->raddr); 2348 return true; 2349} 2350 2351typedef struct { 2352 TCGReg base; 2353 TCGReg index; 2354 TCGAtomAlign aa; 2355} HostAddress; 2356 2357bool tcg_target_has_memory_bswap(MemOp memop) 2358{ 2359 TCGAtomAlign aa; 2360 2361 if ((memop & MO_SIZE) <= MO_64) { 2362 return true; 2363 } 2364 2365 /* 2366 * Reject 16-byte memop with 16-byte atomicity, 2367 * but do allow a pair of 64-bit operations. 2368 */ 2369 aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); 2370 return aa.atom <= MO_64; 2371} 2372 2373/* We expect to use a 16-bit negative offset from ENV. */ 2374#define MIN_TLB_MASK_TABLE_OFS -32768 2375 2376/* 2377 * For system-mode, perform the TLB load and compare. 2378 * For user-mode, perform any required alignment tests. 2379 * In both cases, return a TCGLabelQemuLdst structure if the slow path 2380 * is required and fill in @h with the host address for the fast path. 
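 *
 * Illustrative sketch only (assumes a 64-bit host, cmp_off == 0, and a
 * naturally aligned access): the softmmu fast path built below comes
 * out approximately as
 *
 *   ld     tmp1, mask_off(env)    # tlb_mask[mmu_idx]
 *   ld     tmp2, table_off(env)   # tlb_table[mmu_idx]
 *   srdi   r0, addr, page_bits - CPU_TLB_ENTRY_BITS
 *   and    tmp1, tmp1, r0         # address of the TLB entry
 *   ldux   tmp2, tmp1, tmp2       # comparator; tmp1 += tmp2
 *   ld     tmp1, addend_off(tmp1) # host addend
 *   rldicr r0, addr, 0, 63 - page_bits
 *   cmpd   r0, tmp2
 *   bnel   <slow path>
 *
 * A NULL return means no slow path at all, which happens only for
 * user-mode accesses with no alignment check to perform.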
2381 */ 2382static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 2383 TCGReg addr, MemOpIdx oi, bool is_ld) 2384{ 2385 TCGType addr_type = s->addr_type; 2386 TCGLabelQemuLdst *ldst = NULL; 2387 MemOp opc = get_memop(oi); 2388 MemOp a_bits, s_bits; 2389 2390 /* 2391 * Book II, Section 1.4, Single-Copy Atomicity, specifies: 2392 * 2393 * Before 3.0, "An access that is not atomic is performed as a set of 2394 * smaller disjoint atomic accesses. In general, the number and alignment 2395 * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN. 2396 * 2397 * As of 3.0, "the non-atomic access is performed as described in 2398 * the corresponding list", which matches MO_ATOM_SUBALIGN. 2399 */ 2400 s_bits = opc & MO_SIZE; 2401 h->aa = atom_and_align_for_opc(s, opc, 2402 have_isa_3_00 ? MO_ATOM_SUBALIGN 2403 : MO_ATOM_IFALIGN, 2404 s_bits == MO_128); 2405 a_bits = h->aa.align; 2406 2407 if (tcg_use_softmmu) { 2408 int mem_index = get_mmuidx(oi); 2409 int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) 2410 : offsetof(CPUTLBEntry, addr_write); 2411 int fast_off = tlb_mask_table_ofs(s, mem_index); 2412 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); 2413 int table_off = fast_off + offsetof(CPUTLBDescFast, table); 2414 2415 ldst = new_ldst_label(s); 2416 ldst->is_ld = is_ld; 2417 ldst->oi = oi; 2418 ldst->addr_reg = addr; 2419 2420 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ 2421 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); 2422 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); 2423 2424 /* Extract the page index, shifted into place for tlb index. */ 2425 if (TCG_TARGET_REG_BITS == 32) { 2426 tcg_out_shri32(s, TCG_REG_R0, addr, 2427 s->page_bits - CPU_TLB_ENTRY_BITS); 2428 } else { 2429 tcg_out_shri64(s, TCG_REG_R0, addr, 2430 s->page_bits - CPU_TLB_ENTRY_BITS); 2431 } 2432 tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); 2433 2434 /* 2435 * Load the TLB comparator into TMP2. 2436 * For 64-bit host, always load the entire 64-bit slot for simplicity. 2437 * We will ignore the high bits with tcg_out_cmp(..., addr_type). 2438 */ 2439 if (cmp_off == 0) { 2440 tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX) 2441 | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); 2442 } else { 2443 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); 2444 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off); 2445 } 2446 2447 /* 2448 * Load the TLB addend for use on the fast path. 2449 * Do this asap to minimize any load use delay. 2450 */ 2451 if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) { 2452 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2453 offsetof(CPUTLBEntry, addend)); 2454 } 2455 2456 /* Clear the non-page, non-alignment bits from the address in R0. */ 2457 if (TCG_TARGET_REG_BITS == 32) { 2458 /* 2459 * We don't support unaligned accesses on 32-bits. 2460 * Preserve the bottom bits and thus trigger a comparison 2461 * failure on unaligned accesses. 2462 */ 2463 if (a_bits < s_bits) { 2464 a_bits = s_bits; 2465 } 2466 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0, 2467 (32 - a_bits) & 31, 31 - s->page_bits); 2468 } else { 2469 TCGReg t = addr; 2470 2471 /* 2472 * If the access is unaligned, we need to make sure we fail if we 2473 * cross a page boundary. The trick is to add the access size-1 2474 * to the address before masking the low bits. 
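 * (For instance, with 4 KiB pages, an 8-byte access has 7 added:
 * a page-crossing address such as 0xffc becomes 0x1003, while an
 * in-page 0xff0 only moves to 0xff7.)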
That will make the 2475 * address overflow to the next page if we cross a page boundary, 2476 * which will then force a mismatch of the TLB compare. 2477 */ 2478 if (a_bits < s_bits) { 2479 unsigned a_mask = (1 << a_bits) - 1; 2480 unsigned s_mask = (1 << s_bits) - 1; 2481 tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); 2482 t = TCG_REG_R0; 2483 } 2484 2485 /* Mask the address for the requested alignment. */ 2486 if (addr_type == TCG_TYPE_I32) { 2487 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, 2488 (32 - a_bits) & 31, 31 - s->page_bits); 2489 } else if (a_bits == 0) { 2490 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); 2491 } else { 2492 tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 2493 64 - s->page_bits, s->page_bits - a_bits); 2494 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); 2495 } 2496 } 2497 2498 /* Full comparison into cr0. */ 2499 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type); 2500 2501 /* Load a pointer into the current opcode w/conditional branch-link. */ 2502 ldst->label_ptr[0] = s->code_ptr; 2503 tcg_out_bc(s, TCG_COND_NE, LK); 2504 2505 h->base = TCG_REG_TMP1; 2506 } else { 2507 if (a_bits) { 2508 ldst = new_ldst_label(s); 2509 ldst->is_ld = is_ld; 2510 ldst->oi = oi; 2511 ldst->addr_reg = addr; 2512 2513 /* We are expecting a_bits to max out at 7, much lower than ANDI. */ 2514 tcg_debug_assert(a_bits < 16); 2515 tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1)); 2516 2517 ldst->label_ptr[0] = s->code_ptr; 2518 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); 2519 } 2520 2521 h->base = guest_base ? TCG_GUEST_BASE_REG : 0; 2522 } 2523 2524 if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) { 2525 /* Zero-extend the guest address for use in the host address. 
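   (The index register is added to the base without truncation, so stray high bits in a 32-bit guest address would corrupt the host address.)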
*/ 2526 tcg_out_ext32u(s, TCG_REG_TMP2, addr); 2527 h->index = TCG_REG_TMP2; 2528 } else { 2529 h->index = addr; 2530 } 2531 2532 return ldst; 2533} 2534 2535static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, 2536 TCGReg addr, MemOpIdx oi, TCGType data_type) 2537{ 2538 MemOp opc = get_memop(oi); 2539 TCGLabelQemuLdst *ldst; 2540 HostAddress h; 2541 2542 ldst = prepare_host_addr(s, &h, addr, oi, true); 2543 2544 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2545 if (opc & MO_BSWAP) { 2546 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2547 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2548 tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0)); 2549 } else if (h.base != 0) { 2550 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2551 tcg_out32(s, LWZX | TAB(datahi, h.base, h.index)); 2552 tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0)); 2553 } else if (h.index == datahi) { 2554 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2555 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2556 } else { 2557 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2558 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2559 } 2560 } else { 2561 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; 2562 if (!have_isa_2_06 && insn == LDBRX) { 2563 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2564 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2565 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0)); 2566 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); 2567 } else if (insn) { 2568 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2569 } else { 2570 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; 2571 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2572 tcg_out_movext(s, TCG_TYPE_REG, datalo, 2573 TCG_TYPE_REG, opc & MO_SSIZE, datalo); 2574 } 2575 } 2576 2577 if (ldst) { 2578 ldst->type = data_type; 2579 ldst->datalo_reg = datalo; 2580 ldst->datahi_reg = datahi; 2581 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2582 } 2583} 2584 2585static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, 2586 TCGReg addr, MemOpIdx oi, TCGType data_type) 2587{ 2588 MemOp opc = get_memop(oi); 2589 TCGLabelQemuLdst *ldst; 2590 HostAddress h; 2591 2592 ldst = prepare_host_addr(s, &h, addr, oi, false); 2593 2594 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2595 if (opc & MO_BSWAP) { 2596 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2597 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2598 tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0)); 2599 } else if (h.base != 0) { 2600 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2601 tcg_out32(s, STWX | SAB(datahi, h.base, h.index)); 2602 tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0)); 2603 } else { 2604 tcg_out32(s, STW | TAI(datahi, h.index, 0)); 2605 tcg_out32(s, STW | TAI(datalo, h.index, 4)); 2606 } 2607 } else { 2608 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; 2609 if (!have_isa_2_06 && insn == STDBRX) { 2610 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2611 tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4)); 2612 tcg_out_shri64(s, TCG_REG_R0, datalo, 32); 2613 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2)); 2614 } else { 2615 tcg_out32(s, insn | SAB(datalo, h.base, h.index)); 2616 } 2617 } 2618 2619 if (ldst) { 2620 ldst->type = data_type; 2621 ldst->datalo_reg = datalo; 2622 ldst->datahi_reg = datahi; 2623 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2624 } 2625} 2626 2627static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg 
datalo, TCGReg datahi, 2628 TCGReg addr_reg, MemOpIdx oi, bool is_ld) 2629{ 2630 TCGLabelQemuLdst *ldst; 2631 HostAddress h; 2632 bool need_bswap; 2633 uint32_t insn; 2634 TCGReg index; 2635 2636 ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld); 2637 2638 /* Compose the final address, as LQ/STQ have no indexing. */ 2639 index = h.index; 2640 if (h.base != 0) { 2641 index = TCG_REG_TMP1; 2642 tcg_out32(s, ADD | TAB(index, h.base, h.index)); 2643 } 2644 need_bswap = get_memop(oi) & MO_BSWAP; 2645 2646 if (h.aa.atom == MO_128) { 2647 tcg_debug_assert(!need_bswap); 2648 tcg_debug_assert(datalo & 1); 2649 tcg_debug_assert(datahi == datalo - 1); 2650 tcg_debug_assert(!is_ld || datahi != index); 2651 insn = is_ld ? LQ : STQ; 2652 tcg_out32(s, insn | TAI(datahi, index, 0)); 2653 } else { 2654 TCGReg d1, d2; 2655 2656 if (HOST_BIG_ENDIAN ^ need_bswap) { 2657 d1 = datahi, d2 = datalo; 2658 } else { 2659 d1 = datalo, d2 = datahi; 2660 } 2661 2662 if (need_bswap) { 2663 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); 2664 insn = is_ld ? LDBRX : STDBRX; 2665 tcg_out32(s, insn | TAB(d1, 0, index)); 2666 tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); 2667 } else { 2668 insn = is_ld ? LD : STD; 2669 tcg_out32(s, insn | TAI(d1, index, 0)); 2670 tcg_out32(s, insn | TAI(d2, index, 8)); 2671 } 2672 } 2673 2674 if (ldst) { 2675 ldst->type = TCG_TYPE_I128; 2676 ldst->datalo_reg = datalo; 2677 ldst->datahi_reg = datahi; 2678 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2679 } 2680} 2681 2682static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2683{ 2684 int i; 2685 for (i = 0; i < count; ++i) { 2686 p[i] = NOP; 2687 } 2688} 2689 2690/* Parameters for function call generation, used in tcg.c. */ 2691#define TCG_TARGET_STACK_ALIGN 16 2692 2693#ifdef _CALL_AIX 2694# define LINK_AREA_SIZE (6 * SZR) 2695# define LR_OFFSET (1 * SZR) 2696# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) 2697#elif defined(_CALL_DARWIN) 2698# define LINK_AREA_SIZE (6 * SZR) 2699# define LR_OFFSET (2 * SZR) 2700#elif TCG_TARGET_REG_BITS == 64 2701# if defined(_CALL_ELF) && _CALL_ELF == 2 2702# define LINK_AREA_SIZE (4 * SZR) 2703# define LR_OFFSET (1 * SZR) 2704# endif 2705#else /* TCG_TARGET_REG_BITS == 32 */ 2706# if defined(_CALL_SYSV) 2707# define LINK_AREA_SIZE (2 * SZR) 2708# define LR_OFFSET (1 * SZR) 2709# endif 2710#endif 2711#ifndef LR_OFFSET 2712# error "Unhandled abi" 2713#endif 2714#ifndef TCG_TARGET_CALL_STACK_OFFSET 2715# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE 2716#endif 2717 2718#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2719#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) 2720 2721#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ 2722 + TCG_STATIC_CALL_ARGS_SIZE \ 2723 + CPU_TEMP_BUF_SIZE \ 2724 + REG_SAVE_SIZE \ 2725 + TCG_TARGET_STACK_ALIGN - 1) \ 2726 & -TCG_TARGET_STACK_ALIGN) 2727 2728#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) 2729 2730static void tcg_target_qemu_prologue(TCGContext *s) 2731{ 2732 int i; 2733 2734#ifdef _CALL_AIX 2735 const void **desc = (const void **)s->code_ptr; 2736 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ 2737 desc[1] = 0; /* environment pointer */ 2738 s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ 2739#endif 2740 2741 tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, 2742 CPU_TEMP_BUF_SIZE); 2743 2744 /* Prologue */ 2745 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); 2746 tcg_out32(s, (SZR == 8 ? 
STDU : STWU) 2747 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); 2748 2749 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2750 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2751 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2752 } 2753 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2754 2755 if (!tcg_use_softmmu && guest_base) { 2756 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); 2757 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2758 } 2759 2760 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2761 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); 2762 tcg_out32(s, BCCTR | BO_ALWAYS); 2763 2764 /* Epilogue */ 2765 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2766 2767 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2768 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2769 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2770 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2771 } 2772 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); 2773 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); 2774 tcg_out32(s, BCLR | BO_ALWAYS); 2775} 2776 2777static void tcg_out_tb_start(TCGContext *s) 2778{ 2779 /* Load TCG_REG_TB. */ 2780 if (USE_REG_TB) { 2781 if (have_isa_3_00) { 2782 /* lnia REG_TB */ 2783 tcg_out_addpcis(s, TCG_REG_TB, 0); 2784 } else { 2785 /* bcl 20,31,$+4 (preferred form for getting nia) */ 2786 tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK); 2787 tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR); 2788 } 2789 } 2790} 2791 2792static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) 2793{ 2794 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); 2795 tcg_out_b(s, 0, tcg_code_gen_epilogue); 2796} 2797 2798static void tcg_out_goto_tb(TCGContext *s, int which) 2799{ 2800 uintptr_t ptr = get_jmp_target_addr(s, which); 2801 int16_t lo; 2802 2803 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2804 set_jmp_insn_offset(s, which); 2805 tcg_out32(s, NOP); 2806 2807 /* When branch is out of range, fall through to indirect. 
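   (The nop emitted above is the patch site: tb_target_set_jmp_target turns it into a direct branch when the destination is reachable and back into a nop otherwise, so the mtctr/bcctr sequence below must always remain valid.)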
*/ 2808 if (USE_REG_TB) { 2809 ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr); 2810 tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset); 2811 } else if (have_isa_3_10) { 2812 ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr); 2813 tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1); 2814 } else if (have_isa_3_00) { 2815 ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4; 2816 lo = offset; 2817 tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo); 2818 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2819 } else { 2820 lo = ptr; 2821 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo); 2822 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2823 } 2824 2825 tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); 2826 tcg_out32(s, BCCTR | BO_ALWAYS); 2827 set_jmp_reset_offset(s, which); 2828} 2829 2830void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2831 uintptr_t jmp_rx, uintptr_t jmp_rw) 2832{ 2833 uintptr_t addr = tb->jmp_target_addr[n]; 2834 intptr_t diff = addr - jmp_rx; 2835 tcg_insn_unit insn; 2836 2837 if (in_range_b(diff)) { 2838 insn = B | (diff & 0x3fffffc); 2839 } else { 2840 insn = NOP; 2841 } 2842 2843 qatomic_set((uint32_t *)jmp_rw, insn); 2844 flush_idcache_range(jmp_rx, jmp_rw, 4); 2845} 2846 2847 2848static void tgen_add(TCGContext *s, TCGType type, 2849 TCGReg a0, TCGReg a1, TCGReg a2) 2850{ 2851 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2852} 2853 2854static void tgen_addi(TCGContext *s, TCGType type, 2855 TCGReg a0, TCGReg a1, tcg_target_long a2) 2856{ 2857 tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); 2858} 2859 2860static const TCGOutOpBinary outop_add = { 2861 .base.static_constraint = C_O1_I2(r, r, rT), 2862 .out_rrr = tgen_add, 2863 .out_rri = tgen_addi, 2864}; 2865 2866static void tgen_and(TCGContext *s, TCGType type, 2867 TCGReg a0, TCGReg a1, TCGReg a2) 2868{ 2869 tcg_out32(s, AND | SAB(a1, a0, a2)); 2870} 2871 2872static void tgen_andi(TCGContext *s, TCGType type, 2873 TCGReg a0, TCGReg a1, tcg_target_long a2) 2874{ 2875 if (type == TCG_TYPE_I32) { 2876 tcg_out_andi32(s, a0, a1, a2); 2877 } else { 2878 tcg_out_andi64(s, a0, a1, a2); 2879 } 2880} 2881 2882static const TCGOutOpBinary outop_and = { 2883 .base.static_constraint = C_O1_I2(r, r, ri), 2884 .out_rrr = tgen_and, 2885 .out_rri = tgen_andi, 2886}; 2887 2888static void tgen_andc(TCGContext *s, TCGType type, 2889 TCGReg a0, TCGReg a1, TCGReg a2) 2890{ 2891 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2892} 2893 2894static const TCGOutOpBinary outop_andc = { 2895 .base.static_constraint = C_O1_I2(r, r, r), 2896 .out_rrr = tgen_andc, 2897}; 2898 2899static void tgen_clz(TCGContext *s, TCGType type, 2900 TCGReg a0, TCGReg a1, TCGReg a2) 2901{ 2902 uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD; 2903 tcg_out_cntxz(s, type, insn, a0, a1, a2, false); 2904} 2905 2906static void tgen_clzi(TCGContext *s, TCGType type, 2907 TCGReg a0, TCGReg a1, tcg_target_long a2) 2908{ 2909 uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD; 2910 tcg_out_cntxz(s, type, insn, a0, a1, a2, true); 2911} 2912 2913static const TCGOutOpBinary outop_clz = { 2914 .base.static_constraint = C_O1_I2(r, r, rZW), 2915 .out_rrr = tgen_clz, 2916 .out_rri = tgen_clzi, 2917}; 2918 2919static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 2920{ 2921 uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD; 2922 tcg_out32(s, insn | SAB(a1, a0, 0)); 2923} 2924 2925static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) 2926{ 2927 return have_isa_2_06 ? 
C_O1_I1(r, r) : C_NotImplemented; 2928} 2929 2930static const TCGOutOpUnary outop_ctpop = { 2931 .base.static_constraint = C_Dynamic, 2932 .base.dynamic_constraint = cset_ctpop, 2933 .out_rr = tgen_ctpop, 2934}; 2935 2936static void tgen_ctz(TCGContext *s, TCGType type, 2937 TCGReg a0, TCGReg a1, TCGReg a2) 2938{ 2939 uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD; 2940 tcg_out_cntxz(s, type, insn, a0, a1, a2, false); 2941} 2942 2943static void tgen_ctzi(TCGContext *s, TCGType type, 2944 TCGReg a0, TCGReg a1, tcg_target_long a2) 2945{ 2946 uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD; 2947 tcg_out_cntxz(s, type, insn, a0, a1, a2, true); 2948} 2949 2950static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags) 2951{ 2952 return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented; 2953} 2954 2955static const TCGOutOpBinary outop_ctz = { 2956 .base.static_constraint = C_Dynamic, 2957 .base.dynamic_constraint = cset_ctz, 2958 .out_rrr = tgen_ctz, 2959 .out_rri = tgen_ctzi, 2960}; 2961 2962static void tgen_eqv(TCGContext *s, TCGType type, 2963 TCGReg a0, TCGReg a1, TCGReg a2) 2964{ 2965 tcg_out32(s, EQV | SAB(a1, a0, a2)); 2966} 2967 2968static void tgen_divs(TCGContext *s, TCGType type, 2969 TCGReg a0, TCGReg a1, TCGReg a2) 2970{ 2971 uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD; 2972 tcg_out32(s, insn | TAB(a0, a1, a2)); 2973} 2974 2975static const TCGOutOpBinary outop_divs = { 2976 .base.static_constraint = C_O1_I2(r, r, r), 2977 .out_rrr = tgen_divs, 2978}; 2979 2980static const TCGOutOpDivRem outop_divs2 = { 2981 .base.static_constraint = C_NotImplemented, 2982}; 2983 2984static void tgen_divu(TCGContext *s, TCGType type, 2985 TCGReg a0, TCGReg a1, TCGReg a2) 2986{ 2987 uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU; 2988 tcg_out32(s, insn | TAB(a0, a1, a2)); 2989} 2990 2991static const TCGOutOpBinary outop_divu = { 2992 .base.static_constraint = C_O1_I2(r, r, r), 2993 .out_rrr = tgen_divu, 2994}; 2995 2996static const TCGOutOpDivRem outop_divu2 = { 2997 .base.static_constraint = C_NotImplemented, 2998}; 2999 3000static const TCGOutOpBinary outop_eqv = { 3001 .base.static_constraint = C_O1_I2(r, r, r), 3002 .out_rrr = tgen_eqv, 3003}; 3004 3005static void tgen_mul(TCGContext *s, TCGType type, 3006 TCGReg a0, TCGReg a1, TCGReg a2) 3007{ 3008 uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD; 3009 tcg_out32(s, insn | TAB(a0, a1, a2)); 3010} 3011 3012static void tgen_muli(TCGContext *s, TCGType type, 3013 TCGReg a0, TCGReg a1, tcg_target_long a2) 3014{ 3015 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 3016} 3017 3018static const TCGOutOpBinary outop_mul = { 3019 .base.static_constraint = C_O1_I2(r, r, rI), 3020 .out_rrr = tgen_mul, 3021 .out_rri = tgen_muli, 3022}; 3023 3024static const TCGOutOpMul2 outop_muls2 = { 3025 .base.static_constraint = C_NotImplemented, 3026}; 3027 3028static void tgen_mulsh(TCGContext *s, TCGType type, 3029 TCGReg a0, TCGReg a1, TCGReg a2) 3030{ 3031 uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD; 3032 tcg_out32(s, insn | TAB(a0, a1, a2)); 3033} 3034 3035static const TCGOutOpBinary outop_mulsh = { 3036 .base.static_constraint = C_O1_I2(r, r, r), 3037 .out_rrr = tgen_mulsh, 3038}; 3039 3040static const TCGOutOpMul2 outop_mulu2 = { 3041 .base.static_constraint = C_NotImplemented, 3042}; 3043 3044static void tgen_muluh(TCGContext *s, TCGType type, 3045 TCGReg a0, TCGReg a1, TCGReg a2) 3046{ 3047 uint32_t insn = type == TCG_TYPE_I32 ? 
MULHWU : MULHDU; 3048 tcg_out32(s, insn | TAB(a0, a1, a2)); 3049} 3050 3051static const TCGOutOpBinary outop_muluh = { 3052 .base.static_constraint = C_O1_I2(r, r, r), 3053 .out_rrr = tgen_muluh, 3054}; 3055 3056static void tgen_nand(TCGContext *s, TCGType type, 3057 TCGReg a0, TCGReg a1, TCGReg a2) 3058{ 3059 tcg_out32(s, NAND | SAB(a1, a0, a2)); 3060} 3061 3062static const TCGOutOpBinary outop_nand = { 3063 .base.static_constraint = C_O1_I2(r, r, r), 3064 .out_rrr = tgen_nand, 3065}; 3066 3067static void tgen_nor(TCGContext *s, TCGType type, 3068 TCGReg a0, TCGReg a1, TCGReg a2) 3069{ 3070 tcg_out32(s, NOR | SAB(a1, a0, a2)); 3071} 3072 3073static const TCGOutOpBinary outop_nor = { 3074 .base.static_constraint = C_O1_I2(r, r, r), 3075 .out_rrr = tgen_nor, 3076}; 3077 3078static void tgen_or(TCGContext *s, TCGType type, 3079 TCGReg a0, TCGReg a1, TCGReg a2) 3080{ 3081 tcg_out32(s, OR | SAB(a1, a0, a2)); 3082} 3083 3084static void tgen_ori(TCGContext *s, TCGType type, 3085 TCGReg a0, TCGReg a1, tcg_target_long a2) 3086{ 3087 tcg_out_ori32(s, a0, a1, a2); 3088} 3089 3090static const TCGOutOpBinary outop_or = { 3091 .base.static_constraint = C_O1_I2(r, r, rU), 3092 .out_rrr = tgen_or, 3093 .out_rri = tgen_ori, 3094}; 3095 3096static void tgen_orc(TCGContext *s, TCGType type, 3097 TCGReg a0, TCGReg a1, TCGReg a2) 3098{ 3099 tcg_out32(s, ORC | SAB(a1, a0, a2)); 3100} 3101 3102static const TCGOutOpBinary outop_orc = { 3103 .base.static_constraint = C_O1_I2(r, r, r), 3104 .out_rrr = tgen_orc, 3105}; 3106 3107static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags) 3108{ 3109 return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented; 3110} 3111 3112static void tgen_rems(TCGContext *s, TCGType type, 3113 TCGReg a0, TCGReg a1, TCGReg a2) 3114{ 3115 uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD; 3116 tcg_out32(s, insn | TAB(a0, a1, a2)); 3117} 3118 3119static const TCGOutOpBinary outop_rems = { 3120 .base.static_constraint = C_Dynamic, 3121 .base.dynamic_constraint = cset_mod, 3122 .out_rrr = tgen_rems, 3123}; 3124 3125static void tgen_remu(TCGContext *s, TCGType type, 3126 TCGReg a0, TCGReg a1, TCGReg a2) 3127{ 3128 uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD; 3129 tcg_out32(s, insn | TAB(a0, a1, a2)); 3130} 3131 3132static const TCGOutOpBinary outop_remu = { 3133 .base.static_constraint = C_Dynamic, 3134 .base.dynamic_constraint = cset_mod, 3135 .out_rrr = tgen_remu, 3136}; 3137 3138static void tgen_rotl(TCGContext *s, TCGType type, 3139 TCGReg a0, TCGReg a1, TCGReg a2) 3140{ 3141 if (type == TCG_TYPE_I32) { 3142 tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31)); 3143 } else { 3144 tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0)); 3145 } 3146} 3147 3148static void tgen_rotli(TCGContext *s, TCGType type, 3149 TCGReg a0, TCGReg a1, tcg_target_long a2) 3150{ 3151 if (type == TCG_TYPE_I32) { 3152 tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31); 3153 } else { 3154 tcg_out_rld(s, RLDICL, a0, a1, a2, 0); 3155 } 3156} 3157 3158static const TCGOutOpBinary outop_rotl = { 3159 .base.static_constraint = C_O1_I2(r, r, ri), 3160 .out_rrr = tgen_rotl, 3161 .out_rri = tgen_rotli, 3162}; 3163 3164static const TCGOutOpBinary outop_rotr = { 3165 .base.static_constraint = C_NotImplemented, 3166}; 3167 3168static void tgen_sar(TCGContext *s, TCGType type, 3169 TCGReg a0, TCGReg a1, TCGReg a2) 3170{ 3171 uint32_t insn = type == TCG_TYPE_I32 ? 
SRAW : SRAD; 3172 tcg_out32(s, insn | SAB(a1, a0, a2)); 3173} 3174 3175static void tgen_sari(TCGContext *s, TCGType type, 3176 TCGReg a0, TCGReg a1, tcg_target_long a2) 3177{ 3178 /* Limit immediate shift count lest we create an illegal insn. */ 3179 if (type == TCG_TYPE_I32) { 3180 tcg_out_sari32(s, a0, a1, a2 & 31); 3181 } else { 3182 tcg_out_sari64(s, a0, a1, a2 & 63); 3183 } 3184} 3185 3186static const TCGOutOpBinary outop_sar = { 3187 .base.static_constraint = C_O1_I2(r, r, ri), 3188 .out_rrr = tgen_sar, 3189 .out_rri = tgen_sari, 3190}; 3191 3192static void tgen_shl(TCGContext *s, TCGType type, 3193 TCGReg a0, TCGReg a1, TCGReg a2) 3194{ 3195 uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD; 3196 tcg_out32(s, insn | SAB(a1, a0, a2)); 3197} 3198 3199static void tgen_shli(TCGContext *s, TCGType type, 3200 TCGReg a0, TCGReg a1, tcg_target_long a2) 3201{ 3202 /* Limit immediate shift count lest we create an illegal insn. */ 3203 if (type == TCG_TYPE_I32) { 3204 tcg_out_shli32(s, a0, a1, a2 & 31); 3205 } else { 3206 tcg_out_shli64(s, a0, a1, a2 & 63); 3207 } 3208} 3209 3210static const TCGOutOpBinary outop_shl = { 3211 .base.static_constraint = C_O1_I2(r, r, ri), 3212 .out_rrr = tgen_shl, 3213 .out_rri = tgen_shli, 3214}; 3215 3216static void tgen_shr(TCGContext *s, TCGType type, 3217 TCGReg a0, TCGReg a1, TCGReg a2) 3218{ 3219 uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD; 3220 tcg_out32(s, insn | SAB(a1, a0, a2)); 3221} 3222 3223static void tgen_shri(TCGContext *s, TCGType type, 3224 TCGReg a0, TCGReg a1, tcg_target_long a2) 3225{ 3226 /* Limit immediate shift count lest we create an illegal insn. */ 3227 if (type == TCG_TYPE_I32) { 3228 tcg_out_shri32(s, a0, a1, a2 & 31); 3229 } else { 3230 tcg_out_shri64(s, a0, a1, a2 & 63); 3231 } 3232} 3233 3234static const TCGOutOpBinary outop_shr = { 3235 .base.static_constraint = C_O1_I2(r, r, ri), 3236 .out_rrr = tgen_shr, 3237 .out_rri = tgen_shri, 3238}; 3239 3240static void tgen_sub(TCGContext *s, TCGType type, 3241 TCGReg a0, TCGReg a1, TCGReg a2) 3242{ 3243 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 3244} 3245 3246static void tgen_subfi(TCGContext *s, TCGType type, 3247 TCGReg a0, tcg_target_long a1, TCGReg a2) 3248{ 3249 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 3250} 3251 3252static const TCGOutOpSubtract outop_sub = { 3253 .base.static_constraint = C_O1_I2(r, rI, r), 3254 .out_rrr = tgen_sub, 3255 .out_rir = tgen_subfi, 3256}; 3257 3258static void tgen_xor(TCGContext *s, TCGType type, 3259 TCGReg a0, TCGReg a1, TCGReg a2) 3260{ 3261 tcg_out32(s, XOR | SAB(a1, a0, a2)); 3262} 3263 3264static void tgen_xori(TCGContext *s, TCGType type, 3265 TCGReg a0, TCGReg a1, tcg_target_long a2) 3266{ 3267 tcg_out_xori32(s, a0, a1, a2); 3268} 3269 3270static const TCGOutOpBinary outop_xor = { 3271 .base.static_constraint = C_O1_I2(r, r, rU), 3272 .out_rrr = tgen_xor, 3273 .out_rri = tgen_xori, 3274}; 3275 3276static void tgen_bswap16(TCGContext *s, TCGType type, 3277 TCGReg dst, TCGReg src, unsigned flags) 3278{ 3279 TCGReg tmp = dst == src ? 
TCG_REG_R0 : dst; 3280 3281 if (have_isa_3_10) { 3282 tcg_out32(s, BRH | RA(dst) | RS(src)); 3283 if (flags & TCG_BSWAP_OS) { 3284 tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); 3285 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 3286 tcg_out_ext16u(s, dst, dst); 3287 } 3288 return; 3289 } 3290 3291 /* 3292 * In the following, 3293 * dep(a, b, m) -> (a & ~m) | (b & m) 3294 * 3295 * Begin with: src = xxxxabcd 3296 */ 3297 /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ 3298 tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); 3299 /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ 3300 tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); 3301 3302 if (flags & TCG_BSWAP_OS) { 3303 tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); 3304 } else { 3305 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 3306 } 3307} 3308 3309static const TCGOutOpBswap outop_bswap16 = { 3310 .base.static_constraint = C_O1_I1(r, r), 3311 .out_rr = tgen_bswap16, 3312}; 3313 3314static void tgen_bswap32(TCGContext *s, TCGType type, 3315 TCGReg dst, TCGReg src, unsigned flags) 3316{ 3317 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 3318 3319 if (have_isa_3_10) { 3320 tcg_out32(s, BRW | RA(dst) | RS(src)); 3321 if (flags & TCG_BSWAP_OS) { 3322 tcg_out_ext32s(s, dst, dst); 3323 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 3324 tcg_out_ext32u(s, dst, dst); 3325 } 3326 return; 3327 } 3328 3329 /* 3330 * Stolen from gcc's builtin_bswap32. 3331 * In the following, 3332 * dep(a, b, m) -> (a & ~m) | (b & m) 3333 * 3334 * Begin with: src = xxxxabcd 3335 */ 3336 /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ 3337 tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); 3338 /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ 3339 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); 3340 /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ 3341 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); 3342 3343 if (flags & TCG_BSWAP_OS) { 3344 tcg_out_ext32s(s, dst, tmp); 3345 } else { 3346 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 3347 } 3348} 3349 3350static const TCGOutOpBswap outop_bswap32 = { 3351 .base.static_constraint = C_O1_I1(r, r), 3352 .out_rr = tgen_bswap32, 3353}; 3354 3355#if TCG_TARGET_REG_BITS == 64 3356static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) 3357{ 3358 TCGReg t0 = dst == src ? TCG_REG_R0 : dst; 3359 TCGReg t1 = dst == src ? 
dst : TCG_REG_R0; 3360 3361 if (have_isa_3_10) { 3362 tcg_out32(s, BRD | RA(dst) | RS(src)); 3363 return; 3364 } 3365 3366 /* 3367 * In the following, 3368 * dep(a, b, m) -> (a & ~m) | (b & m) 3369 * 3370 * Begin with: src = abcdefgh 3371 */ 3372 /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ 3373 tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); 3374 /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ 3375 tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); 3376 /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ 3377 tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); 3378 3379 /* t0 = rol64(t0, 32) = hgfe0000 */ 3380 tcg_out_rld(s, RLDICL, t0, t0, 32, 0); 3381 /* t1 = rol64(src, 32) = efghabcd */ 3382 tcg_out_rld(s, RLDICL, t1, src, 32, 0); 3383 3384 /* t0 = dep(t0, rol32(t1, 8), 0xffffffff) = hgfebcda */ 3385 tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); 3386 /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ 3387 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); 3388 /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ 3389 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); 3390 3391 tcg_out_mov(s, TCG_TYPE_REG, dst, t0); 3392} 3393 3394static const TCGOutOpUnary outop_bswap64 = { 3395 .base.static_constraint = C_O1_I1(r, r), 3396 .out_rr = tgen_bswap64, 3397}; 3398#endif /* TCG_TARGET_REG_BITS == 64 */ 3399 3400static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 3401{ 3402 tcg_out32(s, NEG | RT(a0) | RA(a1)); 3403} 3404 3405static const TCGOutOpUnary outop_neg = { 3406 .base.static_constraint = C_O1_I1(r, r), 3407 .out_rr = tgen_neg, 3408}; 3409 3410static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) 3411{ 3412 tgen_nor(s, type, a0, a1, a1); 3413} 3414 3415static const TCGOutOpUnary outop_not = { 3416 .base.static_constraint = C_O1_I1(r, r), 3417 .out_rr = tgen_not, 3418}; 3419 3420static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 3421 unsigned ofs, unsigned len) 3422{ 3423 if (ofs == 0 && len <= 16) { 3424 tgen_andi(s, TCG_TYPE_I32, a0, a1, (1 << len) - 1); 3425 } else if (type == TCG_TYPE_I32) { 3426 tcg_out_rlw(s, RLWINM, a0, a1, 32 - ofs, 32 - len, 31); 3427 } else { 3428 tcg_out_rld(s, RLDICL, a0, a1, 64 - ofs, 64 - len); 3429 } 3430} 3431 3432static const TCGOutOpExtract outop_extract = { 3433 .base.static_constraint = C_O1_I1(r, r), 3434 .out_rr = tgen_extract, 3435}; 3436 3437static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, 3438 unsigned ofs, unsigned len) 3439{ 3440 if (ofs == 0) { 3441 switch (len) { 3442 case 8: 3443 tcg_out_ext8s(s, type, a0, a1); 3444 return; 3445 case 16: 3446 tcg_out_ext16s(s, type, a0, a1); 3447 return; 3448 case 32: 3449 tcg_out_ext32s(s, a0, a1); 3450 return; 3451 } 3452 } else if (ofs + len == 32) { 3453 tcg_out_sari32(s, a0, a1, ofs); 3454 return; 3455 } 3456 g_assert_not_reached(); 3457} 3458 3459static const TCGOutOpExtract outop_sextract = { 3460 .base.static_constraint = C_O1_I1(r, r), 3461 .out_rr = tgen_sextract, 3462}; 3463 3464 3465static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, 3466 const TCGArg args[TCG_MAX_OP_ARGS], 3467 const int const_args[TCG_MAX_OP_ARGS]) 3468{ 3469 TCGArg a0, a1; 3470 3471 switch (opc) { 3472 case INDEX_op_goto_ptr: 3473 tcg_out32(s, MTSPR | RS(args[0]) | CTR); 3474 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); 3475 tcg_out32(s, BCCTR | BO_ALWAYS); 3476 break; 3477 case INDEX_op_br: 3478 { 3479 TCGLabel *l = arg_label(args[0]); 3480 uint32_t insn = B; 3481 3482 if (l->has_value) { 3483 insn |= 
reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), 3484 l->u.value_ptr); 3485 } else { 3486 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); 3487 } 3488 tcg_out32(s, insn); 3489 } 3490 break; 3491 case INDEX_op_ld8u_i32: 3492 case INDEX_op_ld8u_i64: 3493 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 3494 break; 3495 case INDEX_op_ld8s_i32: 3496 case INDEX_op_ld8s_i64: 3497 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 3498 tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); 3499 break; 3500 case INDEX_op_ld16u_i32: 3501 case INDEX_op_ld16u_i64: 3502 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); 3503 break; 3504 case INDEX_op_ld16s_i32: 3505 case INDEX_op_ld16s_i64: 3506 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); 3507 break; 3508 case INDEX_op_ld_i32: 3509 case INDEX_op_ld32u_i64: 3510 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); 3511 break; 3512 case INDEX_op_ld32s_i64: 3513 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); 3514 break; 3515 case INDEX_op_ld_i64: 3516 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); 3517 break; 3518 case INDEX_op_st8_i32: 3519 case INDEX_op_st8_i64: 3520 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); 3521 break; 3522 case INDEX_op_st16_i32: 3523 case INDEX_op_st16_i64: 3524 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); 3525 break; 3526 case INDEX_op_st_i32: 3527 case INDEX_op_st32_i64: 3528 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); 3529 break; 3530 case INDEX_op_st_i64: 3531 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); 3532 break; 3533 3534 case INDEX_op_qemu_ld_i32: 3535 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); 3536 break; 3537 case INDEX_op_qemu_ld_i64: 3538 if (TCG_TARGET_REG_BITS == 64) { 3539 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64); 3540 } else { 3541 tcg_out_qemu_ld(s, args[0], args[1], args[2], 3542 args[3], TCG_TYPE_I64); 3543 } 3544 break; 3545 case INDEX_op_qemu_ld_i128: 3546 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3547 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); 3548 break; 3549 3550 case INDEX_op_qemu_st_i32: 3551 tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); 3552 break; 3553 case INDEX_op_qemu_st_i64: 3554 if (TCG_TARGET_REG_BITS == 64) { 3555 tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64); 3556 } else { 3557 tcg_out_qemu_st(s, args[0], args[1], args[2], 3558 args[3], TCG_TYPE_I64); 3559 } 3560 break; 3561 case INDEX_op_qemu_st_i128: 3562 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3563 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); 3564 break; 3565 3566 case INDEX_op_deposit_i32: 3567 if (const_args[2]) { 3568 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; 3569 tcg_out_andi32(s, args[0], args[0], ~mask); 3570 } else { 3571 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], 3572 32 - args[3] - args[4], 31 - args[3]); 3573 } 3574 break; 3575 case INDEX_op_deposit_i64: 3576 if (const_args[2]) { 3577 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; 3578 tcg_out_andi64(s, args[0], args[0], ~mask); 3579 } else { 3580 tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], 3581 64 - args[3] - args[4]); 3582 } 3583 break; 3584 3585#if TCG_TARGET_REG_BITS == 64 3586 case INDEX_op_add2_i64: 3587#else 3588 case INDEX_op_add2_i32: 3589#endif 3590 /* Note that the CA bit is defined based on the word size of the 3591 environment. 
So in 64-bit mode it's always carry-out of bit 63. 3592 The fallback code using deposit works just as well for 32-bit. */ 3593 a0 = args[0], a1 = args[1]; 3594 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { 3595 a0 = TCG_REG_R0; 3596 } 3597 if (const_args[4]) { 3598 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); 3599 } else { 3600 tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); 3601 } 3602 if (const_args[5]) { 3603 tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); 3604 } else { 3605 tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); 3606 } 3607 if (a0 != args[0]) { 3608 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3609 } 3610 break; 3611 3612#if TCG_TARGET_REG_BITS == 64 3613 case INDEX_op_sub2_i64: 3614#else 3615 case INDEX_op_sub2_i32: 3616#endif 3617 a0 = args[0], a1 = args[1]; 3618 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { 3619 a0 = TCG_REG_R0; 3620 } 3621 if (const_args[2]) { 3622 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); 3623 } else { 3624 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); 3625 } 3626 if (const_args[3]) { 3627 tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); 3628 } else { 3629 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); 3630 } 3631 if (a0 != args[0]) { 3632 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3633 } 3634 break; 3635 3636 case INDEX_op_mb: 3637 tcg_out_mb(s, args[0]); 3638 break; 3639 3640 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 3641 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 3642 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 3643 case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ 3644 case INDEX_op_extu_i32_i64: 3645 case INDEX_op_extrl_i64_i32: 3646 default: 3647 g_assert_not_reached(); 3648 } 3649} 3650 3651int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3652{ 3653 switch (opc) { 3654 case INDEX_op_and_vec: 3655 case INDEX_op_or_vec: 3656 case INDEX_op_xor_vec: 3657 case INDEX_op_andc_vec: 3658 case INDEX_op_not_vec: 3659 case INDEX_op_nor_vec: 3660 case INDEX_op_eqv_vec: 3661 case INDEX_op_nand_vec: 3662 return 1; 3663 case INDEX_op_orc_vec: 3664 return have_isa_2_07; 3665 case INDEX_op_add_vec: 3666 case INDEX_op_sub_vec: 3667 case INDEX_op_smax_vec: 3668 case INDEX_op_smin_vec: 3669 case INDEX_op_umax_vec: 3670 case INDEX_op_umin_vec: 3671 case INDEX_op_shlv_vec: 3672 case INDEX_op_shrv_vec: 3673 case INDEX_op_sarv_vec: 3674 case INDEX_op_rotlv_vec: 3675 return vece <= MO_32 || have_isa_2_07; 3676 case INDEX_op_ssadd_vec: 3677 case INDEX_op_sssub_vec: 3678 case INDEX_op_usadd_vec: 3679 case INDEX_op_ussub_vec: 3680 return vece <= MO_32; 3681 case INDEX_op_shli_vec: 3682 case INDEX_op_shri_vec: 3683 case INDEX_op_sari_vec: 3684 case INDEX_op_rotli_vec: 3685 return vece <= MO_32 || have_isa_2_07 ? -1 : 0; 3686 case INDEX_op_cmp_vec: 3687 case INDEX_op_cmpsel_vec: 3688 return vece <= MO_32 || have_isa_2_07 ? 1 : 0; 3689 case INDEX_op_neg_vec: 3690 return vece >= MO_32 && have_isa_3_00; 3691 case INDEX_op_mul_vec: 3692 switch (vece) { 3693 case MO_8: 3694 case MO_16: 3695 return -1; 3696 case MO_32: 3697 return have_isa_2_07 ? 
1 : -1; 3698 case MO_64: 3699 return have_isa_3_10; 3700 } 3701 return 0; 3702 case INDEX_op_bitsel_vec: 3703 return have_vsx; 3704 case INDEX_op_rotrv_vec: 3705 return -1; 3706 default: 3707 return 0; 3708 } 3709} 3710 3711static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 3712 TCGReg dst, TCGReg src) 3713{ 3714 tcg_debug_assert(dst >= TCG_REG_V0); 3715 3716 /* Splat from integer reg allowed via constraints for v3.00. */ 3717 if (src < TCG_REG_V0) { 3718 tcg_debug_assert(have_isa_3_00); 3719 switch (vece) { 3720 case MO_64: 3721 tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); 3722 return true; 3723 case MO_32: 3724 tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); 3725 return true; 3726 default: 3727 /* Fail, so that we fall back on either dupm or mov+dup. */ 3728 return false; 3729 } 3730 } 3731 3732 /* 3733 * Recall we use (or emulate) VSX integer loads, so the integer is 3734 * right justified within the left (zero-index) double-word. 3735 */ 3736 switch (vece) { 3737 case MO_8: 3738 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); 3739 break; 3740 case MO_16: 3741 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); 3742 break; 3743 case MO_32: 3744 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); 3745 break; 3746 case MO_64: 3747 if (have_vsx) { 3748 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); 3749 break; 3750 } 3751 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); 3752 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); 3753 break; 3754 default: 3755 g_assert_not_reached(); 3756 } 3757 return true; 3758} 3759 3760static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 3761 TCGReg out, TCGReg base, intptr_t offset) 3762{ 3763 int elt; 3764 3765 tcg_debug_assert(out >= TCG_REG_V0); 3766 switch (vece) { 3767 case MO_8: 3768 if (have_isa_3_00) { 3769 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); 3770 } else { 3771 tcg_out_mem_long(s, 0, LVEBX, out, base, offset); 3772 } 3773 elt = extract32(offset, 0, 4); 3774#if !HOST_BIG_ENDIAN 3775 elt ^= 15; 3776#endif 3777 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); 3778 break; 3779 case MO_16: 3780 tcg_debug_assert((offset & 1) == 0); 3781 if (have_isa_3_00) { 3782 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); 3783 } else { 3784 tcg_out_mem_long(s, 0, LVEHX, out, base, offset); 3785 } 3786 elt = extract32(offset, 1, 3); 3787#if !HOST_BIG_ENDIAN 3788 elt ^= 7; 3789#endif 3790 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); 3791 break; 3792 case MO_32: 3793 if (have_isa_3_00) { 3794 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); 3795 break; 3796 } 3797 tcg_debug_assert((offset & 3) == 0); 3798 tcg_out_mem_long(s, 0, LVEWX, out, base, offset); 3799 elt = extract32(offset, 2, 2); 3800#if !HOST_BIG_ENDIAN 3801 elt ^= 3; 3802#endif 3803 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); 3804 break; 3805 case MO_64: 3806 if (have_vsx) { 3807 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); 3808 break; 3809 } 3810 tcg_debug_assert((offset & 7) == 0); 3811 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); 3812 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); 3813 elt = extract32(offset, 3, 1); 3814#if !HOST_BIG_ENDIAN 3815 elt = !elt; 3816#endif 3817 if (elt) { 3818 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); 3819 } else { 3820 tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); 3821 } 3822 break; 3823 default: 3824 g_assert_not_reached(); 3825 } 3826 return true; 3827} 3828 3829static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg 
a1) 3830{ 3831 tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1)); 3832} 3833 3834static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2) 3835{ 3836 tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2)); 3837} 3838 3839static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2) 3840{ 3841 tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2)); 3842} 3843 3844static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2) 3845{ 3846 tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2)); 3847} 3848 3849static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2) 3850{ 3851 tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2)); 3852} 3853 3854static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d, 3855 TCGReg c, TCGReg t, TCGReg f) 3856{ 3857 if (TCG_TARGET_HAS_bitsel_vec) { 3858 tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f)); 3859 } else { 3860 tcg_out_and_vec(s, TCG_VEC_TMP2, t, c); 3861 tcg_out_andc_vec(s, d, f, c); 3862 tcg_out_or_vec(s, d, d, TCG_VEC_TMP2); 3863 } 3864} 3865 3866static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0, 3867 TCGReg a1, TCGReg a2, TCGCond cond) 3868{ 3869 static const uint32_t 3870 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, 3871 ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, 3872 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, 3873 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }; 3874 uint32_t insn; 3875 3876 bool need_swap = false, need_inv = false; 3877 3878 tcg_debug_assert(vece <= MO_32 || have_isa_2_07); 3879 3880 switch (cond) { 3881 case TCG_COND_EQ: 3882 case TCG_COND_GT: 3883 case TCG_COND_GTU: 3884 break; 3885 case TCG_COND_NE: 3886 if (have_isa_3_00 && vece <= MO_32) { 3887 break; 3888 } 3889 /* fall through */ 3890 case TCG_COND_LE: 3891 case TCG_COND_LEU: 3892 need_inv = true; 3893 break; 3894 case TCG_COND_LT: 3895 case TCG_COND_LTU: 3896 need_swap = true; 3897 break; 3898 case TCG_COND_GE: 3899 case TCG_COND_GEU: 3900 need_swap = need_inv = true; 3901 break; 3902 default: 3903 g_assert_not_reached(); 3904 } 3905 3906 if (need_inv) { 3907 cond = tcg_invert_cond(cond); 3908 } 3909 if (need_swap) { 3910 TCGReg swap = a1; 3911 a1 = a2; 3912 a2 = swap; 3913 cond = tcg_swap_cond(cond); 3914 } 3915 3916 switch (cond) { 3917 case TCG_COND_EQ: 3918 insn = eq_op[vece]; 3919 break; 3920 case TCG_COND_NE: 3921 insn = ne_op[vece]; 3922 break; 3923 case TCG_COND_GT: 3924 insn = gts_op[vece]; 3925 break; 3926 case TCG_COND_GTU: 3927 insn = gtu_op[vece]; 3928 break; 3929 default: 3930 g_assert_not_reached(); 3931 } 3932 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 3933 3934 return need_inv; 3935} 3936 3937static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0, 3938 TCGReg a1, TCGReg a2, TCGCond cond) 3939{ 3940 if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) { 3941 tcg_out_not_vec(s, a0, a0); 3942 } 3943} 3944 3945static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0, 3946 TCGReg c1, TCGReg c2, TCGArg v3, int const_v3, 3947 TCGReg v4, TCGCond cond) 3948{ 3949 bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond); 3950 3951 if (!const_v3) { 3952 if (inv) { 3953 tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3); 3954 } else { 3955 tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4); 3956 } 3957 } else if (v3) { 3958 if (inv) { 3959 tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1); 3960 } else { 3961 tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1); 3962 } 3963 } else { 3964 if (inv) { 3965 tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1); 3966 
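 /* TMP1 is the complement of the cond mask here, so plain AND keeps v4 exactly where the constant-zero v3 does not apply. */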
} else { 3967 tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1); 3968 } 3969 } 3970} 3971 3972static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3973 unsigned vecl, unsigned vece, 3974 const TCGArg args[TCG_MAX_OP_ARGS], 3975 const int const_args[TCG_MAX_OP_ARGS]) 3976{ 3977 static const uint32_t 3978 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, 3979 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, 3980 mul_op[4] = { 0, 0, VMULUWM, VMULLD }, 3981 neg_op[4] = { 0, 0, VNEGW, VNEGD }, 3982 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, 3983 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, 3984 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, 3985 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, 3986 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 3987 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 3988 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 3989 smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, 3990 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, 3991 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, 3992 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, 3993 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, 3994 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, 3995 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, 3996 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, 3997 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, 3998 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; 3999 4000 TCGType type = vecl + TCG_TYPE_V64; 4001 TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; 4002 uint32_t insn; 4003 4004 switch (opc) { 4005 case INDEX_op_ld_vec: 4006 tcg_out_ld(s, type, a0, a1, a2); 4007 return; 4008 case INDEX_op_st_vec: 4009 tcg_out_st(s, type, a0, a1, a2); 4010 return; 4011 case INDEX_op_dupm_vec: 4012 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 4013 return; 4014 4015 case INDEX_op_add_vec: 4016 insn = add_op[vece]; 4017 break; 4018 case INDEX_op_sub_vec: 4019 insn = sub_op[vece]; 4020 break; 4021 case INDEX_op_neg_vec: 4022 insn = neg_op[vece]; 4023 a2 = a1; 4024 a1 = 0; 4025 break; 4026 case INDEX_op_mul_vec: 4027 insn = mul_op[vece]; 4028 break; 4029 case INDEX_op_ssadd_vec: 4030 insn = ssadd_op[vece]; 4031 break; 4032 case INDEX_op_sssub_vec: 4033 insn = sssub_op[vece]; 4034 break; 4035 case INDEX_op_usadd_vec: 4036 insn = usadd_op[vece]; 4037 break; 4038 case INDEX_op_ussub_vec: 4039 insn = ussub_op[vece]; 4040 break; 4041 case INDEX_op_smin_vec: 4042 insn = smin_op[vece]; 4043 break; 4044 case INDEX_op_umin_vec: 4045 insn = umin_op[vece]; 4046 break; 4047 case INDEX_op_smax_vec: 4048 insn = smax_op[vece]; 4049 break; 4050 case INDEX_op_umax_vec: 4051 insn = umax_op[vece]; 4052 break; 4053 case INDEX_op_shlv_vec: 4054 insn = shlv_op[vece]; 4055 break; 4056 case INDEX_op_shrv_vec: 4057 insn = shrv_op[vece]; 4058 break; 4059 case INDEX_op_sarv_vec: 4060 insn = sarv_op[vece]; 4061 break; 4062 case INDEX_op_and_vec: 4063 tcg_out_and_vec(s, a0, a1, a2); 4064 return; 4065 case INDEX_op_or_vec: 4066 tcg_out_or_vec(s, a0, a1, a2); 4067 return; 4068 case INDEX_op_xor_vec: 4069 insn = VXOR; 4070 break; 4071 case INDEX_op_andc_vec: 4072 tcg_out_andc_vec(s, a0, a1, a2); 4073 return; 4074 case INDEX_op_not_vec: 4075 tcg_out_not_vec(s, a0, a1); 4076 return; 4077 case INDEX_op_orc_vec: 4078 tcg_out_orc_vec(s, a0, a1, a2); 4079 return; 4080 case INDEX_op_nand_vec: 4081 insn = VNAND; 4082 break; 4083 case INDEX_op_nor_vec: 4084 insn = VNOR; 4085 break; 4086 case INDEX_op_eqv_vec: 4087 insn = VEQV; 4088 break; 4089 4090 case INDEX_op_cmp_vec: 4091 tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]); 4092 return; 4093 case INDEX_op_cmpsel_vec: 
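 /* args[3] is v3 (treated as all-zeros or all-ones when const_args[3]), args[4] is v4, and args[5] is the comparison condition. */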
    case INDEX_op_dup2_vec:
        assert(TCG_TARGET_REG_BITS == 32);
        /* With inputs a1 = xLxx, a2 = xHxx */
        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);         /* tmp = HLxx */
        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);         /* a0 = HLHL */
        return;

    case INDEX_op_ppc_mrgh_vec:
        insn = mrgh_op[vece];
        break;
    case INDEX_op_ppc_mrgl_vec:
        insn = mrgl_op[vece];
        break;
    case INDEX_op_ppc_muleu_vec:
        insn = muleu_op[vece];
        break;
    case INDEX_op_ppc_mulou_vec:
        insn = mulou_op[vece];
        break;
    case INDEX_op_ppc_pkum_vec:
        insn = pkum_op[vece];
        break;
    case INDEX_op_rotlv_vec:
        insn = rotl_op[vece];
        break;
    case INDEX_op_ppc_msum_vec:
        tcg_debug_assert(vece == MO_16);
        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
        return;

    case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
    case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(insn != 0);
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}

static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
{
    TCGv_vec t1;

    if (vece == MO_32) {
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using negative numbers gets us the 4th bit easily; e.g. a
         * shift by 20 splats as sextract32(20, 0, 5) = -12, whose low
         * five bits are again 20.
         */
        imm = sextract32(imm, 0, 5);
    } else {
        imm &= (8 << vece) - 1;
    }

    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
    t1 = tcg_constant_vec(type, MO_8, imm);
    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
}

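/*
 * Expand a multiply for element sizes that lack a direct instruction.
 * For MO_8/MO_16, the even/odd multiplies (vmuleu, vmulou) produce
 * double-width products; interleaving those with merge-high/merge-low
 * restores element order, and the pack-modulo (vpku.um) keeps the low
 * half of each product.  For MO_32, with 16-bit halves aH:aL and bH:bL,
 * we use
 *   a * b mod 2^32 = ((aH * bL + aL * bH) << 16) + aL * bL
 * rotating b by 16 so that VMSUMUHM forms both cross products in one
 * multiply-sum, while VMULOUH supplies aL * bL.
 */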
static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2)
{
    TCGv_vec t1 = tcg_temp_new_vec(type);
    TCGv_vec t2 = tcg_temp_new_vec(type);
    TCGv_vec c0, c16;

    switch (vece) {
    case MO_8:
    case MO_16:
        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
        break;

    case MO_32:
        tcg_debug_assert(!have_isa_2_07);
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using -16 is a quick way to represent 16.
         */
        c16 = tcg_constant_vec(type, MO_8, -16);
        c0 = tcg_constant_vec(type, MO_8, 0);

        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
        tcg_gen_add_vec(MO_32, v0, t1, t2);
        break;

    default:
        g_assert_not_reached();
    }
    tcg_temp_free_vec(t1);
    tcg_temp_free_vec(t2);
}

void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t0;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);

    switch (opc) {
    case INDEX_op_shli_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
        break;
    case INDEX_op_shri_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
        break;
    case INDEX_op_sari_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
        break;
    case INDEX_op_rotli_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
        break;
    case INDEX_op_mul_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        expand_vec_mul(type, vece, v0, v1, v2);
        break;
    case INDEX_op_rotrv_vec:
        /* Rotate right by v2 is rotate left by the negated count. */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t0 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t0, v2);
        tcg_gen_rotlv_vec(vece, v0, v1, t0);
        tcg_temp_free_vec(t0);
        break;
    default:
        g_assert_not_reached();
    }
    va_end(va);
}

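/*
 * Constraint sets are read as C_Om_In(output..., input...): 'r' is any
 * general register, 'v' any vector register, and '0' ties an input to
 * output 0.  Capital letters additionally admit particular constants in
 * place of a register; in this backend 'Z' accepts zero and 'M'
 * minus-one, so e.g. rZM is a register, 0, or -1.
 */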
static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(r, r);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);
    case INDEX_op_add2_i64:
    case INDEX_op_add2_i32:
        return C_O2_I4(r, r, r, r, rI, rZM);
    case INDEX_op_sub2_i64:
    case INDEX_op_sub2_i32:
        return C_O2_I4(r, r, rI, rZM, r, r);

    case INDEX_op_qemu_ld_i32:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);

    case INDEX_op_qemu_st_i32:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);

    case INDEX_op_qemu_ld_i128:
        return C_N1O1_I1(o, m, r);
    case INDEX_op_qemu_st_i128:
        return C_O0_I3(o, m, r);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    case INDEX_op_dup_vec:
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);
    case INDEX_op_cmpsel_vec:
        return C_O1_I4(v, v, v, vZM, v);

    default:
        return C_NotImplemented;
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
    if (have_altivec) {
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);  /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);  /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2);  /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
    }
}

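/*
 * Host unwind info for the generated code, in DWARF .debug_frame form.
 * Offsets are (s/u)leb128 encoded: seven bits per byte, low bits first,
 * the high bit flagging a continuation.  E.g. a frame size of 432
 * (0x1b0) becomes the byte pair 0xb0 0x03, which is why fde_def_cfa
 * splits FRAME_SIZE across two bytes below.  tcg_register_jit then
 * appends one DW_CFA_offset rule (0x80 | regno) per callee-saved
 * register, with the offset factored by the data alignment of -SZR.
 */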
#ifdef __ELF__
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif

static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),      /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member. */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,             /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    uint8_t *p = &debug_frame.fde_reg_ofs[3];
    int i;

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
        p[0] = 0x80 + tcg_target_callee_save_regs[i];
        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
    }

    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif /* __ELF__ */

#undef VMULEUB
#undef VMULEUH
#undef VMULEUW
#undef VMULOUB
#undef VMULOUH
#undef VMULOUW
#undef VMSUMUHM