/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# ifdef _CALL_AIX
    /* ok */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* ok */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* ok */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB      TCG_REG_R31
#define USE_REG_TB      (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_S32   0x400
#define TCG_CT_CONST_U32   0x800
#define TCG_CT_CONST_ZERO  0x1000
#define TCG_CT_CONST_MONE  0x2000
#define TCG_CT_CONST_WSZ   0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}
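/*
 * Illustrative note, not from the original source: the I-form branch (B)
 * encodes a signed 26-bit byte displacement, so a direct branch reaches
 * roughly +/- 32 MiB of the branch itself; in_range_b() above checks
 * exactly that before a direct branch or branch relocation is used.
 */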
static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc)  ((opc)<<26)
#define XO19(opc)  (OPCD(19)|((opc)<<1))
#define MD30(opc)  (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc)  (OPCD(31)|((opc)<<1))
#define XO58(opc)  (OPCD(58)|(opc))
#define XO62(opc)  (OPCD(62)|(opc))
#define VX4(opc)   (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)

#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define LD     XO58( 0)
#define LDX    XO31( 21)
#define LDU    XO58( 1)
#define LDUX   XO31( 53)
#define LWA    XO58( 2)
#define LWAX   XO31(341)
#define LQ     OPCD( 56)

#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)
#define STD    XO62( 0)
#define STDU   XO62( 1)
#define STDX   XO31(149)
#define STQ    XO62( 2)

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)   /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)   /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)    /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)    /* v3.00 */
#define LXVWSX     (XO31(364) | 1)   /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)   /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)   /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5)  /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)    /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)      /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)     /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)      /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)      /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)      /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)      /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)      /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)      /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)      /* v2.07 */
#define VCMPNEB    VX4(7)        /* v3.00 */
#define VCMPNEH    VX4(71)       /* v3.00 */
#define VCMPNEW    VX4(135)      /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)     /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)     /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)      /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)      /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)      /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)      /* v2.07 */
#define VMULUWM    VX4(137)      /* v2.07 */
#define VMULLD     VX4(457)      /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)     /* v2.07 */
#define VNAND      VX4(1412)     /* v2.07 */
#define VORC       VX4(1348)     /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r)   ((r)<<21)
#define RS(r)   ((r)<<21)
#define RA(r)   ((r)<<16)
#define RB(r)   ((r)<<11)
#define TO(t)   ((t)<<21)
#define SH(s)   ((s)<<11)
#define MB(b)   ((b)<<6)
#define ME(e)   ((e)<<1)
#define BO(o)   ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b)  (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) << 6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
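/*
 * Illustrative note, not from the original source: an instruction word is
 * formed by OR-ing an opcode macro with the field macros above, e.g.
 * tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R4, TCG_REG_R5)) emits
 * "add r3, r4, r5".
 */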
#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}
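/*
 * Illustrative note, not from the original source: the 32-bit shift helpers
 * above are emitted as rotate-and-mask instructions; e.g.
 * tcg_out_shri32(s, dst, src, 8) produces "rlwinm dst, src, 24, 8, 31",
 * which is the canonical encoding of "srwi dst, src, 8".
 */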
static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = tcg_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       tcg_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = tcg_tbrel_diff(s, NULL);
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        g_assert_not_reached();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
}
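/*
 * Illustrative note, not from the original source: cntlzw/cntlzd return
 * 32 (resp. 64) only when the source is zero, so the shift right by 5
 * (resp. 6) above leaves exactly the value of (src == 0) in dst.
 */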
static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
{
    /* X != 0 implies X + -1 generates a carry.  Extra addition
       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
    if (dst != src) {
        tcg_out32(s, ADDIC | TAI(dst, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, dst, src));
    } else {
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    }
}

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        if (crop) {
            tcg_out32(s, crop);
        }
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
{
    if (l->has_value) {
        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out32(s, bc);
}

static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
{
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
    tcg_out_bc(s, tcg_to_bc[cond], l);
}

static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
                            TCGArg v2, bool const_c2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out32(s, tcg_to_bc[cond] | 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}
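/*
 * Illustrative note, not from the original source: "tcg_to_bc[cond] | 8"
 * above sets the conditional branch's displacement field to 8 bytes, i.e.
 * the branch skips exactly the single 4-byte move instruction that follows.
 */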
static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}

static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                         const int *const_args)
{
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond = args[4], cond2;
    TCGArg al, ah, bl, bh;
    int blconst, bhconst;
    int op, bit1, bit2;

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];
    blconst = const_args[2];
    bhconst = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args + 1, const_args + 1);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
}

static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args)
{
    tcg_out_cmp2(s, args, const_args);
    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
}

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    uint32_t insn;

    if (a0 & TCG_MO_ST_LD) {
        insn = HWSYNC;
    } else {
        insn = LWSYNC;
    }

    tcg_out32(s, insn);
}
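/*
 * Illustrative note, not from the original source: lwsync orders every
 * combination except store-followed-by-load, so only barriers that include
 * TCG_MO_ST_LD need the heavier hwsync above.
 */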
static void tcg_out_call_int(TCGContext *s, int lk,
                             const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the descriptor.  If the branch is in range and we
       don't have to spend too much effort on building the TOC, branch
       to the target directly.  */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, lk, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid r12 load if we
       knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, lk, target);
    } else {
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#else
    tcg_out_b(s, lk, target);
#endif
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, LK, target);
}

static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_UQ] = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_UQ] = LDBRX,
};

static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_UQ] = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_UQ] = STDBRX,
};
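/*
 * Illustrative note, not from the original source: there are no
 * byte-reversed sign-extending loads, so the MO_BSWAP | MO_SW/MO_SL slots
 * above stay zero; tcg_out_qemu_ld() detects the zero entry and emits the
 * unsigned byte-reversed load followed by a separate sign extension.
 */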
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
    if (arg < 0) {
        arg = TCG_REG_TMP1;
    }
    tcg_out32(s, MFSPR | RT(arg) | LR);
    return arg;
}

/*
 * For the purposes of ppc32 sorting 4 input registers into 4 argument
 * registers, there is an outside chance we would require 3 temps.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ra_gen = ldst_ra_gen,
    .ntmp = 3,
    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    TCGAtomAlign aa;

    if ((memop & MO_SIZE) <= MO_64) {
        return true;
    }

    /*
     * Reject 16-byte memop with 16-byte atomicity,
     * but do allow a pair of 64-bit operations.
     */
    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
    return aa.atom <= MO_64;
}

/* We expect to use a 16-bit negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -32768

/*
 * For softmmu, perform the TLB load and compare.
 * For useronly, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addrlo, TCGReg addrhi,
                                           MemOpIdx oi, bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp a_bits, s_bits;

    /*
     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
     *
     * Before 3.0, "An access that is not atomic is performed as a set of
     * smaller disjoint atomic accesses. In general, the number and alignment
     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
     *
     * As of 3.0, "the non-atomic access is performed as described in
     * the corresponding list", which matches MO_ATOM_SUBALIGN.
     */
    s_bits = opc & MO_SIZE;
    h->aa = atom_and_align_for_opc(s, opc,
                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
                                                 : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_bits = h->aa.align;

#ifdef CONFIG_SOFTMMU
    int mem_index = get_mmuidx(oi);
    int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
                        : offsetof(CPUTLBEntry, addr_write);
    int fast_off = tlb_mask_table_ofs(s, mem_index);
    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
    int table_off = fast_off + offsetof(CPUTLBDescFast, table);

    ldst = new_ldst_label(s);
    ldst->is_ld = is_ld;
    ldst->oi = oi;
    ldst->addrlo_reg = addrlo;
    ldst->addrhi_reg = addrhi;

    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
*/ 2098 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); 2099 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); 2100 2101 /* Extract the page index, shifted into place for tlb index. */ 2102 if (TCG_TARGET_REG_BITS == 32) { 2103 tcg_out_shri32(s, TCG_REG_R0, addrlo, 2104 s->page_bits - CPU_TLB_ENTRY_BITS); 2105 } else { 2106 tcg_out_shri64(s, TCG_REG_R0, addrlo, 2107 s->page_bits - CPU_TLB_ENTRY_BITS); 2108 } 2109 tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); 2110 2111 /* 2112 * Load the (low part) TLB comparator into TMP2. 2113 * For 64-bit host, always load the entire 64-bit slot for simplicity. 2114 * We will ignore the high bits with tcg_out_cmp(..., addr_type). 2115 */ 2116 if (TCG_TARGET_REG_BITS == 64) { 2117 if (cmp_off == 0) { 2118 tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); 2119 } else { 2120 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); 2121 tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off); 2122 } 2123 } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) { 2124 tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); 2125 } else { 2126 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); 2127 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2128 cmp_off + 4 * HOST_BIG_ENDIAN); 2129 } 2130 2131 /* 2132 * Load the TLB addend for use on the fast path. 2133 * Do this asap to minimize any load use delay. 2134 */ 2135 if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) { 2136 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2137 offsetof(CPUTLBEntry, addend)); 2138 } 2139 2140 /* Clear the non-page, non-alignment bits from the address in R0. */ 2141 if (TCG_TARGET_REG_BITS == 32) { 2142 /* 2143 * We don't support unaligned accesses on 32-bits. 2144 * Preserve the bottom bits and thus trigger a comparison 2145 * failure on unaligned accesses. 2146 */ 2147 if (a_bits < s_bits) { 2148 a_bits = s_bits; 2149 } 2150 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, 2151 (32 - a_bits) & 31, 31 - s->page_bits); 2152 } else { 2153 TCGReg t = addrlo; 2154 2155 /* 2156 * If the access is unaligned, we need to make sure we fail if we 2157 * cross a page boundary. The trick is to add the access size-1 2158 * to the address before masking the low bits. That will make the 2159 * address overflow to the next page if we cross a page boundary, 2160 * which will then force a mismatch of the TLB compare. 2161 */ 2162 if (a_bits < s_bits) { 2163 unsigned a_mask = (1 << a_bits) - 1; 2164 unsigned s_mask = (1 << s_bits) - 1; 2165 tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); 2166 t = TCG_REG_R0; 2167 } 2168 2169 /* Mask the address for the requested alignment. */ 2170 if (addr_type == TCG_TYPE_I32) { 2171 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, 2172 (32 - a_bits) & 31, 31 - s->page_bits); 2173 } else if (a_bits == 0) { 2174 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); 2175 } else { 2176 tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 2177 64 - s->page_bits, s->page_bits - a_bits); 2178 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); 2179 } 2180 } 2181 2182 if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) { 2183 /* Low part comparison into cr7. */ 2184 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2185 0, 7, TCG_TYPE_I32); 2186 2187 /* Load the high part TLB comparator into TMP2. 
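           On a 32-bit host the 64-bit comparator is split across two words;
           which word holds the high part depends on host endianness, hence
           4 * !HOST_BIG_ENDIAN here and 4 * HOST_BIG_ENDIAN for the low
           word loaded above.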
*/ 2188 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2189 cmp_off + 4 * !HOST_BIG_ENDIAN); 2190 2191 /* Load addend, deferred for this case. */ 2192 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2193 offsetof(CPUTLBEntry, addend)); 2194 2195 /* High part comparison into cr6. */ 2196 tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32); 2197 2198 /* Combine comparisons into cr7. */ 2199 tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2200 } else { 2201 /* Full comparison into cr7. */ 2202 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, addr_type); 2203 } 2204 2205 /* Load a pointer into the current opcode w/conditional branch-link. */ 2206 ldst->label_ptr[0] = s->code_ptr; 2207 tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); 2208 2209 h->base = TCG_REG_TMP1; 2210#else 2211 if (a_bits) { 2212 ldst = new_ldst_label(s); 2213 ldst->is_ld = is_ld; 2214 ldst->oi = oi; 2215 ldst->addrlo_reg = addrlo; 2216 ldst->addrhi_reg = addrhi; 2217 2218 /* We are expecting a_bits to max out at 7, much lower than ANDI. */ 2219 tcg_debug_assert(a_bits < 16); 2220 tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1)); 2221 2222 ldst->label_ptr[0] = s->code_ptr; 2223 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); 2224 } 2225 2226 h->base = guest_base ? TCG_GUEST_BASE_REG : 0; 2227#endif 2228 2229 if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) { 2230 /* Zero-extend the guest address for use in the host address. */ 2231 tcg_out_ext32u(s, TCG_REG_R0, addrlo); 2232 h->index = TCG_REG_R0; 2233 } else { 2234 h->index = addrlo; 2235 } 2236 2237 return ldst; 2238} 2239 2240static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, 2241 TCGReg addrlo, TCGReg addrhi, 2242 MemOpIdx oi, TCGType data_type) 2243{ 2244 MemOp opc = get_memop(oi); 2245 TCGLabelQemuLdst *ldst; 2246 HostAddress h; 2247 2248 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); 2249 2250 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2251 if (opc & MO_BSWAP) { 2252 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2253 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2254 tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0)); 2255 } else if (h.base != 0) { 2256 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2257 tcg_out32(s, LWZX | TAB(datahi, h.base, h.index)); 2258 tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0)); 2259 } else if (h.index == datahi) { 2260 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2261 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2262 } else { 2263 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2264 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2265 } 2266 } else { 2267 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; 2268 if (!have_isa_2_06 && insn == LDBRX) { 2269 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2270 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2271 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0)); 2272 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); 2273 } else if (insn) { 2274 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2275 } else { 2276 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; 2277 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2278 tcg_out_movext(s, TCG_TYPE_REG, datalo, 2279 TCG_TYPE_REG, opc & MO_SSIZE, datalo); 2280 } 2281 } 2282 2283 if (ldst) { 2284 ldst->type = data_type; 2285 ldst->datalo_reg = datalo; 2286 ldst->datahi_reg = datahi; 2287 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2288 } 2289} 2290 
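/*
 * Store counterpart of tcg_out_qemu_ld above: prepare_host_addr supplies the
 * (base, index) host address, a 32-bit host splits MO_64 data into two word
 * stores, and a byte-swapped 64-bit store without ISA 2.06's STDBRX is
 * emulated with two STWBRX instructions.
 */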
2291static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, 2292 TCGReg addrlo, TCGReg addrhi, 2293 MemOpIdx oi, TCGType data_type) 2294{ 2295 MemOp opc = get_memop(oi); 2296 TCGLabelQemuLdst *ldst; 2297 HostAddress h; 2298 2299 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); 2300 2301 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2302 if (opc & MO_BSWAP) { 2303 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2304 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2305 tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0)); 2306 } else if (h.base != 0) { 2307 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2308 tcg_out32(s, STWX | SAB(datahi, h.base, h.index)); 2309 tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0)); 2310 } else { 2311 tcg_out32(s, STW | TAI(datahi, h.index, 0)); 2312 tcg_out32(s, STW | TAI(datalo, h.index, 4)); 2313 } 2314 } else { 2315 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; 2316 if (!have_isa_2_06 && insn == STDBRX) { 2317 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2318 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4)); 2319 tcg_out_shri64(s, TCG_REG_R0, datalo, 32); 2320 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1)); 2321 } else { 2322 tcg_out32(s, insn | SAB(datalo, h.base, h.index)); 2323 } 2324 } 2325 2326 if (ldst) { 2327 ldst->type = data_type; 2328 ldst->datalo_reg = datalo; 2329 ldst->datahi_reg = datahi; 2330 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2331 } 2332} 2333 2334static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, 2335 TCGReg addr_reg, MemOpIdx oi, bool is_ld) 2336{ 2337 TCGLabelQemuLdst *ldst; 2338 HostAddress h; 2339 bool need_bswap; 2340 uint32_t insn; 2341 TCGReg index; 2342 2343 ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld); 2344 2345 /* Compose the final address, as LQ/STQ have no indexing. */ 2346 index = h.index; 2347 if (h.base != 0) { 2348 index = TCG_REG_TMP1; 2349 tcg_out32(s, ADD | TAB(index, h.base, h.index)); 2350 } 2351 need_bswap = get_memop(oi) & MO_BSWAP; 2352 2353 if (h.aa.atom == MO_128) { 2354 tcg_debug_assert(!need_bswap); 2355 tcg_debug_assert(datalo & 1); 2356 tcg_debug_assert(datahi == datalo - 1); 2357 insn = is_ld ? LQ : STQ; 2358 tcg_out32(s, insn | TAI(datahi, index, 0)); 2359 } else { 2360 TCGReg d1, d2; 2361 2362 if (HOST_BIG_ENDIAN ^ need_bswap) { 2363 d1 = datahi, d2 = datalo; 2364 } else { 2365 d1 = datalo, d2 = datahi; 2366 } 2367 2368 if (need_bswap) { 2369 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); 2370 insn = is_ld ? LDBRX : STDBRX; 2371 tcg_out32(s, insn | TAB(d1, 0, index)); 2372 tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); 2373 } else { 2374 insn = is_ld ? LD : STD; 2375 tcg_out32(s, insn | TAI(d1, index, 0)); 2376 tcg_out32(s, insn | TAI(d2, index, 8)); 2377 } 2378 } 2379 2380 if (ldst) { 2381 ldst->type = TCG_TYPE_I128; 2382 ldst->datalo_reg = datalo; 2383 ldst->datahi_reg = datahi; 2384 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2385 } 2386} 2387 2388static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2389{ 2390 int i; 2391 for (i = 0; i < count; ++i) { 2392 p[i] = NOP; 2393 } 2394} 2395 2396/* Parameters for function call generation, used in tcg.c. 
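   The frame built by the prologue is, from low to high address, roughly:
   the ABI link area (back chain, LR save slot, etc., plus the register
   parameter save area on AIX), then TCG_STATIC_CALL_ARGS_SIZE bytes of
   outgoing-argument space, then CPU_TEMP_BUF_SIZE bytes of TCG spill
   buffer, then the callee-saved GPRs from REG_SAVE_BOT up to FRAME_SIZE,
   the whole rounded up to TCG_TARGET_STACK_ALIGN.  LR itself is stored at
   FRAME_SIZE + LR_OFFSET, i.e. in the caller's link area, as the ABI
   requires.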
*/ 2397#define TCG_TARGET_STACK_ALIGN 16 2398 2399#ifdef _CALL_AIX 2400# define LINK_AREA_SIZE (6 * SZR) 2401# define LR_OFFSET (1 * SZR) 2402# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) 2403#elif defined(_CALL_DARWIN) 2404# define LINK_AREA_SIZE (6 * SZR) 2405# define LR_OFFSET (2 * SZR) 2406#elif TCG_TARGET_REG_BITS == 64 2407# if defined(_CALL_ELF) && _CALL_ELF == 2 2408# define LINK_AREA_SIZE (4 * SZR) 2409# define LR_OFFSET (1 * SZR) 2410# endif 2411#else /* TCG_TARGET_REG_BITS == 32 */ 2412# if defined(_CALL_SYSV) 2413# define LINK_AREA_SIZE (2 * SZR) 2414# define LR_OFFSET (1 * SZR) 2415# endif 2416#endif 2417#ifndef LR_OFFSET 2418# error "Unhandled abi" 2419#endif 2420#ifndef TCG_TARGET_CALL_STACK_OFFSET 2421# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE 2422#endif 2423 2424#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2425#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) 2426 2427#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ 2428 + TCG_STATIC_CALL_ARGS_SIZE \ 2429 + CPU_TEMP_BUF_SIZE \ 2430 + REG_SAVE_SIZE \ 2431 + TCG_TARGET_STACK_ALIGN - 1) \ 2432 & -TCG_TARGET_STACK_ALIGN) 2433 2434#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) 2435 2436static void tcg_target_qemu_prologue(TCGContext *s) 2437{ 2438 int i; 2439 2440#ifdef _CALL_AIX 2441 const void **desc = (const void **)s->code_ptr; 2442 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ 2443 desc[1] = 0; /* environment pointer */ 2444 s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ 2445#endif 2446 2447 tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, 2448 CPU_TEMP_BUF_SIZE); 2449 2450 /* Prologue */ 2451 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); 2452 tcg_out32(s, (SZR == 8 ? 
STDU : STWU) 2453 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); 2454 2455 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2456 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2457 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2458 } 2459 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2460 2461#ifndef CONFIG_SOFTMMU 2462 if (guest_base) { 2463 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); 2464 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2465 } 2466#endif 2467 2468 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2469 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); 2470 if (USE_REG_TB) { 2471 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]); 2472 } 2473 tcg_out32(s, BCCTR | BO_ALWAYS); 2474 2475 /* Epilogue */ 2476 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2477 2478 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2479 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2480 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2481 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2482 } 2483 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); 2484 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); 2485 tcg_out32(s, BCLR | BO_ALWAYS); 2486} 2487 2488static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) 2489{ 2490 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); 2491 tcg_out_b(s, 0, tcg_code_gen_epilogue); 2492} 2493 2494static void tcg_out_goto_tb(TCGContext *s, int which) 2495{ 2496 uintptr_t ptr = get_jmp_target_addr(s, which); 2497 2498 if (USE_REG_TB) { 2499 ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); 2500 tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); 2501 2502 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2503 set_jmp_insn_offset(s, which); 2504 tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); 2505 2506 /* When branch is out of range, fall through to indirect. */ 2507 tcg_out32(s, BCCTR | BO_ALWAYS); 2508 2509 /* For the unlinked case, need to reset TCG_REG_TB. */ 2510 set_jmp_reset_offset(s, which); 2511 tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, 2512 -tcg_current_code_size(s)); 2513 } else { 2514 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2515 set_jmp_insn_offset(s, which); 2516 tcg_out32(s, NOP); 2517 2518 /* When branch is out of range, fall through to indirect. 
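           The address of the jump-target slot is 'ptr': materialize it
           minus its sign-extended low 16 bits, let the load's displacement
           add those bits back, then branch through CTR.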
*/ 2519 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr); 2520 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr); 2521 tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); 2522 tcg_out32(s, BCCTR | BO_ALWAYS); 2523 set_jmp_reset_offset(s, which); 2524 } 2525} 2526 2527void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2528 uintptr_t jmp_rx, uintptr_t jmp_rw) 2529{ 2530 uintptr_t addr = tb->jmp_target_addr[n]; 2531 intptr_t diff = addr - jmp_rx; 2532 tcg_insn_unit insn; 2533 2534 if (in_range_b(diff)) { 2535 insn = B | (diff & 0x3fffffc); 2536 } else if (USE_REG_TB) { 2537 insn = MTSPR | RS(TCG_REG_TB) | CTR; 2538 } else { 2539 insn = NOP; 2540 } 2541 2542 qatomic_set((uint32_t *)jmp_rw, insn); 2543 flush_idcache_range(jmp_rx, jmp_rw, 4); 2544} 2545 2546static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2547 const TCGArg args[TCG_MAX_OP_ARGS], 2548 const int const_args[TCG_MAX_OP_ARGS]) 2549{ 2550 TCGArg a0, a1, a2; 2551 2552 switch (opc) { 2553 case INDEX_op_goto_ptr: 2554 tcg_out32(s, MTSPR | RS(args[0]) | CTR); 2555 if (USE_REG_TB) { 2556 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]); 2557 } 2558 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); 2559 tcg_out32(s, BCCTR | BO_ALWAYS); 2560 break; 2561 case INDEX_op_br: 2562 { 2563 TCGLabel *l = arg_label(args[0]); 2564 uint32_t insn = B; 2565 2566 if (l->has_value) { 2567 insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), 2568 l->u.value_ptr); 2569 } else { 2570 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); 2571 } 2572 tcg_out32(s, insn); 2573 } 2574 break; 2575 case INDEX_op_ld8u_i32: 2576 case INDEX_op_ld8u_i64: 2577 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2578 break; 2579 case INDEX_op_ld8s_i32: 2580 case INDEX_op_ld8s_i64: 2581 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2582 tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); 2583 break; 2584 case INDEX_op_ld16u_i32: 2585 case INDEX_op_ld16u_i64: 2586 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); 2587 break; 2588 case INDEX_op_ld16s_i32: 2589 case INDEX_op_ld16s_i64: 2590 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); 2591 break; 2592 case INDEX_op_ld_i32: 2593 case INDEX_op_ld32u_i64: 2594 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); 2595 break; 2596 case INDEX_op_ld32s_i64: 2597 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); 2598 break; 2599 case INDEX_op_ld_i64: 2600 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); 2601 break; 2602 case INDEX_op_st8_i32: 2603 case INDEX_op_st8_i64: 2604 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); 2605 break; 2606 case INDEX_op_st16_i32: 2607 case INDEX_op_st16_i64: 2608 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); 2609 break; 2610 case INDEX_op_st_i32: 2611 case INDEX_op_st32_i64: 2612 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); 2613 break; 2614 case INDEX_op_st_i64: 2615 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); 2616 break; 2617 2618 case INDEX_op_add_i32: 2619 a0 = args[0], a1 = args[1], a2 = args[2]; 2620 if (const_args[2]) { 2621 do_addi_32: 2622 tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); 2623 } else { 2624 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2625 } 2626 break; 2627 case INDEX_op_sub_i32: 2628 a0 = args[0], a1 = args[1], a2 = args[2]; 2629 if (const_args[1]) { 2630 if (const_args[2]) { 2631 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); 2632 } else { 2633 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2634 } 2635 } else if (const_args[2]) { 2636 
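            /* Constant subtrahend: negate it and fold into the addi path. */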
a2 = -a2; 2637 goto do_addi_32; 2638 } else { 2639 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2640 } 2641 break; 2642 2643 case INDEX_op_and_i32: 2644 a0 = args[0], a1 = args[1], a2 = args[2]; 2645 if (const_args[2]) { 2646 tcg_out_andi32(s, a0, a1, a2); 2647 } else { 2648 tcg_out32(s, AND | SAB(a1, a0, a2)); 2649 } 2650 break; 2651 case INDEX_op_and_i64: 2652 a0 = args[0], a1 = args[1], a2 = args[2]; 2653 if (const_args[2]) { 2654 tcg_out_andi64(s, a0, a1, a2); 2655 } else { 2656 tcg_out32(s, AND | SAB(a1, a0, a2)); 2657 } 2658 break; 2659 case INDEX_op_or_i64: 2660 case INDEX_op_or_i32: 2661 a0 = args[0], a1 = args[1], a2 = args[2]; 2662 if (const_args[2]) { 2663 tcg_out_ori32(s, a0, a1, a2); 2664 } else { 2665 tcg_out32(s, OR | SAB(a1, a0, a2)); 2666 } 2667 break; 2668 case INDEX_op_xor_i64: 2669 case INDEX_op_xor_i32: 2670 a0 = args[0], a1 = args[1], a2 = args[2]; 2671 if (const_args[2]) { 2672 tcg_out_xori32(s, a0, a1, a2); 2673 } else { 2674 tcg_out32(s, XOR | SAB(a1, a0, a2)); 2675 } 2676 break; 2677 case INDEX_op_andc_i32: 2678 a0 = args[0], a1 = args[1], a2 = args[2]; 2679 if (const_args[2]) { 2680 tcg_out_andi32(s, a0, a1, ~a2); 2681 } else { 2682 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2683 } 2684 break; 2685 case INDEX_op_andc_i64: 2686 a0 = args[0], a1 = args[1], a2 = args[2]; 2687 if (const_args[2]) { 2688 tcg_out_andi64(s, a0, a1, ~a2); 2689 } else { 2690 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2691 } 2692 break; 2693 case INDEX_op_orc_i32: 2694 if (const_args[2]) { 2695 tcg_out_ori32(s, args[0], args[1], ~args[2]); 2696 break; 2697 } 2698 /* FALLTHRU */ 2699 case INDEX_op_orc_i64: 2700 tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); 2701 break; 2702 case INDEX_op_eqv_i32: 2703 if (const_args[2]) { 2704 tcg_out_xori32(s, args[0], args[1], ~args[2]); 2705 break; 2706 } 2707 /* FALLTHRU */ 2708 case INDEX_op_eqv_i64: 2709 tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); 2710 break; 2711 case INDEX_op_nand_i32: 2712 case INDEX_op_nand_i64: 2713 tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); 2714 break; 2715 case INDEX_op_nor_i32: 2716 case INDEX_op_nor_i64: 2717 tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); 2718 break; 2719 2720 case INDEX_op_clz_i32: 2721 tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], 2722 args[2], const_args[2]); 2723 break; 2724 case INDEX_op_ctz_i32: 2725 tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], 2726 args[2], const_args[2]); 2727 break; 2728 case INDEX_op_ctpop_i32: 2729 tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); 2730 break; 2731 2732 case INDEX_op_clz_i64: 2733 tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], 2734 args[2], const_args[2]); 2735 break; 2736 case INDEX_op_ctz_i64: 2737 tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], 2738 args[2], const_args[2]); 2739 break; 2740 case INDEX_op_ctpop_i64: 2741 tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); 2742 break; 2743 2744 case INDEX_op_mul_i32: 2745 a0 = args[0], a1 = args[1], a2 = args[2]; 2746 if (const_args[2]) { 2747 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2748 } else { 2749 tcg_out32(s, MULLW | TAB(a0, a1, a2)); 2750 } 2751 break; 2752 2753 case INDEX_op_div_i32: 2754 tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); 2755 break; 2756 2757 case INDEX_op_divu_i32: 2758 tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); 2759 break; 2760 2761 case INDEX_op_rem_i32: 2762 tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); 2763 break; 2764 2765 case INDEX_op_remu_i32: 2766 tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); 2767 break; 
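    /*
     * Note: MODSW/MODUW above (and MODSD/MODUD in the 64-bit cases below)
     * are ISA 3.00 instructions; the rem/remu opcodes are expected here
     * only when ISA 3.00 is available.
     */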
2768 2769 case INDEX_op_shl_i32: 2770 if (const_args[2]) { 2771 /* Limit immediate shift count lest we create an illegal insn. */ 2772 tcg_out_shli32(s, args[0], args[1], args[2] & 31); 2773 } else { 2774 tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); 2775 } 2776 break; 2777 case INDEX_op_shr_i32: 2778 if (const_args[2]) { 2779 /* Limit immediate shift count lest we create an illegal insn. */ 2780 tcg_out_shri32(s, args[0], args[1], args[2] & 31); 2781 } else { 2782 tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); 2783 } 2784 break; 2785 case INDEX_op_sar_i32: 2786 if (const_args[2]) { 2787 tcg_out_sari32(s, args[0], args[1], args[2]); 2788 } else { 2789 tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); 2790 } 2791 break; 2792 case INDEX_op_rotl_i32: 2793 if (const_args[2]) { 2794 tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); 2795 } else { 2796 tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) 2797 | MB(0) | ME(31)); 2798 } 2799 break; 2800 case INDEX_op_rotr_i32: 2801 if (const_args[2]) { 2802 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); 2803 } else { 2804 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); 2805 tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) 2806 | MB(0) | ME(31)); 2807 } 2808 break; 2809 2810 case INDEX_op_brcond_i32: 2811 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 2812 arg_label(args[3]), TCG_TYPE_I32); 2813 break; 2814 case INDEX_op_brcond_i64: 2815 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 2816 arg_label(args[3]), TCG_TYPE_I64); 2817 break; 2818 case INDEX_op_brcond2_i32: 2819 tcg_out_brcond2(s, args, const_args); 2820 break; 2821 2822 case INDEX_op_neg_i32: 2823 case INDEX_op_neg_i64: 2824 tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); 2825 break; 2826 2827 case INDEX_op_not_i32: 2828 case INDEX_op_not_i64: 2829 tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); 2830 break; 2831 2832 case INDEX_op_add_i64: 2833 a0 = args[0], a1 = args[1], a2 = args[2]; 2834 if (const_args[2]) { 2835 do_addi_64: 2836 tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); 2837 } else { 2838 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2839 } 2840 break; 2841 case INDEX_op_sub_i64: 2842 a0 = args[0], a1 = args[1], a2 = args[2]; 2843 if (const_args[1]) { 2844 if (const_args[2]) { 2845 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); 2846 } else { 2847 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2848 } 2849 } else if (const_args[2]) { 2850 a2 = -a2; 2851 goto do_addi_64; 2852 } else { 2853 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2854 } 2855 break; 2856 2857 case INDEX_op_shl_i64: 2858 if (const_args[2]) { 2859 /* Limit immediate shift count lest we create an illegal insn. */ 2860 tcg_out_shli64(s, args[0], args[1], args[2] & 63); 2861 } else { 2862 tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); 2863 } 2864 break; 2865 case INDEX_op_shr_i64: 2866 if (const_args[2]) { 2867 /* Limit immediate shift count lest we create an illegal insn. 
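               TCG leaves shifts by >= the operand width unspecified, so
               masking the count preserves those semantics while keeping the
               rotate-and-mask encoding used below valid.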
*/ 2868 tcg_out_shri64(s, args[0], args[1], args[2] & 63); 2869 } else { 2870 tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); 2871 } 2872 break; 2873 case INDEX_op_sar_i64: 2874 if (const_args[2]) { 2875 tcg_out_sari64(s, args[0], args[1], args[2]); 2876 } else { 2877 tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); 2878 } 2879 break; 2880 case INDEX_op_rotl_i64: 2881 if (const_args[2]) { 2882 tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); 2883 } else { 2884 tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); 2885 } 2886 break; 2887 case INDEX_op_rotr_i64: 2888 if (const_args[2]) { 2889 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); 2890 } else { 2891 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); 2892 tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); 2893 } 2894 break; 2895 2896 case INDEX_op_mul_i64: 2897 a0 = args[0], a1 = args[1], a2 = args[2]; 2898 if (const_args[2]) { 2899 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2900 } else { 2901 tcg_out32(s, MULLD | TAB(a0, a1, a2)); 2902 } 2903 break; 2904 case INDEX_op_div_i64: 2905 tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); 2906 break; 2907 case INDEX_op_divu_i64: 2908 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); 2909 break; 2910 case INDEX_op_rem_i64: 2911 tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); 2912 break; 2913 case INDEX_op_remu_i64: 2914 tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); 2915 break; 2916 2917 case INDEX_op_qemu_ld_a64_i32: 2918 if (TCG_TARGET_REG_BITS == 32) { 2919 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], 2920 args[3], TCG_TYPE_I32); 2921 break; 2922 } 2923 /* fall through */ 2924 case INDEX_op_qemu_ld_a32_i32: 2925 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 2926 break; 2927 case INDEX_op_qemu_ld_a32_i64: 2928 if (TCG_TARGET_REG_BITS == 64) { 2929 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 2930 args[2], TCG_TYPE_I64); 2931 } else { 2932 tcg_out_qemu_ld(s, args[0], args[1], args[2], -1, 2933 args[3], TCG_TYPE_I64); 2934 } 2935 break; 2936 case INDEX_op_qemu_ld_a64_i64: 2937 if (TCG_TARGET_REG_BITS == 64) { 2938 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 2939 args[2], TCG_TYPE_I64); 2940 } else { 2941 tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], 2942 args[4], TCG_TYPE_I64); 2943 } 2944 break; 2945 case INDEX_op_qemu_ld_a32_i128: 2946 case INDEX_op_qemu_ld_a64_i128: 2947 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 2948 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); 2949 break; 2950 2951 case INDEX_op_qemu_st_a64_i32: 2952 if (TCG_TARGET_REG_BITS == 32) { 2953 tcg_out_qemu_st(s, args[0], -1, args[1], args[2], 2954 args[3], TCG_TYPE_I32); 2955 break; 2956 } 2957 /* fall through */ 2958 case INDEX_op_qemu_st_a32_i32: 2959 tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 2960 break; 2961 case INDEX_op_qemu_st_a32_i64: 2962 if (TCG_TARGET_REG_BITS == 64) { 2963 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 2964 args[2], TCG_TYPE_I64); 2965 } else { 2966 tcg_out_qemu_st(s, args[0], args[1], args[2], -1, 2967 args[3], TCG_TYPE_I64); 2968 } 2969 break; 2970 case INDEX_op_qemu_st_a64_i64: 2971 if (TCG_TARGET_REG_BITS == 64) { 2972 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 2973 args[2], TCG_TYPE_I64); 2974 } else { 2975 tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], 2976 args[4], TCG_TYPE_I64); 2977 } 2978 break; 2979 case INDEX_op_qemu_st_a32_i128: 2980 case INDEX_op_qemu_st_a64_i128: 2981 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 2982 
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); 2983 break; 2984 2985 case INDEX_op_setcond_i32: 2986 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 2987 const_args[2]); 2988 break; 2989 case INDEX_op_setcond_i64: 2990 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 2991 const_args[2]); 2992 break; 2993 case INDEX_op_setcond2_i32: 2994 tcg_out_setcond2(s, args, const_args); 2995 break; 2996 2997 case INDEX_op_bswap16_i32: 2998 case INDEX_op_bswap16_i64: 2999 tcg_out_bswap16(s, args[0], args[1], args[2]); 3000 break; 3001 case INDEX_op_bswap32_i32: 3002 tcg_out_bswap32(s, args[0], args[1], 0); 3003 break; 3004 case INDEX_op_bswap32_i64: 3005 tcg_out_bswap32(s, args[0], args[1], args[2]); 3006 break; 3007 case INDEX_op_bswap64_i64: 3008 tcg_out_bswap64(s, args[0], args[1]); 3009 break; 3010 3011 case INDEX_op_deposit_i32: 3012 if (const_args[2]) { 3013 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; 3014 tcg_out_andi32(s, args[0], args[0], ~mask); 3015 } else { 3016 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], 3017 32 - args[3] - args[4], 31 - args[3]); 3018 } 3019 break; 3020 case INDEX_op_deposit_i64: 3021 if (const_args[2]) { 3022 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; 3023 tcg_out_andi64(s, args[0], args[0], ~mask); 3024 } else { 3025 tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], 3026 64 - args[3] - args[4]); 3027 } 3028 break; 3029 3030 case INDEX_op_extract_i32: 3031 tcg_out_rlw(s, RLWINM, args[0], args[1], 3032 32 - args[2], 32 - args[3], 31); 3033 break; 3034 case INDEX_op_extract_i64: 3035 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); 3036 break; 3037 3038 case INDEX_op_movcond_i32: 3039 tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], 3040 args[3], args[4], const_args[2]); 3041 break; 3042 case INDEX_op_movcond_i64: 3043 tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], 3044 args[3], args[4], const_args[2]); 3045 break; 3046 3047#if TCG_TARGET_REG_BITS == 64 3048 case INDEX_op_add2_i64: 3049#else 3050 case INDEX_op_add2_i32: 3051#endif 3052 /* Note that the CA bit is defined based on the word size of the 3053 environment. So in 64-bit mode it's always carry-out of bit 63. 3054 The fallback code using deposit works just as well for 32-bit. */ 3055 a0 = args[0], a1 = args[1]; 3056 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { 3057 a0 = TCG_REG_R0; 3058 } 3059 if (const_args[4]) { 3060 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); 3061 } else { 3062 tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); 3063 } 3064 if (const_args[5]) { 3065 tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); 3066 } else { 3067 tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); 3068 } 3069 if (a0 != args[0]) { 3070 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3071 } 3072 break; 3073 3074#if TCG_TARGET_REG_BITS == 64 3075 case INDEX_op_sub2_i64: 3076#else 3077 case INDEX_op_sub2_i32: 3078#endif 3079 a0 = args[0], a1 = args[1]; 3080 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { 3081 a0 = TCG_REG_R0; 3082 } 3083 if (const_args[2]) { 3084 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); 3085 } else { 3086 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); 3087 } 3088 if (const_args[3]) { 3089 tcg_out32(s, (args[3] ? 
SUBFME : SUBFZE) | RT(a1) | RA(args[5])); 3090 } else { 3091 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); 3092 } 3093 if (a0 != args[0]) { 3094 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3095 } 3096 break; 3097 3098 case INDEX_op_muluh_i32: 3099 tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); 3100 break; 3101 case INDEX_op_mulsh_i32: 3102 tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); 3103 break; 3104 case INDEX_op_muluh_i64: 3105 tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); 3106 break; 3107 case INDEX_op_mulsh_i64: 3108 tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); 3109 break; 3110 3111 case INDEX_op_mb: 3112 tcg_out_mb(s, args[0]); 3113 break; 3114 3115 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 3116 case INDEX_op_mov_i64: 3117 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 3118 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 3119 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 3120 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 3121 case INDEX_op_ext8s_i64: 3122 case INDEX_op_ext8u_i32: 3123 case INDEX_op_ext8u_i64: 3124 case INDEX_op_ext16s_i32: 3125 case INDEX_op_ext16s_i64: 3126 case INDEX_op_ext16u_i32: 3127 case INDEX_op_ext16u_i64: 3128 case INDEX_op_ext32s_i64: 3129 case INDEX_op_ext32u_i64: 3130 case INDEX_op_ext_i32_i64: 3131 case INDEX_op_extu_i32_i64: 3132 case INDEX_op_extrl_i64_i32: 3133 default: 3134 g_assert_not_reached(); 3135 } 3136} 3137 3138int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3139{ 3140 switch (opc) { 3141 case INDEX_op_and_vec: 3142 case INDEX_op_or_vec: 3143 case INDEX_op_xor_vec: 3144 case INDEX_op_andc_vec: 3145 case INDEX_op_not_vec: 3146 case INDEX_op_nor_vec: 3147 case INDEX_op_eqv_vec: 3148 case INDEX_op_nand_vec: 3149 return 1; 3150 case INDEX_op_orc_vec: 3151 return have_isa_2_07; 3152 case INDEX_op_add_vec: 3153 case INDEX_op_sub_vec: 3154 case INDEX_op_smax_vec: 3155 case INDEX_op_smin_vec: 3156 case INDEX_op_umax_vec: 3157 case INDEX_op_umin_vec: 3158 case INDEX_op_shlv_vec: 3159 case INDEX_op_shrv_vec: 3160 case INDEX_op_sarv_vec: 3161 case INDEX_op_rotlv_vec: 3162 return vece <= MO_32 || have_isa_2_07; 3163 case INDEX_op_ssadd_vec: 3164 case INDEX_op_sssub_vec: 3165 case INDEX_op_usadd_vec: 3166 case INDEX_op_ussub_vec: 3167 return vece <= MO_32; 3168 case INDEX_op_cmp_vec: 3169 case INDEX_op_shli_vec: 3170 case INDEX_op_shri_vec: 3171 case INDEX_op_sari_vec: 3172 case INDEX_op_rotli_vec: 3173 return vece <= MO_32 || have_isa_2_07 ? -1 : 0; 3174 case INDEX_op_neg_vec: 3175 return vece >= MO_32 && have_isa_3_00; 3176 case INDEX_op_mul_vec: 3177 switch (vece) { 3178 case MO_8: 3179 case MO_16: 3180 return -1; 3181 case MO_32: 3182 return have_isa_2_07 ? 1 : -1; 3183 case MO_64: 3184 return have_isa_3_10; 3185 } 3186 return 0; 3187 case INDEX_op_bitsel_vec: 3188 return have_vsx; 3189 case INDEX_op_rotrv_vec: 3190 return -1; 3191 default: 3192 return 0; 3193 } 3194} 3195 3196static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 3197 TCGReg dst, TCGReg src) 3198{ 3199 tcg_debug_assert(dst >= TCG_REG_V0); 3200 3201 /* Splat from integer reg allowed via constraints for v3.00. 
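       ISA 3.00 has mtvsrws/mtvsrdd to splat a GPR directly into a vector
       register; the dup_vec constraint in tcg_target_op_def only offers a
       GPR source when have_isa_3_00, otherwise dupm or mov+dup is used.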
*/ 3202 if (src < TCG_REG_V0) { 3203 tcg_debug_assert(have_isa_3_00); 3204 switch (vece) { 3205 case MO_64: 3206 tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); 3207 return true; 3208 case MO_32: 3209 tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); 3210 return true; 3211 default: 3212 /* Fail, so that we fall back on either dupm or mov+dup. */ 3213 return false; 3214 } 3215 } 3216 3217 /* 3218 * Recall we use (or emulate) VSX integer loads, so the integer is 3219 * right justified within the left (zero-index) double-word. 3220 */ 3221 switch (vece) { 3222 case MO_8: 3223 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); 3224 break; 3225 case MO_16: 3226 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); 3227 break; 3228 case MO_32: 3229 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); 3230 break; 3231 case MO_64: 3232 if (have_vsx) { 3233 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); 3234 break; 3235 } 3236 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); 3237 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); 3238 break; 3239 default: 3240 g_assert_not_reached(); 3241 } 3242 return true; 3243} 3244 3245static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 3246 TCGReg out, TCGReg base, intptr_t offset) 3247{ 3248 int elt; 3249 3250 tcg_debug_assert(out >= TCG_REG_V0); 3251 switch (vece) { 3252 case MO_8: 3253 if (have_isa_3_00) { 3254 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); 3255 } else { 3256 tcg_out_mem_long(s, 0, LVEBX, out, base, offset); 3257 } 3258 elt = extract32(offset, 0, 4); 3259#if !HOST_BIG_ENDIAN 3260 elt ^= 15; 3261#endif 3262 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); 3263 break; 3264 case MO_16: 3265 tcg_debug_assert((offset & 1) == 0); 3266 if (have_isa_3_00) { 3267 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); 3268 } else { 3269 tcg_out_mem_long(s, 0, LVEHX, out, base, offset); 3270 } 3271 elt = extract32(offset, 1, 3); 3272#if !HOST_BIG_ENDIAN 3273 elt ^= 7; 3274#endif 3275 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); 3276 break; 3277 case MO_32: 3278 if (have_isa_3_00) { 3279 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); 3280 break; 3281 } 3282 tcg_debug_assert((offset & 3) == 0); 3283 tcg_out_mem_long(s, 0, LVEWX, out, base, offset); 3284 elt = extract32(offset, 2, 2); 3285#if !HOST_BIG_ENDIAN 3286 elt ^= 3; 3287#endif 3288 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); 3289 break; 3290 case MO_64: 3291 if (have_vsx) { 3292 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); 3293 break; 3294 } 3295 tcg_debug_assert((offset & 7) == 0); 3296 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); 3297 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); 3298 elt = extract32(offset, 3, 1); 3299#if !HOST_BIG_ENDIAN 3300 elt = !elt; 3301#endif 3302 if (elt) { 3303 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); 3304 } else { 3305 tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); 3306 } 3307 break; 3308 default: 3309 g_assert_not_reached(); 3310 } 3311 return true; 3312} 3313 3314static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3315 unsigned vecl, unsigned vece, 3316 const TCGArg args[TCG_MAX_OP_ARGS], 3317 const int const_args[TCG_MAX_OP_ARGS]) 3318{ 3319 static const uint32_t 3320 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, 3321 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, 3322 mul_op[4] = { 0, 0, VMULUWM, VMULLD }, 3323 neg_op[4] = { 0, 0, VNEGW, VNEGD }, 3324 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, 3325 ne_op[4] = { VCMPNEB, VCMPNEH, 
VCMPNEW, 0 }, 3326 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, 3327 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, 3328 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, 3329 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, 3330 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, 3331 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, 3332 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 3333 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 3334 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 3335 smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, 3336 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, 3337 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, 3338 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, 3339 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, 3340 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, 3341 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, 3342 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, 3343 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, 3344 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; 3345 3346 TCGType type = vecl + TCG_TYPE_V64; 3347 TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; 3348 uint32_t insn; 3349 3350 switch (opc) { 3351 case INDEX_op_ld_vec: 3352 tcg_out_ld(s, type, a0, a1, a2); 3353 return; 3354 case INDEX_op_st_vec: 3355 tcg_out_st(s, type, a0, a1, a2); 3356 return; 3357 case INDEX_op_dupm_vec: 3358 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 3359 return; 3360 3361 case INDEX_op_add_vec: 3362 insn = add_op[vece]; 3363 break; 3364 case INDEX_op_sub_vec: 3365 insn = sub_op[vece]; 3366 break; 3367 case INDEX_op_neg_vec: 3368 insn = neg_op[vece]; 3369 a2 = a1; 3370 a1 = 0; 3371 break; 3372 case INDEX_op_mul_vec: 3373 insn = mul_op[vece]; 3374 break; 3375 case INDEX_op_ssadd_vec: 3376 insn = ssadd_op[vece]; 3377 break; 3378 case INDEX_op_sssub_vec: 3379 insn = sssub_op[vece]; 3380 break; 3381 case INDEX_op_usadd_vec: 3382 insn = usadd_op[vece]; 3383 break; 3384 case INDEX_op_ussub_vec: 3385 insn = ussub_op[vece]; 3386 break; 3387 case INDEX_op_smin_vec: 3388 insn = smin_op[vece]; 3389 break; 3390 case INDEX_op_umin_vec: 3391 insn = umin_op[vece]; 3392 break; 3393 case INDEX_op_smax_vec: 3394 insn = smax_op[vece]; 3395 break; 3396 case INDEX_op_umax_vec: 3397 insn = umax_op[vece]; 3398 break; 3399 case INDEX_op_shlv_vec: 3400 insn = shlv_op[vece]; 3401 break; 3402 case INDEX_op_shrv_vec: 3403 insn = shrv_op[vece]; 3404 break; 3405 case INDEX_op_sarv_vec: 3406 insn = sarv_op[vece]; 3407 break; 3408 case INDEX_op_and_vec: 3409 insn = VAND; 3410 break; 3411 case INDEX_op_or_vec: 3412 insn = VOR; 3413 break; 3414 case INDEX_op_xor_vec: 3415 insn = VXOR; 3416 break; 3417 case INDEX_op_andc_vec: 3418 insn = VANDC; 3419 break; 3420 case INDEX_op_not_vec: 3421 insn = VNOR; 3422 a2 = a1; 3423 break; 3424 case INDEX_op_orc_vec: 3425 insn = VORC; 3426 break; 3427 case INDEX_op_nand_vec: 3428 insn = VNAND; 3429 break; 3430 case INDEX_op_nor_vec: 3431 insn = VNOR; 3432 break; 3433 case INDEX_op_eqv_vec: 3434 insn = VEQV; 3435 break; 3436 3437 case INDEX_op_cmp_vec: 3438 switch (args[3]) { 3439 case TCG_COND_EQ: 3440 insn = eq_op[vece]; 3441 break; 3442 case TCG_COND_NE: 3443 insn = ne_op[vece]; 3444 break; 3445 case TCG_COND_GT: 3446 insn = gts_op[vece]; 3447 break; 3448 case TCG_COND_GTU: 3449 insn = gtu_op[vece]; 3450 break; 3451 default: 3452 g_assert_not_reached(); 3453 } 3454 break; 3455 3456 case INDEX_op_bitsel_vec: 3457 tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); 3458 return; 3459 3460 case INDEX_op_dup2_vec: 3461 assert(TCG_TARGET_REG_BITS == 32); 3462 /* With 
inputs a1 = xLxx, a2 = xHxx */ 3463 tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ 3464 tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ 3465 tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ 3466 return; 3467 3468 case INDEX_op_ppc_mrgh_vec: 3469 insn = mrgh_op[vece]; 3470 break; 3471 case INDEX_op_ppc_mrgl_vec: 3472 insn = mrgl_op[vece]; 3473 break; 3474 case INDEX_op_ppc_muleu_vec: 3475 insn = muleu_op[vece]; 3476 break; 3477 case INDEX_op_ppc_mulou_vec: 3478 insn = mulou_op[vece]; 3479 break; 3480 case INDEX_op_ppc_pkum_vec: 3481 insn = pkum_op[vece]; 3482 break; 3483 case INDEX_op_rotlv_vec: 3484 insn = rotl_op[vece]; 3485 break; 3486 case INDEX_op_ppc_msum_vec: 3487 tcg_debug_assert(vece == MO_16); 3488 tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); 3489 return; 3490 3491 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3492 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 3493 default: 3494 g_assert_not_reached(); 3495 } 3496 3497 tcg_debug_assert(insn != 0); 3498 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 3499} 3500 3501static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, 3502 TCGv_vec v1, TCGArg imm, TCGOpcode opci) 3503{ 3504 TCGv_vec t1; 3505 3506 if (vece == MO_32) { 3507 /* 3508 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3509 * So using negative numbers gets us the 4th bit easily. 3510 */ 3511 imm = sextract32(imm, 0, 5); 3512 } else { 3513 imm &= (8 << vece) - 1; 3514 } 3515 3516 /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */ 3517 t1 = tcg_constant_vec(type, MO_8, imm); 3518 vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), 3519 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3520} 3521 3522static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, 3523 TCGv_vec v1, TCGv_vec v2, TCGCond cond) 3524{ 3525 bool need_swap = false, need_inv = false; 3526 3527 tcg_debug_assert(vece <= MO_32 || have_isa_2_07); 3528 3529 switch (cond) { 3530 case TCG_COND_EQ: 3531 case TCG_COND_GT: 3532 case TCG_COND_GTU: 3533 break; 3534 case TCG_COND_NE: 3535 if (have_isa_3_00 && vece <= MO_32) { 3536 break; 3537 } 3538 /* fall through */ 3539 case TCG_COND_LE: 3540 case TCG_COND_LEU: 3541 need_inv = true; 3542 break; 3543 case TCG_COND_LT: 3544 case TCG_COND_LTU: 3545 need_swap = true; 3546 break; 3547 case TCG_COND_GE: 3548 case TCG_COND_GEU: 3549 need_swap = need_inv = true; 3550 break; 3551 default: 3552 g_assert_not_reached(); 3553 } 3554 3555 if (need_inv) { 3556 cond = tcg_invert_cond(cond); 3557 } 3558 if (need_swap) { 3559 TCGv_vec t1; 3560 t1 = v1, v1 = v2, v2 = t1; 3561 cond = tcg_swap_cond(cond); 3562 } 3563 3564 vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), 3565 tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); 3566 3567 if (need_inv) { 3568 tcg_gen_not_vec(vece, v0, v0); 3569 } 3570} 3571 3572static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, 3573 TCGv_vec v1, TCGv_vec v2) 3574{ 3575 TCGv_vec t1 = tcg_temp_new_vec(type); 3576 TCGv_vec t2 = tcg_temp_new_vec(type); 3577 TCGv_vec c0, c16; 3578 3579 switch (vece) { 3580 case MO_8: 3581 case MO_16: 3582 vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), 3583 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3584 vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), 3585 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3586 vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), 3587 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3588 vec_gen_3(INDEX_op_ppc_mrgl_vec, type, 
vece + 1, tcgv_vec_arg(t1), 3589 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3590 vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), 3591 tcgv_vec_arg(v0), tcgv_vec_arg(t1)); 3592 break; 3593 3594 case MO_32: 3595 tcg_debug_assert(!have_isa_2_07); 3596 /* 3597 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3598 * So using -16 is a quick way to represent 16. 3599 */ 3600 c16 = tcg_constant_vec(type, MO_8, -16); 3601 c0 = tcg_constant_vec(type, MO_8, 0); 3602 3603 vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1), 3604 tcgv_vec_arg(v2), tcgv_vec_arg(c16)); 3605 vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), 3606 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3607 vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1), 3608 tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0)); 3609 vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1), 3610 tcgv_vec_arg(t1), tcgv_vec_arg(c16)); 3611 tcg_gen_add_vec(MO_32, v0, t1, t2); 3612 break; 3613 3614 default: 3615 g_assert_not_reached(); 3616 } 3617 tcg_temp_free_vec(t1); 3618 tcg_temp_free_vec(t2); 3619} 3620 3621void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 3622 TCGArg a0, ...) 3623{ 3624 va_list va; 3625 TCGv_vec v0, v1, v2, t0; 3626 TCGArg a2; 3627 3628 va_start(va, a0); 3629 v0 = temp_tcgv_vec(arg_temp(a0)); 3630 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 3631 a2 = va_arg(va, TCGArg); 3632 3633 switch (opc) { 3634 case INDEX_op_shli_vec: 3635 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); 3636 break; 3637 case INDEX_op_shri_vec: 3638 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); 3639 break; 3640 case INDEX_op_sari_vec: 3641 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); 3642 break; 3643 case INDEX_op_rotli_vec: 3644 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec); 3645 break; 3646 case INDEX_op_cmp_vec: 3647 v2 = temp_tcgv_vec(arg_temp(a2)); 3648 expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); 3649 break; 3650 case INDEX_op_mul_vec: 3651 v2 = temp_tcgv_vec(arg_temp(a2)); 3652 expand_vec_mul(type, vece, v0, v1, v2); 3653 break; 3654 case INDEX_op_rotlv_vec: 3655 v2 = temp_tcgv_vec(arg_temp(a2)); 3656 t0 = tcg_temp_new_vec(type); 3657 tcg_gen_neg_vec(vece, t0, v2); 3658 tcg_gen_rotlv_vec(vece, v0, v1, t0); 3659 tcg_temp_free_vec(t0); 3660 break; 3661 default: 3662 g_assert_not_reached(); 3663 } 3664 va_end(va); 3665} 3666 3667static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 3668{ 3669 switch (op) { 3670 case INDEX_op_goto_ptr: 3671 return C_O0_I1(r); 3672 3673 case INDEX_op_ld8u_i32: 3674 case INDEX_op_ld8s_i32: 3675 case INDEX_op_ld16u_i32: 3676 case INDEX_op_ld16s_i32: 3677 case INDEX_op_ld_i32: 3678 case INDEX_op_ctpop_i32: 3679 case INDEX_op_neg_i32: 3680 case INDEX_op_not_i32: 3681 case INDEX_op_ext8s_i32: 3682 case INDEX_op_ext16s_i32: 3683 case INDEX_op_bswap16_i32: 3684 case INDEX_op_bswap32_i32: 3685 case INDEX_op_extract_i32: 3686 case INDEX_op_ld8u_i64: 3687 case INDEX_op_ld8s_i64: 3688 case INDEX_op_ld16u_i64: 3689 case INDEX_op_ld16s_i64: 3690 case INDEX_op_ld32u_i64: 3691 case INDEX_op_ld32s_i64: 3692 case INDEX_op_ld_i64: 3693 case INDEX_op_ctpop_i64: 3694 case INDEX_op_neg_i64: 3695 case INDEX_op_not_i64: 3696 case INDEX_op_ext8s_i64: 3697 case INDEX_op_ext16s_i64: 3698 case INDEX_op_ext32s_i64: 3699 case INDEX_op_ext_i32_i64: 3700 case INDEX_op_extu_i32_i64: 3701 case INDEX_op_bswap16_i64: 3702 case INDEX_op_bswap32_i64: 3703 case INDEX_op_bswap64_i64: 3704 case 
INDEX_op_extract_i64: 3705 return C_O1_I1(r, r); 3706 3707 case INDEX_op_st8_i32: 3708 case INDEX_op_st16_i32: 3709 case INDEX_op_st_i32: 3710 case INDEX_op_st8_i64: 3711 case INDEX_op_st16_i64: 3712 case INDEX_op_st32_i64: 3713 case INDEX_op_st_i64: 3714 return C_O0_I2(r, r); 3715 3716 case INDEX_op_add_i32: 3717 case INDEX_op_and_i32: 3718 case INDEX_op_or_i32: 3719 case INDEX_op_xor_i32: 3720 case INDEX_op_andc_i32: 3721 case INDEX_op_orc_i32: 3722 case INDEX_op_eqv_i32: 3723 case INDEX_op_shl_i32: 3724 case INDEX_op_shr_i32: 3725 case INDEX_op_sar_i32: 3726 case INDEX_op_rotl_i32: 3727 case INDEX_op_rotr_i32: 3728 case INDEX_op_setcond_i32: 3729 case INDEX_op_and_i64: 3730 case INDEX_op_andc_i64: 3731 case INDEX_op_shl_i64: 3732 case INDEX_op_shr_i64: 3733 case INDEX_op_sar_i64: 3734 case INDEX_op_rotl_i64: 3735 case INDEX_op_rotr_i64: 3736 case INDEX_op_setcond_i64: 3737 return C_O1_I2(r, r, ri); 3738 3739 case INDEX_op_mul_i32: 3740 case INDEX_op_mul_i64: 3741 return C_O1_I2(r, r, rI); 3742 3743 case INDEX_op_div_i32: 3744 case INDEX_op_divu_i32: 3745 case INDEX_op_rem_i32: 3746 case INDEX_op_remu_i32: 3747 case INDEX_op_nand_i32: 3748 case INDEX_op_nor_i32: 3749 case INDEX_op_muluh_i32: 3750 case INDEX_op_mulsh_i32: 3751 case INDEX_op_orc_i64: 3752 case INDEX_op_eqv_i64: 3753 case INDEX_op_nand_i64: 3754 case INDEX_op_nor_i64: 3755 case INDEX_op_div_i64: 3756 case INDEX_op_divu_i64: 3757 case INDEX_op_rem_i64: 3758 case INDEX_op_remu_i64: 3759 case INDEX_op_mulsh_i64: 3760 case INDEX_op_muluh_i64: 3761 return C_O1_I2(r, r, r); 3762 3763 case INDEX_op_sub_i32: 3764 return C_O1_I2(r, rI, ri); 3765 case INDEX_op_add_i64: 3766 return C_O1_I2(r, r, rT); 3767 case INDEX_op_or_i64: 3768 case INDEX_op_xor_i64: 3769 return C_O1_I2(r, r, rU); 3770 case INDEX_op_sub_i64: 3771 return C_O1_I2(r, rI, rT); 3772 case INDEX_op_clz_i32: 3773 case INDEX_op_ctz_i32: 3774 case INDEX_op_clz_i64: 3775 case INDEX_op_ctz_i64: 3776 return C_O1_I2(r, r, rZW); 3777 3778 case INDEX_op_brcond_i32: 3779 case INDEX_op_brcond_i64: 3780 return C_O0_I2(r, ri); 3781 3782 case INDEX_op_movcond_i32: 3783 case INDEX_op_movcond_i64: 3784 return C_O1_I4(r, r, ri, rZ, rZ); 3785 case INDEX_op_deposit_i32: 3786 case INDEX_op_deposit_i64: 3787 return C_O1_I2(r, 0, rZ); 3788 case INDEX_op_brcond2_i32: 3789 return C_O0_I4(r, r, ri, ri); 3790 case INDEX_op_setcond2_i32: 3791 return C_O1_I4(r, r, r, ri, ri); 3792 case INDEX_op_add2_i64: 3793 case INDEX_op_add2_i32: 3794 return C_O2_I4(r, r, r, r, rI, rZM); 3795 case INDEX_op_sub2_i64: 3796 case INDEX_op_sub2_i32: 3797 return C_O2_I4(r, r, rI, rZM, r, r); 3798 3799 case INDEX_op_qemu_ld_a32_i32: 3800 return C_O1_I1(r, r); 3801 case INDEX_op_qemu_ld_a64_i32: 3802 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r); 3803 case INDEX_op_qemu_ld_a32_i64: 3804 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); 3805 case INDEX_op_qemu_ld_a64_i64: 3806 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r); 3807 3808 case INDEX_op_qemu_st_a32_i32: 3809 return C_O0_I2(r, r); 3810 case INDEX_op_qemu_st_a64_i32: 3811 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); 3812 case INDEX_op_qemu_st_a32_i64: 3813 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); 3814 case INDEX_op_qemu_st_a64_i64: 3815 return TCG_TARGET_REG_BITS == 64 ? 
C_O0_I2(r, r) : C_O0_I4(r, r, r, r); 3816 3817 case INDEX_op_qemu_ld_a32_i128: 3818 case INDEX_op_qemu_ld_a64_i128: 3819 return C_O2_I1(o, m, r); 3820 case INDEX_op_qemu_st_a32_i128: 3821 case INDEX_op_qemu_st_a64_i128: 3822 return C_O0_I3(o, m, r); 3823 3824 case INDEX_op_add_vec: 3825 case INDEX_op_sub_vec: 3826 case INDEX_op_mul_vec: 3827 case INDEX_op_and_vec: 3828 case INDEX_op_or_vec: 3829 case INDEX_op_xor_vec: 3830 case INDEX_op_andc_vec: 3831 case INDEX_op_orc_vec: 3832 case INDEX_op_nor_vec: 3833 case INDEX_op_eqv_vec: 3834 case INDEX_op_nand_vec: 3835 case INDEX_op_cmp_vec: 3836 case INDEX_op_ssadd_vec: 3837 case INDEX_op_sssub_vec: 3838 case INDEX_op_usadd_vec: 3839 case INDEX_op_ussub_vec: 3840 case INDEX_op_smax_vec: 3841 case INDEX_op_smin_vec: 3842 case INDEX_op_umax_vec: 3843 case INDEX_op_umin_vec: 3844 case INDEX_op_shlv_vec: 3845 case INDEX_op_shrv_vec: 3846 case INDEX_op_sarv_vec: 3847 case INDEX_op_rotlv_vec: 3848 case INDEX_op_rotrv_vec: 3849 case INDEX_op_ppc_mrgh_vec: 3850 case INDEX_op_ppc_mrgl_vec: 3851 case INDEX_op_ppc_muleu_vec: 3852 case INDEX_op_ppc_mulou_vec: 3853 case INDEX_op_ppc_pkum_vec: 3854 case INDEX_op_dup2_vec: 3855 return C_O1_I2(v, v, v); 3856 3857 case INDEX_op_not_vec: 3858 case INDEX_op_neg_vec: 3859 return C_O1_I1(v, v); 3860 3861 case INDEX_op_dup_vec: 3862 return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v); 3863 3864 case INDEX_op_ld_vec: 3865 case INDEX_op_dupm_vec: 3866 return C_O1_I1(v, r); 3867 3868 case INDEX_op_st_vec: 3869 return C_O0_I2(v, r); 3870 3871 case INDEX_op_bitsel_vec: 3872 case INDEX_op_ppc_msum_vec: 3873 return C_O1_I3(v, v, v, v); 3874 3875 default: 3876 g_assert_not_reached(); 3877 } 3878} 3879 3880static void tcg_target_init(TCGContext *s) 3881{ 3882 unsigned long hwcap = qemu_getauxval(AT_HWCAP); 3883 unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2); 3884 3885 have_isa = tcg_isa_base; 3886 if (hwcap & PPC_FEATURE_ARCH_2_06) { 3887 have_isa = tcg_isa_2_06; 3888 } 3889#ifdef PPC_FEATURE2_ARCH_2_07 3890 if (hwcap2 & PPC_FEATURE2_ARCH_2_07) { 3891 have_isa = tcg_isa_2_07; 3892 } 3893#endif 3894#ifdef PPC_FEATURE2_ARCH_3_00 3895 if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { 3896 have_isa = tcg_isa_3_00; 3897 } 3898#endif 3899#ifdef PPC_FEATURE2_ARCH_3_10 3900 if (hwcap2 & PPC_FEATURE2_ARCH_3_10) { 3901 have_isa = tcg_isa_3_10; 3902 } 3903#endif 3904 3905#ifdef PPC_FEATURE2_HAS_ISEL 3906 /* Prefer explicit instruction from the kernel. */ 3907 have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0; 3908#else 3909 /* Fall back to knowing Power7 (2.06) has ISEL. */ 3910 have_isel = have_isa_2_06; 3911#endif 3912 3913 if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { 3914 have_altivec = true; 3915 /* We only care about the portion of VSX that overlaps Altivec. 
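           That is, VSX registers 32-63, which alias VR0-VR31: have_vsx only
           enables a few additional instructions (xxpermdi, lxvdsx, xxsel)
           on the same register set, it does not add allocatable registers.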
*/ 3916 if (hwcap & PPC_FEATURE_HAS_VSX) { 3917 have_vsx = true; 3918 } 3919 } 3920 3921 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; 3922 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; 3923 if (have_altivec) { 3924 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 3925 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 3926 } 3927 3928 tcg_target_call_clobber_regs = 0; 3929 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); 3930 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); 3931 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); 3932 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); 3933 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); 3934 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); 3935 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7); 3936 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); 3937 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); 3938 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); 3939 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); 3940 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12); 3941 3942 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0); 3943 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1); 3944 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2); 3945 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3); 3946 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4); 3947 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5); 3948 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6); 3949 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7); 3950 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 3951 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 3952 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 3953 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 3954 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 3955 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 3956 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 3957 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 3958 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16); 3959 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17); 3960 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18); 3961 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19); 3962 3963 s->reserved_regs = 0; 3964 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ 3965 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ 3966#if defined(_CALL_SYSV) 3967 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ 3968#endif 3969#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 3970 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ 3971#endif 3972 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 3973 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 3974 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); 3975 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); 3976 if (USE_REG_TB) { 3977 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */ 3978 } 3979} 3980 3981#ifdef __ELF__ 3982typedef struct { 3983 DebugFrameCIE cie; 3984 DebugFrameFDEHeader fde; 3985 uint8_t fde_def_cfa[4]; 3986 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; 
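    /* Three bytes for the DW_CFA_offset_extended_sf entry describing LR,
       then two bytes (DW_CFA_offset + register, uleb128 offset) for each
       callee-saved GPR, filled in by tcg_register_jit below. */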
3987} DebugFrame; 3988 3989/* We're expecting a 2 byte uleb128 encoded value. */ 3990QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 3991 3992#if TCG_TARGET_REG_BITS == 64 3993# define ELF_HOST_MACHINE EM_PPC64 3994#else 3995# define ELF_HOST_MACHINE EM_PPC 3996#endif 3997 3998static DebugFrame debug_frame = { 3999 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 4000 .cie.id = -1, 4001 .cie.version = 1, 4002 .cie.code_align = 1, 4003 .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ 4004 .cie.return_column = 65, 4005 4006 /* Total FDE size does not include the "len" member. */ 4007 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), 4008 4009 .fde_def_cfa = { 4010 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ 4011 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 4012 (FRAME_SIZE >> 7) 4013 }, 4014 .fde_reg_ofs = { 4015 /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ 4016 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, 4017 } 4018}; 4019 4020void tcg_register_jit(const void *buf, size_t buf_size) 4021{ 4022 uint8_t *p = &debug_frame.fde_reg_ofs[3]; 4023 int i; 4024 4025 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { 4026 p[0] = 0x80 + tcg_target_callee_save_regs[i]; 4027 p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; 4028 } 4029 4030 debug_frame.fde.func_start = (uintptr_t)buf; 4031 debug_frame.fde.func_len = buf_size; 4032 4033 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 4034} 4035#endif /* __ELF__ */ 4036#undef VMULEUB 4037#undef VMULEUH 4038#undef VMULEUW 4039#undef VMULOUB 4040#undef VMULOUH 4041#undef VMULOUW 4042#undef VMSUMUHM 4043