/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
 */
#if !defined(_CALL_SYSV) && \
    !defined(_CALL_DARWIN) && \
    !defined(_CALL_AIX) && \
    !defined(_CALL_ELF)
# if defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB      TCG_REG_R31
#define USE_REG_TB      (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}
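
/*
 * Worked example of the relocation above (illustrative values only):
 * the I-form branch encodes a 26-bit signed byte displacement whose low
 * two bits are always zero, so in_range_b() accepts roughly +/- 32 MiB.
 * For a branch whose target lies 0x1000 bytes ahead, reloc_pc24_val()
 * returns 0x1000, which reloc_pc24() deposits into bits 2..25 of the
 * existing B/BL instruction without touching the opcode, AA or LK bits.
 */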

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)

#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)
#define LQ     OPCD( 56)

#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)
#define STQ    XO62(  2)

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD(  8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)    /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)    /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)     /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)     /* v3.00 */
#define LXVWSX     (XO31(364) | 1)    /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)    /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)    /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)     /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)      /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)     /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)      /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)      /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)      /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)      /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)      /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)      /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)      /* v2.07 */
#define VCMPNEB    VX4(7)        /* v3.00 */
#define VCMPNEH    VX4(71)       /* v3.00 */
#define VCMPNEW    VX4(135)      /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)     /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)     /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)      /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)      /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)      /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)      /* v2.07 */
#define VMULUWM    VX4(137)      /* v2.07 */
#define VMULLD     VX4(457)      /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)     /* v2.07 */
#define VNAND      VX4(1412)     /* v2.07 */
#define VORC       VX4(1348)     /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
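
/*
 * Example of the encodings above: "beq cr7, <target>" is assembled as
 * BC | BI(7, CR_EQ) | BO_COND_TRUE, i.e. BO = 12 (branch if the CR bit
 * is set) and BI = 7 * 4 + CR_EQ = 30, the EQ bit of cr7.  The 14-bit
 * displacement is filled in later, e.g. by reloc_pc14().
 */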

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *  dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *  dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *  dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}
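
/*
 * As a concrete check of the dep()/rol32() notation used above: for the
 * 32-bit swap with src = 0x11223344 (bytes a=11 b=22 c=33 d=44),
 *     rlwinm tmp,src,8,0,31    -> tmp = 0x22334411   ("bcda")
 *     rlwimi tmp,src,24,0,7    -> tmp = 0x44334411   ("dcda")
 *     rlwimi tmp,src,24,16,23  -> tmp = 0x44332211   ("dcba")
 * i.e. each rlwimi rotates src and deposits only the bytes selected by
 * the MB..ME mask, leaving the rest of tmp intact.
 */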

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = tcg_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       tcg_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
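
/*
 * Worked examples of the decomposition above (values chosen only for
 * illustration): 0x12345678, assuming it is not within 16-bit reach of
 * the TB register, does not fit in 16 bits and so is built as
 * "addis ret,0,0x1234; ori ret,ret,0x5678".  A mask such as
 * 0x7fff000000000000 takes the "common masks" path: ctz64 gives
 * shift = 48 and arg >> 48 = 0x7fff fits in 16 bits, so it becomes
 * "addi ret,0,0x7fff" followed by a left shift by 48.
 */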

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = tcg_tbrel_diff(s, NULL);
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}
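
/*
 * Worked example for mask_operand(): c = 0x00ffff00 has lsb = 0x100, and
 * test = c + lsb = 0x01000000 is a power of two, so the pattern is
 * accepted with me = clz32(0x100) = 23 and mb = clz32(0x01000000) + 1 = 8.
 * In rlwinm's big-endian bit numbering, the MB=8..ME=23 mask is exactly
 * 0x00ffff00, so an AND with such a constant can be emitted as a single
 * rlwinm (see tcg_out_andi32 below).
 */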

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
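
/*
 * Example of the offset splitting above (illustrative numbers): for
 * "lwz rt, 0x12345678(base)" the displacement does not fit in 16 bits,
 * so it is split into l1 = 0x1234 and l0 = 0x5678 and emitted as
 *     addis rs, base, 0x1234
 *     lwz   rt, 0x5678(rs)
 * When the low half would sign-extend negatively (e.g. 0x12348765),
 * the high part is rounded up instead (addis with 0x1235, then a
 * displacement of -0x789b), so the sum is still the original offset.
 */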

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        assert((offset & 3) == 0);
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        g_assert_not_reached();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
{
    /* X != 0 implies X + -1 generates a carry.  Extra addition
       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
    if (dst != src) {
        tcg_out32(s, ADDIC | TAI(dst, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, dst, src));
    } else {
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    }
}
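
/*
 * Concrete reading of the carry trick above, with illustrative register
 * roles: for X in "src",
 *     addic  dst, src, -1     ; dst = X - 1, CA = 1 unless X == 0
 *     subfe  dst, dst, src    ; dst = ~(X - 1) + X + CA = CA
 * so dst ends up 1 exactly when X != 0 and 0 when X == 0, without any
 * branch or condition-register read.
 */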

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
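    /*
     * For example, "setcond arg0, arg1, arg2, lt" with isel becomes
     *     cmpw  cr7, arg1, arg2
     *     li    arg0, 1
     *     li    r0, 0
     *     isel  arg0, arg0, r0, 4*cr7+lt     ; arg0 = lt ? 1 : 0
     * Entries in tcg_to_isel with the low bit set (ge, le, ne, ...) test
     * the same CR bit but want the inverted sense; the code below handles
     * them by selecting between the literal-zero RA=0 operand and arg0=1,
     * which also saves the "li r0, 0".
     */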
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        if (crop) {
            tcg_out32(s, crop);
        }
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
{
    if (l->has_value) {
        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out32(s, bc);
}

static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
{
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
    tcg_out_bc(s, tcg_to_bc[cond], l);
}
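
/*
 * Illustration of the non-isel movcond lowering implemented below, with
 * dest distinct from v1/v2 (register roles shown for clarity only):
 *     cmpw  cr7, c1, c2
 *     mr    dest, v1
 *     bc    <cond true>, +8      ; skip the single next insn
 *     mr    dest, v2
 * so dest = cond(c1, c2) ? v1 : v2.  The "| 8" in the code is that byte
 * displacement: it branches over exactly one 4-byte instruction.
 */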

static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
                            TCGArg v2, bool const_c2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out32(s, tcg_to_bc[cond] | 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}

static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                         const int *const_args)
{
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond = args[4], cond2;
    TCGArg al, ah, bl, bh;
    int blconst, bhconst;
    int op, bit1, bit2;

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];
    blconst = const_args[2];
    bhconst = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args + 1, const_args + 1);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
}

static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args)
{
    tcg_out_cmp2(s, args, const_args);
    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
}

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    uint32_t insn;

    if (a0 & TCG_MO_ST_LD) {
        insn = HWSYNC;
    } else {
        insn = LWSYNC;
    }

    tcg_out32(s, insn);
}
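
/*
 * Rationale for the choice in tcg_out_mb above, as a concrete example:
 * lwsync orders load-load, load-store and store-store, so it is enough
 * for any barrier that does not include TCG_MO_ST_LD.  Only when a store
 * must be ordered before a later load (e.g. a full TCG_MO_ALL fence) do
 * we pay for the heavier hwsync.
 */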

static void tcg_out_call_int(TCGContext *s, int lk,
                             const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the descriptor.  If the branch is in range and the
       TOC value fits in 32 bits, we don't have to spend much effort
       building it.  */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, lk, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid the r12 load
       if we knew that the destination uses the same TOC, and what its
       local entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, lk, target);
    } else {
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#else
    tcg_out_b(s, lk, target);
#endif
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, LK, target);
}

static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_UQ] = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_UQ] = LDBRX,
};

static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_UQ] = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_UQ] = STDBRX,
};
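
/*
 * Example of how these tables are indexed: when the guest memory order
 * differs from the host's (for instance a big-endian guest on ppc64le),
 * MO_BSWAP is set, so a 4-byte load uses qemu_ldx_opc[MO_BSWAP | MO_UL]
 * = LWBRX, the byte-reversed indexed load, and the matching store uses
 * STWBRX.  Sign-extending byte-reversed loads have no table entry (e.g.
 * MO_BSWAP | MO_SW is 0), so tcg_out_qemu_ld falls back to the
 * zero-extending form followed by a separate sign extension.
 */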

static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
    if (arg < 0) {
        arg = TCG_REG_TMP1;
    }
    tcg_out32(s, MFSPR | RT(arg) | LR);
    return arg;
}

/*
 * For the purposes of ppc32 sorting 4 input registers into 4 argument
 * registers, there is an outside chance we would require 3 temps.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ra_gen = ldst_ra_gen,
    .ntmp = 3,
    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    TCGAtomAlign aa;

    if ((memop & MO_SIZE) <= MO_64) {
        return true;
    }

    /*
     * Reject 16-byte memop with 16-byte atomicity,
     * but do allow a pair of 64-bit operations.
     */
    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
    return aa.atom <= MO_64;
}

/*
 * For softmmu, perform the TLB load and compare.
 * For useronly, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addrlo, TCGReg addrhi,
                                           MemOpIdx oi, bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp a_bits, s_bits;

    /*
     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
     *
     * Before 3.0, "An access that is not atomic is performed as a set of
     * smaller disjoint atomic accesses. In general, the number and alignment
     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
     *
     * As of 3.0, "the non-atomic access is performed as described in
     * the corresponding list", which matches MO_ATOM_SUBALIGN.
     */
    s_bits = opc & MO_SIZE;
    h->aa = atom_and_align_for_opc(s, opc,
                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
                                                 : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_bits = h->aa.align;

#ifdef CONFIG_SOFTMMU
    int mem_index = get_mmuidx(oi);
    int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
                        : offsetof(CPUTLBEntry, addr_write);
    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
    int table_off = fast_off + offsetof(CPUTLBDescFast, table);

    ldst = new_ldst_label(s);
    ldst->is_ld = is_ld;
    ldst->oi = oi;
    ldst->addrlo_reg = addrlo;
    ldst->addrhi_reg = addrhi;

    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
*/ 2086 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); 2087 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); 2088 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); 2089 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); 2090 2091 /* Extract the page index, shifted into place for tlb index. */ 2092 if (TCG_TARGET_REG_BITS == 32) { 2093 tcg_out_shri32(s, TCG_REG_R0, addrlo, 2094 s->page_bits - CPU_TLB_ENTRY_BITS); 2095 } else { 2096 tcg_out_shri64(s, TCG_REG_R0, addrlo, 2097 s->page_bits - CPU_TLB_ENTRY_BITS); 2098 } 2099 tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); 2100 2101 /* Load the (low part) TLB comparator into TMP2. */ 2102 if (cmp_off == 0 2103 && (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32)) { 2104 uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32 2105 ? LWZUX : LDUX); 2106 tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); 2107 } else { 2108 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); 2109 if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) { 2110 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, 2111 TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN); 2112 } else { 2113 tcg_out_ld(s, addr_type, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off); 2114 } 2115 } 2116 2117 /* 2118 * Load the TLB addend for use on the fast path. 2119 * Do this asap to minimize any load use delay. 2120 */ 2121 if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) { 2122 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2123 offsetof(CPUTLBEntry, addend)); 2124 } 2125 2126 /* Clear the non-page, non-alignment bits from the address in R0. */ 2127 if (TCG_TARGET_REG_BITS == 32) { 2128 /* 2129 * We don't support unaligned accesses on 32-bits. 2130 * Preserve the bottom bits and thus trigger a comparison 2131 * failure on unaligned accesses. 2132 */ 2133 if (a_bits < s_bits) { 2134 a_bits = s_bits; 2135 } 2136 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, 2137 (32 - a_bits) & 31, 31 - s->page_bits); 2138 } else { 2139 TCGReg t = addrlo; 2140 2141 /* 2142 * If the access is unaligned, we need to make sure we fail if we 2143 * cross a page boundary. The trick is to add the access size-1 2144 * to the address before masking the low bits. That will make the 2145 * address overflow to the next page if we cross a page boundary, 2146 * which will then force a mismatch of the TLB compare. 2147 */ 2148 if (a_bits < s_bits) { 2149 unsigned a_mask = (1 << a_bits) - 1; 2150 unsigned s_mask = (1 << s_bits) - 1; 2151 tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); 2152 t = TCG_REG_R0; 2153 } 2154 2155 /* Mask the address for the requested alignment. */ 2156 if (addr_type == TCG_TYPE_I32) { 2157 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, 2158 (32 - a_bits) & 31, 31 - s->page_bits); 2159 } else if (a_bits == 0) { 2160 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); 2161 } else { 2162 tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 2163 64 - s->page_bits, s->page_bits - a_bits); 2164 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); 2165 } 2166 } 2167 2168 if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) { 2169 /* Low part comparison into cr7. */ 2170 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2171 0, 7, TCG_TYPE_I32); 2172 2173 /* Load the high part TLB comparator into TMP2. */ 2174 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2175 cmp_off + 4 * !HOST_BIG_ENDIAN); 2176 2177 /* Load addend, deferred for this case. 
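       The load was postponed because TCG_REG_TMP1 still held the entry
       address and was needed for the high comparator word just above;
       it is now free to be overwritten with the addend.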
*/ 2178 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2179 offsetof(CPUTLBEntry, addend)); 2180 2181 /* High part comparison into cr6. */ 2182 tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32); 2183 2184 /* Combine comparisons into cr7. */ 2185 tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2186 } else { 2187 /* Full comparison into cr7. */ 2188 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, addr_type); 2189 } 2190 2191 /* Load a pointer into the current opcode w/conditional branch-link. */ 2192 ldst->label_ptr[0] = s->code_ptr; 2193 tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); 2194 2195 h->base = TCG_REG_TMP1; 2196#else 2197 if (a_bits) { 2198 ldst = new_ldst_label(s); 2199 ldst->is_ld = is_ld; 2200 ldst->oi = oi; 2201 ldst->addrlo_reg = addrlo; 2202 ldst->addrhi_reg = addrhi; 2203 2204 /* We are expecting a_bits to max out at 7, much lower than ANDI. */ 2205 tcg_debug_assert(a_bits < 16); 2206 tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1)); 2207 2208 ldst->label_ptr[0] = s->code_ptr; 2209 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); 2210 } 2211 2212 h->base = guest_base ? TCG_GUEST_BASE_REG : 0; 2213#endif 2214 2215 if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) { 2216 /* Zero-extend the guest address for use in the host address. */ 2217 tcg_out_ext32u(s, TCG_REG_R0, addrlo); 2218 h->index = TCG_REG_R0; 2219 } else { 2220 h->index = addrlo; 2221 } 2222 2223 return ldst; 2224} 2225 2226static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, 2227 TCGReg addrlo, TCGReg addrhi, 2228 MemOpIdx oi, TCGType data_type) 2229{ 2230 MemOp opc = get_memop(oi); 2231 TCGLabelQemuLdst *ldst; 2232 HostAddress h; 2233 2234 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); 2235 2236 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2237 if (opc & MO_BSWAP) { 2238 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2239 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2240 tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0)); 2241 } else if (h.base != 0) { 2242 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2243 tcg_out32(s, LWZX | TAB(datahi, h.base, h.index)); 2244 tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0)); 2245 } else if (h.index == datahi) { 2246 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2247 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2248 } else { 2249 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2250 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2251 } 2252 } else { 2253 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; 2254 if (!have_isa_2_06 && insn == LDBRX) { 2255 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2256 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2257 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0)); 2258 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); 2259 } else if (insn) { 2260 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2261 } else { 2262 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; 2263 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2264 tcg_out_movext(s, TCG_TYPE_REG, datalo, 2265 TCG_TYPE_REG, opc & MO_SSIZE, datalo); 2266 } 2267 } 2268 2269 if (ldst) { 2270 ldst->type = data_type; 2271 ldst->datalo_reg = datalo; 2272 ldst->datahi_reg = datahi; 2273 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2274 } 2275} 2276 2277static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, 2278 TCGReg addrlo, TCGReg addrhi, 2279 MemOpIdx oi, TCGType data_type) 2280{ 2281 
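    /* Same address setup as tcg_out_qemu_ld above; only the store opcode
       table and the 32-bit handling of 64-bit data differ. */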
MemOp opc = get_memop(oi); 2282 TCGLabelQemuLdst *ldst; 2283 HostAddress h; 2284 2285 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); 2286 2287 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2288 if (opc & MO_BSWAP) { 2289 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2290 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2291 tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0)); 2292 } else if (h.base != 0) { 2293 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2294 tcg_out32(s, STWX | SAB(datahi, h.base, h.index)); 2295 tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0)); 2296 } else { 2297 tcg_out32(s, STW | TAI(datahi, h.index, 0)); 2298 tcg_out32(s, STW | TAI(datalo, h.index, 4)); 2299 } 2300 } else { 2301 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; 2302 if (!have_isa_2_06 && insn == STDBRX) { 2303 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2304 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4)); 2305 tcg_out_shri64(s, TCG_REG_R0, datalo, 32); 2306 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1)); 2307 } else { 2308 tcg_out32(s, insn | SAB(datalo, h.base, h.index)); 2309 } 2310 } 2311 2312 if (ldst) { 2313 ldst->type = data_type; 2314 ldst->datalo_reg = datalo; 2315 ldst->datahi_reg = datahi; 2316 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2317 } 2318} 2319 2320static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, 2321 TCGReg addr_reg, MemOpIdx oi, bool is_ld) 2322{ 2323 TCGLabelQemuLdst *ldst; 2324 HostAddress h; 2325 bool need_bswap; 2326 uint32_t insn; 2327 TCGReg index; 2328 2329 ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld); 2330 2331 /* Compose the final address, as LQ/STQ have no indexing. */ 2332 index = h.index; 2333 if (h.base != 0) { 2334 index = TCG_REG_TMP1; 2335 tcg_out32(s, ADD | TAB(index, h.base, h.index)); 2336 } 2337 need_bswap = get_memop(oi) & MO_BSWAP; 2338 2339 if (h.aa.atom == MO_128) { 2340 tcg_debug_assert(!need_bswap); 2341 tcg_debug_assert(datalo & 1); 2342 tcg_debug_assert(datahi == datalo - 1); 2343 insn = is_ld ? LQ : STQ; 2344 tcg_out32(s, insn | TAI(datahi, index, 0)); 2345 } else { 2346 TCGReg d1, d2; 2347 2348 if (HOST_BIG_ENDIAN ^ need_bswap) { 2349 d1 = datahi, d2 = datalo; 2350 } else { 2351 d1 = datalo, d2 = datahi; 2352 } 2353 2354 if (need_bswap) { 2355 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); 2356 insn = is_ld ? LDBRX : STDBRX; 2357 tcg_out32(s, insn | TAB(d1, 0, index)); 2358 tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); 2359 } else { 2360 insn = is_ld ? LD : STD; 2361 tcg_out32(s, insn | TAI(d1, index, 0)); 2362 tcg_out32(s, insn | TAI(d2, index, 8)); 2363 } 2364 } 2365 2366 if (ldst) { 2367 ldst->type = TCG_TYPE_I128; 2368 ldst->datalo_reg = datalo; 2369 ldst->datahi_reg = datahi; 2370 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2371 } 2372} 2373 2374static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2375{ 2376 int i; 2377 for (i = 0; i < count; ++i) { 2378 p[i] = NOP; 2379 } 2380} 2381 2382/* Parameters for function call generation, used in tcg.c. 
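   The frame built by tcg_target_qemu_prologue is, from the new r1 up:
   the ABI link area (plus the 8 * SZR parameter save area for _CALL_AIX),
   TCG_STATIC_CALL_ARGS_SIZE bytes of outgoing helper arguments,
   CPU_TEMP_BUF_NLONGS longs of TCG spill space, and the callee-saved
   GPRs at [REG_SAVE_BOT, FRAME_SIZE).  The old LR is stored into the
   caller's frame at FRAME_SIZE + LR_OFFSET.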
*/ 2383#define TCG_TARGET_STACK_ALIGN 16 2384 2385#ifdef _CALL_AIX 2386# define LINK_AREA_SIZE (6 * SZR) 2387# define LR_OFFSET (1 * SZR) 2388# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) 2389#elif defined(_CALL_DARWIN) 2390# define LINK_AREA_SIZE (6 * SZR) 2391# define LR_OFFSET (2 * SZR) 2392#elif TCG_TARGET_REG_BITS == 64 2393# if defined(_CALL_ELF) && _CALL_ELF == 2 2394# define LINK_AREA_SIZE (4 * SZR) 2395# define LR_OFFSET (1 * SZR) 2396# endif 2397#else /* TCG_TARGET_REG_BITS == 32 */ 2398# if defined(_CALL_SYSV) 2399# define LINK_AREA_SIZE (2 * SZR) 2400# define LR_OFFSET (1 * SZR) 2401# endif 2402#endif 2403#ifndef LR_OFFSET 2404# error "Unhandled abi" 2405#endif 2406#ifndef TCG_TARGET_CALL_STACK_OFFSET 2407# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE 2408#endif 2409 2410#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2411#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) 2412 2413#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ 2414 + TCG_STATIC_CALL_ARGS_SIZE \ 2415 + CPU_TEMP_BUF_SIZE \ 2416 + REG_SAVE_SIZE \ 2417 + TCG_TARGET_STACK_ALIGN - 1) \ 2418 & -TCG_TARGET_STACK_ALIGN) 2419 2420#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) 2421 2422static void tcg_target_qemu_prologue(TCGContext *s) 2423{ 2424 int i; 2425 2426#ifdef _CALL_AIX 2427 const void **desc = (const void **)s->code_ptr; 2428 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ 2429 desc[1] = 0; /* environment pointer */ 2430 s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ 2431#endif 2432 2433 tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, 2434 CPU_TEMP_BUF_SIZE); 2435 2436 /* Prologue */ 2437 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); 2438 tcg_out32(s, (SZR == 8 ? 
STDU : STWU) 2439 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); 2440 2441 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2442 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2443 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2444 } 2445 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2446 2447#ifndef CONFIG_SOFTMMU 2448 if (guest_base) { 2449 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); 2450 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2451 } 2452#endif 2453 2454 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2455 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); 2456 if (USE_REG_TB) { 2457 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]); 2458 } 2459 tcg_out32(s, BCCTR | BO_ALWAYS); 2460 2461 /* Epilogue */ 2462 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2463 2464 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2465 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2466 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2467 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2468 } 2469 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); 2470 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); 2471 tcg_out32(s, BCLR | BO_ALWAYS); 2472} 2473 2474static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) 2475{ 2476 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); 2477 tcg_out_b(s, 0, tcg_code_gen_epilogue); 2478} 2479 2480static void tcg_out_goto_tb(TCGContext *s, int which) 2481{ 2482 uintptr_t ptr = get_jmp_target_addr(s, which); 2483 2484 if (USE_REG_TB) { 2485 ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); 2486 tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); 2487 2488 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2489 set_jmp_insn_offset(s, which); 2490 tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); 2491 2492 /* When branch is out of range, fall through to indirect. */ 2493 tcg_out32(s, BCCTR | BO_ALWAYS); 2494 2495 /* For the unlinked case, need to reset TCG_REG_TB. */ 2496 set_jmp_reset_offset(s, which); 2497 tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, 2498 -tcg_current_code_size(s)); 2499 } else { 2500 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2501 set_jmp_insn_offset(s, which); 2502 tcg_out32(s, NOP); 2503 2504 /* When branch is out of range, fall through to indirect. 
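       The absolute address of the jump slot is split so that its low 16
       bits become the load displacement: materialize ptr minus its
       sign-extended low half, then load the target back through it.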
*/ 2505 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr); 2506 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr); 2507 tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); 2508 tcg_out32(s, BCCTR | BO_ALWAYS); 2509 set_jmp_reset_offset(s, which); 2510 } 2511} 2512 2513void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2514 uintptr_t jmp_rx, uintptr_t jmp_rw) 2515{ 2516 uintptr_t addr = tb->jmp_target_addr[n]; 2517 intptr_t diff = addr - jmp_rx; 2518 tcg_insn_unit insn; 2519 2520 if (in_range_b(diff)) { 2521 insn = B | (diff & 0x3fffffc); 2522 } else if (USE_REG_TB) { 2523 insn = MTSPR | RS(TCG_REG_TB) | CTR; 2524 } else { 2525 insn = NOP; 2526 } 2527 2528 qatomic_set((uint32_t *)jmp_rw, insn); 2529 flush_idcache_range(jmp_rx, jmp_rw, 4); 2530} 2531 2532static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2533 const TCGArg args[TCG_MAX_OP_ARGS], 2534 const int const_args[TCG_MAX_OP_ARGS]) 2535{ 2536 TCGArg a0, a1, a2; 2537 2538 switch (opc) { 2539 case INDEX_op_goto_ptr: 2540 tcg_out32(s, MTSPR | RS(args[0]) | CTR); 2541 if (USE_REG_TB) { 2542 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]); 2543 } 2544 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); 2545 tcg_out32(s, BCCTR | BO_ALWAYS); 2546 break; 2547 case INDEX_op_br: 2548 { 2549 TCGLabel *l = arg_label(args[0]); 2550 uint32_t insn = B; 2551 2552 if (l->has_value) { 2553 insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), 2554 l->u.value_ptr); 2555 } else { 2556 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); 2557 } 2558 tcg_out32(s, insn); 2559 } 2560 break; 2561 case INDEX_op_ld8u_i32: 2562 case INDEX_op_ld8u_i64: 2563 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2564 break; 2565 case INDEX_op_ld8s_i32: 2566 case INDEX_op_ld8s_i64: 2567 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2568 tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); 2569 break; 2570 case INDEX_op_ld16u_i32: 2571 case INDEX_op_ld16u_i64: 2572 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); 2573 break; 2574 case INDEX_op_ld16s_i32: 2575 case INDEX_op_ld16s_i64: 2576 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); 2577 break; 2578 case INDEX_op_ld_i32: 2579 case INDEX_op_ld32u_i64: 2580 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); 2581 break; 2582 case INDEX_op_ld32s_i64: 2583 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); 2584 break; 2585 case INDEX_op_ld_i64: 2586 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); 2587 break; 2588 case INDEX_op_st8_i32: 2589 case INDEX_op_st8_i64: 2590 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); 2591 break; 2592 case INDEX_op_st16_i32: 2593 case INDEX_op_st16_i64: 2594 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); 2595 break; 2596 case INDEX_op_st_i32: 2597 case INDEX_op_st32_i64: 2598 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); 2599 break; 2600 case INDEX_op_st_i64: 2601 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); 2602 break; 2603 2604 case INDEX_op_add_i32: 2605 a0 = args[0], a1 = args[1], a2 = args[2]; 2606 if (const_args[2]) { 2607 do_addi_32: 2608 tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); 2609 } else { 2610 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2611 } 2612 break; 2613 case INDEX_op_sub_i32: 2614 a0 = args[0], a1 = args[1], a2 = args[2]; 2615 if (const_args[1]) { 2616 if (const_args[2]) { 2617 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); 2618 } else { 2619 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2620 } 2621 } else if (const_args[2]) { 2622 
a2 = -a2; 2623 goto do_addi_32; 2624 } else { 2625 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2626 } 2627 break; 2628 2629 case INDEX_op_and_i32: 2630 a0 = args[0], a1 = args[1], a2 = args[2]; 2631 if (const_args[2]) { 2632 tcg_out_andi32(s, a0, a1, a2); 2633 } else { 2634 tcg_out32(s, AND | SAB(a1, a0, a2)); 2635 } 2636 break; 2637 case INDEX_op_and_i64: 2638 a0 = args[0], a1 = args[1], a2 = args[2]; 2639 if (const_args[2]) { 2640 tcg_out_andi64(s, a0, a1, a2); 2641 } else { 2642 tcg_out32(s, AND | SAB(a1, a0, a2)); 2643 } 2644 break; 2645 case INDEX_op_or_i64: 2646 case INDEX_op_or_i32: 2647 a0 = args[0], a1 = args[1], a2 = args[2]; 2648 if (const_args[2]) { 2649 tcg_out_ori32(s, a0, a1, a2); 2650 } else { 2651 tcg_out32(s, OR | SAB(a1, a0, a2)); 2652 } 2653 break; 2654 case INDEX_op_xor_i64: 2655 case INDEX_op_xor_i32: 2656 a0 = args[0], a1 = args[1], a2 = args[2]; 2657 if (const_args[2]) { 2658 tcg_out_xori32(s, a0, a1, a2); 2659 } else { 2660 tcg_out32(s, XOR | SAB(a1, a0, a2)); 2661 } 2662 break; 2663 case INDEX_op_andc_i32: 2664 a0 = args[0], a1 = args[1], a2 = args[2]; 2665 if (const_args[2]) { 2666 tcg_out_andi32(s, a0, a1, ~a2); 2667 } else { 2668 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2669 } 2670 break; 2671 case INDEX_op_andc_i64: 2672 a0 = args[0], a1 = args[1], a2 = args[2]; 2673 if (const_args[2]) { 2674 tcg_out_andi64(s, a0, a1, ~a2); 2675 } else { 2676 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2677 } 2678 break; 2679 case INDEX_op_orc_i32: 2680 if (const_args[2]) { 2681 tcg_out_ori32(s, args[0], args[1], ~args[2]); 2682 break; 2683 } 2684 /* FALLTHRU */ 2685 case INDEX_op_orc_i64: 2686 tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); 2687 break; 2688 case INDEX_op_eqv_i32: 2689 if (const_args[2]) { 2690 tcg_out_xori32(s, args[0], args[1], ~args[2]); 2691 break; 2692 } 2693 /* FALLTHRU */ 2694 case INDEX_op_eqv_i64: 2695 tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); 2696 break; 2697 case INDEX_op_nand_i32: 2698 case INDEX_op_nand_i64: 2699 tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); 2700 break; 2701 case INDEX_op_nor_i32: 2702 case INDEX_op_nor_i64: 2703 tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); 2704 break; 2705 2706 case INDEX_op_clz_i32: 2707 tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], 2708 args[2], const_args[2]); 2709 break; 2710 case INDEX_op_ctz_i32: 2711 tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], 2712 args[2], const_args[2]); 2713 break; 2714 case INDEX_op_ctpop_i32: 2715 tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); 2716 break; 2717 2718 case INDEX_op_clz_i64: 2719 tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], 2720 args[2], const_args[2]); 2721 break; 2722 case INDEX_op_ctz_i64: 2723 tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], 2724 args[2], const_args[2]); 2725 break; 2726 case INDEX_op_ctpop_i64: 2727 tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); 2728 break; 2729 2730 case INDEX_op_mul_i32: 2731 a0 = args[0], a1 = args[1], a2 = args[2]; 2732 if (const_args[2]) { 2733 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2734 } else { 2735 tcg_out32(s, MULLW | TAB(a0, a1, a2)); 2736 } 2737 break; 2738 2739 case INDEX_op_div_i32: 2740 tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); 2741 break; 2742 2743 case INDEX_op_divu_i32: 2744 tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); 2745 break; 2746 2747 case INDEX_op_rem_i32: 2748 tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); 2749 break; 2750 2751 case INDEX_op_remu_i32: 2752 tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); 2753 break; 
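    /* Note: MODSW/MODUW above and MODSD/MODUD below are ISA 3.0
       instructions; these cases should only be reached when the rem/remu
       opcodes are advertised (see TCG_TARGET_HAS_rem_* in tcg-target.h). */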
2754 2755 case INDEX_op_shl_i32: 2756 if (const_args[2]) { 2757 /* Limit immediate shift count lest we create an illegal insn. */ 2758 tcg_out_shli32(s, args[0], args[1], args[2] & 31); 2759 } else { 2760 tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); 2761 } 2762 break; 2763 case INDEX_op_shr_i32: 2764 if (const_args[2]) { 2765 /* Limit immediate shift count lest we create an illegal insn. */ 2766 tcg_out_shri32(s, args[0], args[1], args[2] & 31); 2767 } else { 2768 tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); 2769 } 2770 break; 2771 case INDEX_op_sar_i32: 2772 if (const_args[2]) { 2773 tcg_out_sari32(s, args[0], args[1], args[2]); 2774 } else { 2775 tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); 2776 } 2777 break; 2778 case INDEX_op_rotl_i32: 2779 if (const_args[2]) { 2780 tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); 2781 } else { 2782 tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) 2783 | MB(0) | ME(31)); 2784 } 2785 break; 2786 case INDEX_op_rotr_i32: 2787 if (const_args[2]) { 2788 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); 2789 } else { 2790 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); 2791 tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) 2792 | MB(0) | ME(31)); 2793 } 2794 break; 2795 2796 case INDEX_op_brcond_i32: 2797 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 2798 arg_label(args[3]), TCG_TYPE_I32); 2799 break; 2800 case INDEX_op_brcond_i64: 2801 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 2802 arg_label(args[3]), TCG_TYPE_I64); 2803 break; 2804 case INDEX_op_brcond2_i32: 2805 tcg_out_brcond2(s, args, const_args); 2806 break; 2807 2808 case INDEX_op_neg_i32: 2809 case INDEX_op_neg_i64: 2810 tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); 2811 break; 2812 2813 case INDEX_op_not_i32: 2814 case INDEX_op_not_i64: 2815 tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); 2816 break; 2817 2818 case INDEX_op_add_i64: 2819 a0 = args[0], a1 = args[1], a2 = args[2]; 2820 if (const_args[2]) { 2821 do_addi_64: 2822 tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); 2823 } else { 2824 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2825 } 2826 break; 2827 case INDEX_op_sub_i64: 2828 a0 = args[0], a1 = args[1], a2 = args[2]; 2829 if (const_args[1]) { 2830 if (const_args[2]) { 2831 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); 2832 } else { 2833 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2834 } 2835 } else if (const_args[2]) { 2836 a2 = -a2; 2837 goto do_addi_64; 2838 } else { 2839 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2840 } 2841 break; 2842 2843 case INDEX_op_shl_i64: 2844 if (const_args[2]) { 2845 /* Limit immediate shift count lest we create an illegal insn. */ 2846 tcg_out_shli64(s, args[0], args[1], args[2] & 63); 2847 } else { 2848 tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); 2849 } 2850 break; 2851 case INDEX_op_shr_i64: 2852 if (const_args[2]) { 2853 /* Limit immediate shift count lest we create an illegal insn. 
*/ 2854 tcg_out_shri64(s, args[0], args[1], args[2] & 63); 2855 } else { 2856 tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); 2857 } 2858 break; 2859 case INDEX_op_sar_i64: 2860 if (const_args[2]) { 2861 tcg_out_sari64(s, args[0], args[1], args[2]); 2862 } else { 2863 tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); 2864 } 2865 break; 2866 case INDEX_op_rotl_i64: 2867 if (const_args[2]) { 2868 tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); 2869 } else { 2870 tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); 2871 } 2872 break; 2873 case INDEX_op_rotr_i64: 2874 if (const_args[2]) { 2875 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); 2876 } else { 2877 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); 2878 tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); 2879 } 2880 break; 2881 2882 case INDEX_op_mul_i64: 2883 a0 = args[0], a1 = args[1], a2 = args[2]; 2884 if (const_args[2]) { 2885 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2886 } else { 2887 tcg_out32(s, MULLD | TAB(a0, a1, a2)); 2888 } 2889 break; 2890 case INDEX_op_div_i64: 2891 tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); 2892 break; 2893 case INDEX_op_divu_i64: 2894 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); 2895 break; 2896 case INDEX_op_rem_i64: 2897 tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); 2898 break; 2899 case INDEX_op_remu_i64: 2900 tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); 2901 break; 2902 2903 case INDEX_op_qemu_ld_a64_i32: 2904 if (TCG_TARGET_REG_BITS == 32) { 2905 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], 2906 args[3], TCG_TYPE_I32); 2907 break; 2908 } 2909 /* fall through */ 2910 case INDEX_op_qemu_ld_a32_i32: 2911 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 2912 break; 2913 case INDEX_op_qemu_ld_a32_i64: 2914 if (TCG_TARGET_REG_BITS == 64) { 2915 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 2916 args[2], TCG_TYPE_I64); 2917 } else { 2918 tcg_out_qemu_ld(s, args[0], args[1], args[2], -1, 2919 args[3], TCG_TYPE_I64); 2920 } 2921 break; 2922 case INDEX_op_qemu_ld_a64_i64: 2923 if (TCG_TARGET_REG_BITS == 64) { 2924 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 2925 args[2], TCG_TYPE_I64); 2926 } else { 2927 tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], 2928 args[4], TCG_TYPE_I64); 2929 } 2930 break; 2931 case INDEX_op_qemu_ld_a32_i128: 2932 case INDEX_op_qemu_ld_a64_i128: 2933 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 2934 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); 2935 break; 2936 2937 case INDEX_op_qemu_st_a64_i32: 2938 if (TCG_TARGET_REG_BITS == 32) { 2939 tcg_out_qemu_st(s, args[0], -1, args[1], args[2], 2940 args[3], TCG_TYPE_I32); 2941 break; 2942 } 2943 /* fall through */ 2944 case INDEX_op_qemu_st_a32_i32: 2945 tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 2946 break; 2947 case INDEX_op_qemu_st_a32_i64: 2948 if (TCG_TARGET_REG_BITS == 64) { 2949 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 2950 args[2], TCG_TYPE_I64); 2951 } else { 2952 tcg_out_qemu_st(s, args[0], args[1], args[2], -1, 2953 args[3], TCG_TYPE_I64); 2954 } 2955 break; 2956 case INDEX_op_qemu_st_a64_i64: 2957 if (TCG_TARGET_REG_BITS == 64) { 2958 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 2959 args[2], TCG_TYPE_I64); 2960 } else { 2961 tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], 2962 args[4], TCG_TYPE_I64); 2963 } 2964 break; 2965 case INDEX_op_qemu_st_a32_i128: 2966 case INDEX_op_qemu_st_a64_i128: 2967 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 2968 
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); 2969 break; 2970 2971 case INDEX_op_setcond_i32: 2972 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 2973 const_args[2]); 2974 break; 2975 case INDEX_op_setcond_i64: 2976 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 2977 const_args[2]); 2978 break; 2979 case INDEX_op_setcond2_i32: 2980 tcg_out_setcond2(s, args, const_args); 2981 break; 2982 2983 case INDEX_op_bswap16_i32: 2984 case INDEX_op_bswap16_i64: 2985 tcg_out_bswap16(s, args[0], args[1], args[2]); 2986 break; 2987 case INDEX_op_bswap32_i32: 2988 tcg_out_bswap32(s, args[0], args[1], 0); 2989 break; 2990 case INDEX_op_bswap32_i64: 2991 tcg_out_bswap32(s, args[0], args[1], args[2]); 2992 break; 2993 case INDEX_op_bswap64_i64: 2994 tcg_out_bswap64(s, args[0], args[1]); 2995 break; 2996 2997 case INDEX_op_deposit_i32: 2998 if (const_args[2]) { 2999 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; 3000 tcg_out_andi32(s, args[0], args[0], ~mask); 3001 } else { 3002 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], 3003 32 - args[3] - args[4], 31 - args[3]); 3004 } 3005 break; 3006 case INDEX_op_deposit_i64: 3007 if (const_args[2]) { 3008 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; 3009 tcg_out_andi64(s, args[0], args[0], ~mask); 3010 } else { 3011 tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], 3012 64 - args[3] - args[4]); 3013 } 3014 break; 3015 3016 case INDEX_op_extract_i32: 3017 tcg_out_rlw(s, RLWINM, args[0], args[1], 3018 32 - args[2], 32 - args[3], 31); 3019 break; 3020 case INDEX_op_extract_i64: 3021 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); 3022 break; 3023 3024 case INDEX_op_movcond_i32: 3025 tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], 3026 args[3], args[4], const_args[2]); 3027 break; 3028 case INDEX_op_movcond_i64: 3029 tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], 3030 args[3], args[4], const_args[2]); 3031 break; 3032 3033#if TCG_TARGET_REG_BITS == 64 3034 case INDEX_op_add2_i64: 3035#else 3036 case INDEX_op_add2_i32: 3037#endif 3038 /* Note that the CA bit is defined based on the word size of the 3039 environment. So in 64-bit mode it's always carry-out of bit 63. 3040 The fallback code using deposit works just as well for 32-bit. */ 3041 a0 = args[0], a1 = args[1]; 3042 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { 3043 a0 = TCG_REG_R0; 3044 } 3045 if (const_args[4]) { 3046 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); 3047 } else { 3048 tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); 3049 } 3050 if (const_args[5]) { 3051 tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); 3052 } else { 3053 tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); 3054 } 3055 if (a0 != args[0]) { 3056 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3057 } 3058 break; 3059 3060#if TCG_TARGET_REG_BITS == 64 3061 case INDEX_op_sub2_i64: 3062#else 3063 case INDEX_op_sub2_i32: 3064#endif 3065 a0 = args[0], a1 = args[1]; 3066 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { 3067 a0 = TCG_REG_R0; 3068 } 3069 if (const_args[2]) { 3070 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); 3071 } else { 3072 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); 3073 } 3074 if (const_args[3]) { 3075 tcg_out32(s, (args[3] ? 
SUBFME : SUBFZE) | RT(a1) | RA(args[5])); 3076 } else { 3077 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); 3078 } 3079 if (a0 != args[0]) { 3080 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3081 } 3082 break; 3083 3084 case INDEX_op_muluh_i32: 3085 tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); 3086 break; 3087 case INDEX_op_mulsh_i32: 3088 tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); 3089 break; 3090 case INDEX_op_muluh_i64: 3091 tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); 3092 break; 3093 case INDEX_op_mulsh_i64: 3094 tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); 3095 break; 3096 3097 case INDEX_op_mb: 3098 tcg_out_mb(s, args[0]); 3099 break; 3100 3101 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 3102 case INDEX_op_mov_i64: 3103 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 3104 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 3105 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 3106 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 3107 case INDEX_op_ext8s_i64: 3108 case INDEX_op_ext8u_i32: 3109 case INDEX_op_ext8u_i64: 3110 case INDEX_op_ext16s_i32: 3111 case INDEX_op_ext16s_i64: 3112 case INDEX_op_ext16u_i32: 3113 case INDEX_op_ext16u_i64: 3114 case INDEX_op_ext32s_i64: 3115 case INDEX_op_ext32u_i64: 3116 case INDEX_op_ext_i32_i64: 3117 case INDEX_op_extu_i32_i64: 3118 case INDEX_op_extrl_i64_i32: 3119 default: 3120 g_assert_not_reached(); 3121 } 3122} 3123 3124int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3125{ 3126 switch (opc) { 3127 case INDEX_op_and_vec: 3128 case INDEX_op_or_vec: 3129 case INDEX_op_xor_vec: 3130 case INDEX_op_andc_vec: 3131 case INDEX_op_not_vec: 3132 case INDEX_op_nor_vec: 3133 case INDEX_op_eqv_vec: 3134 case INDEX_op_nand_vec: 3135 return 1; 3136 case INDEX_op_orc_vec: 3137 return have_isa_2_07; 3138 case INDEX_op_add_vec: 3139 case INDEX_op_sub_vec: 3140 case INDEX_op_smax_vec: 3141 case INDEX_op_smin_vec: 3142 case INDEX_op_umax_vec: 3143 case INDEX_op_umin_vec: 3144 case INDEX_op_shlv_vec: 3145 case INDEX_op_shrv_vec: 3146 case INDEX_op_sarv_vec: 3147 case INDEX_op_rotlv_vec: 3148 return vece <= MO_32 || have_isa_2_07; 3149 case INDEX_op_ssadd_vec: 3150 case INDEX_op_sssub_vec: 3151 case INDEX_op_usadd_vec: 3152 case INDEX_op_ussub_vec: 3153 return vece <= MO_32; 3154 case INDEX_op_cmp_vec: 3155 case INDEX_op_shli_vec: 3156 case INDEX_op_shri_vec: 3157 case INDEX_op_sari_vec: 3158 case INDEX_op_rotli_vec: 3159 return vece <= MO_32 || have_isa_2_07 ? -1 : 0; 3160 case INDEX_op_neg_vec: 3161 return vece >= MO_32 && have_isa_3_00; 3162 case INDEX_op_mul_vec: 3163 switch (vece) { 3164 case MO_8: 3165 case MO_16: 3166 return -1; 3167 case MO_32: 3168 return have_isa_2_07 ? 1 : -1; 3169 case MO_64: 3170 return have_isa_3_10; 3171 } 3172 return 0; 3173 case INDEX_op_bitsel_vec: 3174 return have_vsx; 3175 case INDEX_op_rotrv_vec: 3176 return -1; 3177 default: 3178 return 0; 3179 } 3180} 3181 3182static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 3183 TCGReg dst, TCGReg src) 3184{ 3185 tcg_debug_assert(dst >= TCG_REG_V0); 3186 3187 /* Splat from integer reg allowed via constraints for v3.00. 
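       mtvsrdd (MO_64) and mtvsrws (MO_32) broadcast the GPR into every
       element directly; other element sizes fail here so that the caller
       falls back on dupm or mov+dup.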
*/ 3188 if (src < TCG_REG_V0) { 3189 tcg_debug_assert(have_isa_3_00); 3190 switch (vece) { 3191 case MO_64: 3192 tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); 3193 return true; 3194 case MO_32: 3195 tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); 3196 return true; 3197 default: 3198 /* Fail, so that we fall back on either dupm or mov+dup. */ 3199 return false; 3200 } 3201 } 3202 3203 /* 3204 * Recall we use (or emulate) VSX integer loads, so the integer is 3205 * right justified within the left (zero-index) double-word. 3206 */ 3207 switch (vece) { 3208 case MO_8: 3209 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); 3210 break; 3211 case MO_16: 3212 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); 3213 break; 3214 case MO_32: 3215 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); 3216 break; 3217 case MO_64: 3218 if (have_vsx) { 3219 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); 3220 break; 3221 } 3222 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); 3223 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); 3224 break; 3225 default: 3226 g_assert_not_reached(); 3227 } 3228 return true; 3229} 3230 3231static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 3232 TCGReg out, TCGReg base, intptr_t offset) 3233{ 3234 int elt; 3235 3236 tcg_debug_assert(out >= TCG_REG_V0); 3237 switch (vece) { 3238 case MO_8: 3239 if (have_isa_3_00) { 3240 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); 3241 } else { 3242 tcg_out_mem_long(s, 0, LVEBX, out, base, offset); 3243 } 3244 elt = extract32(offset, 0, 4); 3245#if !HOST_BIG_ENDIAN 3246 elt ^= 15; 3247#endif 3248 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); 3249 break; 3250 case MO_16: 3251 tcg_debug_assert((offset & 1) == 0); 3252 if (have_isa_3_00) { 3253 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); 3254 } else { 3255 tcg_out_mem_long(s, 0, LVEHX, out, base, offset); 3256 } 3257 elt = extract32(offset, 1, 3); 3258#if !HOST_BIG_ENDIAN 3259 elt ^= 7; 3260#endif 3261 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); 3262 break; 3263 case MO_32: 3264 if (have_isa_3_00) { 3265 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); 3266 break; 3267 } 3268 tcg_debug_assert((offset & 3) == 0); 3269 tcg_out_mem_long(s, 0, LVEWX, out, base, offset); 3270 elt = extract32(offset, 2, 2); 3271#if !HOST_BIG_ENDIAN 3272 elt ^= 3; 3273#endif 3274 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); 3275 break; 3276 case MO_64: 3277 if (have_vsx) { 3278 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); 3279 break; 3280 } 3281 tcg_debug_assert((offset & 7) == 0); 3282 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); 3283 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); 3284 elt = extract32(offset, 3, 1); 3285#if !HOST_BIG_ENDIAN 3286 elt = !elt; 3287#endif 3288 if (elt) { 3289 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); 3290 } else { 3291 tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); 3292 } 3293 break; 3294 default: 3295 g_assert_not_reached(); 3296 } 3297 return true; 3298} 3299 3300static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3301 unsigned vecl, unsigned vece, 3302 const TCGArg args[TCG_MAX_OP_ARGS], 3303 const int const_args[TCG_MAX_OP_ARGS]) 3304{ 3305 static const uint32_t 3306 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, 3307 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, 3308 mul_op[4] = { 0, 0, VMULUWM, VMULLD }, 3309 neg_op[4] = { 0, 0, VNEGW, VNEGD }, 3310 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, 3311 ne_op[4] = { VCMPNEB, VCMPNEH, 
VCMPNEW, 0 }, 3312 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, 3313 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, 3314 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, 3315 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, 3316 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, 3317 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, 3318 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 3319 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 3320 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 3321 smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, 3322 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, 3323 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, 3324 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, 3325 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, 3326 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, 3327 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, 3328 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, 3329 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, 3330 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; 3331 3332 TCGType type = vecl + TCG_TYPE_V64; 3333 TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; 3334 uint32_t insn; 3335 3336 switch (opc) { 3337 case INDEX_op_ld_vec: 3338 tcg_out_ld(s, type, a0, a1, a2); 3339 return; 3340 case INDEX_op_st_vec: 3341 tcg_out_st(s, type, a0, a1, a2); 3342 return; 3343 case INDEX_op_dupm_vec: 3344 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 3345 return; 3346 3347 case INDEX_op_add_vec: 3348 insn = add_op[vece]; 3349 break; 3350 case INDEX_op_sub_vec: 3351 insn = sub_op[vece]; 3352 break; 3353 case INDEX_op_neg_vec: 3354 insn = neg_op[vece]; 3355 a2 = a1; 3356 a1 = 0; 3357 break; 3358 case INDEX_op_mul_vec: 3359 insn = mul_op[vece]; 3360 break; 3361 case INDEX_op_ssadd_vec: 3362 insn = ssadd_op[vece]; 3363 break; 3364 case INDEX_op_sssub_vec: 3365 insn = sssub_op[vece]; 3366 break; 3367 case INDEX_op_usadd_vec: 3368 insn = usadd_op[vece]; 3369 break; 3370 case INDEX_op_ussub_vec: 3371 insn = ussub_op[vece]; 3372 break; 3373 case INDEX_op_smin_vec: 3374 insn = smin_op[vece]; 3375 break; 3376 case INDEX_op_umin_vec: 3377 insn = umin_op[vece]; 3378 break; 3379 case INDEX_op_smax_vec: 3380 insn = smax_op[vece]; 3381 break; 3382 case INDEX_op_umax_vec: 3383 insn = umax_op[vece]; 3384 break; 3385 case INDEX_op_shlv_vec: 3386 insn = shlv_op[vece]; 3387 break; 3388 case INDEX_op_shrv_vec: 3389 insn = shrv_op[vece]; 3390 break; 3391 case INDEX_op_sarv_vec: 3392 insn = sarv_op[vece]; 3393 break; 3394 case INDEX_op_and_vec: 3395 insn = VAND; 3396 break; 3397 case INDEX_op_or_vec: 3398 insn = VOR; 3399 break; 3400 case INDEX_op_xor_vec: 3401 insn = VXOR; 3402 break; 3403 case INDEX_op_andc_vec: 3404 insn = VANDC; 3405 break; 3406 case INDEX_op_not_vec: 3407 insn = VNOR; 3408 a2 = a1; 3409 break; 3410 case INDEX_op_orc_vec: 3411 insn = VORC; 3412 break; 3413 case INDEX_op_nand_vec: 3414 insn = VNAND; 3415 break; 3416 case INDEX_op_nor_vec: 3417 insn = VNOR; 3418 break; 3419 case INDEX_op_eqv_vec: 3420 insn = VEQV; 3421 break; 3422 3423 case INDEX_op_cmp_vec: 3424 switch (args[3]) { 3425 case TCG_COND_EQ: 3426 insn = eq_op[vece]; 3427 break; 3428 case TCG_COND_NE: 3429 insn = ne_op[vece]; 3430 break; 3431 case TCG_COND_GT: 3432 insn = gts_op[vece]; 3433 break; 3434 case TCG_COND_GTU: 3435 insn = gtu_op[vece]; 3436 break; 3437 default: 3438 g_assert_not_reached(); 3439 } 3440 break; 3441 3442 case INDEX_op_bitsel_vec: 3443 tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); 3444 return; 3445 3446 case INDEX_op_dup2_vec: 3447 assert(TCG_TARGET_REG_BITS == 32); 3448 /* With 
inputs a1 = xLxx, a2 = xHxx */ 3449 tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ 3450 tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ 3451 tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ 3452 return; 3453 3454 case INDEX_op_ppc_mrgh_vec: 3455 insn = mrgh_op[vece]; 3456 break; 3457 case INDEX_op_ppc_mrgl_vec: 3458 insn = mrgl_op[vece]; 3459 break; 3460 case INDEX_op_ppc_muleu_vec: 3461 insn = muleu_op[vece]; 3462 break; 3463 case INDEX_op_ppc_mulou_vec: 3464 insn = mulou_op[vece]; 3465 break; 3466 case INDEX_op_ppc_pkum_vec: 3467 insn = pkum_op[vece]; 3468 break; 3469 case INDEX_op_rotlv_vec: 3470 insn = rotl_op[vece]; 3471 break; 3472 case INDEX_op_ppc_msum_vec: 3473 tcg_debug_assert(vece == MO_16); 3474 tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); 3475 return; 3476 3477 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3478 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 3479 default: 3480 g_assert_not_reached(); 3481 } 3482 3483 tcg_debug_assert(insn != 0); 3484 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 3485} 3486 3487static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, 3488 TCGv_vec v1, TCGArg imm, TCGOpcode opci) 3489{ 3490 TCGv_vec t1; 3491 3492 if (vece == MO_32) { 3493 /* 3494 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3495 * So using negative numbers gets us the 4th bit easily. 3496 */ 3497 imm = sextract32(imm, 0, 5); 3498 } else { 3499 imm &= (8 << vece) - 1; 3500 } 3501 3502 /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */ 3503 t1 = tcg_constant_vec(type, MO_8, imm); 3504 vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), 3505 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3506} 3507 3508static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, 3509 TCGv_vec v1, TCGv_vec v2, TCGCond cond) 3510{ 3511 bool need_swap = false, need_inv = false; 3512 3513 tcg_debug_assert(vece <= MO_32 || have_isa_2_07); 3514 3515 switch (cond) { 3516 case TCG_COND_EQ: 3517 case TCG_COND_GT: 3518 case TCG_COND_GTU: 3519 break; 3520 case TCG_COND_NE: 3521 if (have_isa_3_00 && vece <= MO_32) { 3522 break; 3523 } 3524 /* fall through */ 3525 case TCG_COND_LE: 3526 case TCG_COND_LEU: 3527 need_inv = true; 3528 break; 3529 case TCG_COND_LT: 3530 case TCG_COND_LTU: 3531 need_swap = true; 3532 break; 3533 case TCG_COND_GE: 3534 case TCG_COND_GEU: 3535 need_swap = need_inv = true; 3536 break; 3537 default: 3538 g_assert_not_reached(); 3539 } 3540 3541 if (need_inv) { 3542 cond = tcg_invert_cond(cond); 3543 } 3544 if (need_swap) { 3545 TCGv_vec t1; 3546 t1 = v1, v1 = v2, v2 = t1; 3547 cond = tcg_swap_cond(cond); 3548 } 3549 3550 vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), 3551 tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); 3552 3553 if (need_inv) { 3554 tcg_gen_not_vec(vece, v0, v0); 3555 } 3556} 3557 3558static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, 3559 TCGv_vec v1, TCGv_vec v2) 3560{ 3561 TCGv_vec t1 = tcg_temp_new_vec(type); 3562 TCGv_vec t2 = tcg_temp_new_vec(type); 3563 TCGv_vec c0, c16; 3564 3565 switch (vece) { 3566 case MO_8: 3567 case MO_16: 3568 vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), 3569 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3570 vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), 3571 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3572 vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), 3573 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3574 vec_gen_3(INDEX_op_ppc_mrgl_vec, type, 
vece + 1, tcgv_vec_arg(t1), 3575 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3576 vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), 3577 tcgv_vec_arg(v0), tcgv_vec_arg(t1)); 3578 break; 3579 3580 case MO_32: 3581 tcg_debug_assert(!have_isa_2_07); 3582 /* 3583 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3584 * So using -16 is a quick way to represent 16. 3585 */ 3586 c16 = tcg_constant_vec(type, MO_8, -16); 3587 c0 = tcg_constant_vec(type, MO_8, 0); 3588 3589 vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1), 3590 tcgv_vec_arg(v2), tcgv_vec_arg(c16)); 3591 vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), 3592 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3593 vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1), 3594 tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0)); 3595 vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1), 3596 tcgv_vec_arg(t1), tcgv_vec_arg(c16)); 3597 tcg_gen_add_vec(MO_32, v0, t1, t2); 3598 break; 3599 3600 default: 3601 g_assert_not_reached(); 3602 } 3603 tcg_temp_free_vec(t1); 3604 tcg_temp_free_vec(t2); 3605} 3606 3607void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 3608 TCGArg a0, ...) 3609{ 3610 va_list va; 3611 TCGv_vec v0, v1, v2, t0; 3612 TCGArg a2; 3613 3614 va_start(va, a0); 3615 v0 = temp_tcgv_vec(arg_temp(a0)); 3616 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 3617 a2 = va_arg(va, TCGArg); 3618 3619 switch (opc) { 3620 case INDEX_op_shli_vec: 3621 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); 3622 break; 3623 case INDEX_op_shri_vec: 3624 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); 3625 break; 3626 case INDEX_op_sari_vec: 3627 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); 3628 break; 3629 case INDEX_op_rotli_vec: 3630 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec); 3631 break; 3632 case INDEX_op_cmp_vec: 3633 v2 = temp_tcgv_vec(arg_temp(a2)); 3634 expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); 3635 break; 3636 case INDEX_op_mul_vec: 3637 v2 = temp_tcgv_vec(arg_temp(a2)); 3638 expand_vec_mul(type, vece, v0, v1, v2); 3639 break; 3640 case INDEX_op_rotlv_vec: 3641 v2 = temp_tcgv_vec(arg_temp(a2)); 3642 t0 = tcg_temp_new_vec(type); 3643 tcg_gen_neg_vec(vece, t0, v2); 3644 tcg_gen_rotlv_vec(vece, v0, v1, t0); 3645 tcg_temp_free_vec(t0); 3646 break; 3647 default: 3648 g_assert_not_reached(); 3649 } 3650 va_end(va); 3651} 3652 3653static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 3654{ 3655 switch (op) { 3656 case INDEX_op_goto_ptr: 3657 return C_O0_I1(r); 3658 3659 case INDEX_op_ld8u_i32: 3660 case INDEX_op_ld8s_i32: 3661 case INDEX_op_ld16u_i32: 3662 case INDEX_op_ld16s_i32: 3663 case INDEX_op_ld_i32: 3664 case INDEX_op_ctpop_i32: 3665 case INDEX_op_neg_i32: 3666 case INDEX_op_not_i32: 3667 case INDEX_op_ext8s_i32: 3668 case INDEX_op_ext16s_i32: 3669 case INDEX_op_bswap16_i32: 3670 case INDEX_op_bswap32_i32: 3671 case INDEX_op_extract_i32: 3672 case INDEX_op_ld8u_i64: 3673 case INDEX_op_ld8s_i64: 3674 case INDEX_op_ld16u_i64: 3675 case INDEX_op_ld16s_i64: 3676 case INDEX_op_ld32u_i64: 3677 case INDEX_op_ld32s_i64: 3678 case INDEX_op_ld_i64: 3679 case INDEX_op_ctpop_i64: 3680 case INDEX_op_neg_i64: 3681 case INDEX_op_not_i64: 3682 case INDEX_op_ext8s_i64: 3683 case INDEX_op_ext16s_i64: 3684 case INDEX_op_ext32s_i64: 3685 case INDEX_op_ext_i32_i64: 3686 case INDEX_op_extu_i32_i64: 3687 case INDEX_op_bswap16_i64: 3688 case INDEX_op_bswap32_i64: 3689 case INDEX_op_bswap64_i64: 3690 case 
INDEX_op_extract_i64: 3691 return C_O1_I1(r, r); 3692 3693 case INDEX_op_st8_i32: 3694 case INDEX_op_st16_i32: 3695 case INDEX_op_st_i32: 3696 case INDEX_op_st8_i64: 3697 case INDEX_op_st16_i64: 3698 case INDEX_op_st32_i64: 3699 case INDEX_op_st_i64: 3700 return C_O0_I2(r, r); 3701 3702 case INDEX_op_add_i32: 3703 case INDEX_op_and_i32: 3704 case INDEX_op_or_i32: 3705 case INDEX_op_xor_i32: 3706 case INDEX_op_andc_i32: 3707 case INDEX_op_orc_i32: 3708 case INDEX_op_eqv_i32: 3709 case INDEX_op_shl_i32: 3710 case INDEX_op_shr_i32: 3711 case INDEX_op_sar_i32: 3712 case INDEX_op_rotl_i32: 3713 case INDEX_op_rotr_i32: 3714 case INDEX_op_setcond_i32: 3715 case INDEX_op_and_i64: 3716 case INDEX_op_andc_i64: 3717 case INDEX_op_shl_i64: 3718 case INDEX_op_shr_i64: 3719 case INDEX_op_sar_i64: 3720 case INDEX_op_rotl_i64: 3721 case INDEX_op_rotr_i64: 3722 case INDEX_op_setcond_i64: 3723 return C_O1_I2(r, r, ri); 3724 3725 case INDEX_op_mul_i32: 3726 case INDEX_op_mul_i64: 3727 return C_O1_I2(r, r, rI); 3728 3729 case INDEX_op_div_i32: 3730 case INDEX_op_divu_i32: 3731 case INDEX_op_rem_i32: 3732 case INDEX_op_remu_i32: 3733 case INDEX_op_nand_i32: 3734 case INDEX_op_nor_i32: 3735 case INDEX_op_muluh_i32: 3736 case INDEX_op_mulsh_i32: 3737 case INDEX_op_orc_i64: 3738 case INDEX_op_eqv_i64: 3739 case INDEX_op_nand_i64: 3740 case INDEX_op_nor_i64: 3741 case INDEX_op_div_i64: 3742 case INDEX_op_divu_i64: 3743 case INDEX_op_rem_i64: 3744 case INDEX_op_remu_i64: 3745 case INDEX_op_mulsh_i64: 3746 case INDEX_op_muluh_i64: 3747 return C_O1_I2(r, r, r); 3748 3749 case INDEX_op_sub_i32: 3750 return C_O1_I2(r, rI, ri); 3751 case INDEX_op_add_i64: 3752 return C_O1_I2(r, r, rT); 3753 case INDEX_op_or_i64: 3754 case INDEX_op_xor_i64: 3755 return C_O1_I2(r, r, rU); 3756 case INDEX_op_sub_i64: 3757 return C_O1_I2(r, rI, rT); 3758 case INDEX_op_clz_i32: 3759 case INDEX_op_ctz_i32: 3760 case INDEX_op_clz_i64: 3761 case INDEX_op_ctz_i64: 3762 return C_O1_I2(r, r, rZW); 3763 3764 case INDEX_op_brcond_i32: 3765 case INDEX_op_brcond_i64: 3766 return C_O0_I2(r, ri); 3767 3768 case INDEX_op_movcond_i32: 3769 case INDEX_op_movcond_i64: 3770 return C_O1_I4(r, r, ri, rZ, rZ); 3771 case INDEX_op_deposit_i32: 3772 case INDEX_op_deposit_i64: 3773 return C_O1_I2(r, 0, rZ); 3774 case INDEX_op_brcond2_i32: 3775 return C_O0_I4(r, r, ri, ri); 3776 case INDEX_op_setcond2_i32: 3777 return C_O1_I4(r, r, r, ri, ri); 3778 case INDEX_op_add2_i64: 3779 case INDEX_op_add2_i32: 3780 return C_O2_I4(r, r, r, r, rI, rZM); 3781 case INDEX_op_sub2_i64: 3782 case INDEX_op_sub2_i32: 3783 return C_O2_I4(r, r, rI, rZM, r, r); 3784 3785 case INDEX_op_qemu_ld_a32_i32: 3786 return C_O1_I1(r, r); 3787 case INDEX_op_qemu_ld_a64_i32: 3788 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r); 3789 case INDEX_op_qemu_ld_a32_i64: 3790 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); 3791 case INDEX_op_qemu_ld_a64_i64: 3792 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r); 3793 3794 case INDEX_op_qemu_st_a32_i32: 3795 return C_O0_I2(r, r); 3796 case INDEX_op_qemu_st_a64_i32: 3797 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); 3798 case INDEX_op_qemu_st_a32_i64: 3799 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); 3800 case INDEX_op_qemu_st_a64_i64: 3801 return TCG_TARGET_REG_BITS == 64 ? 
C_O0_I2(r, r) : C_O0_I4(r, r, r, r); 3802 3803 case INDEX_op_qemu_ld_a32_i128: 3804 case INDEX_op_qemu_ld_a64_i128: 3805 return C_O2_I1(o, m, r); 3806 case INDEX_op_qemu_st_a32_i128: 3807 case INDEX_op_qemu_st_a64_i128: 3808 return C_O0_I3(o, m, r); 3809 3810 case INDEX_op_add_vec: 3811 case INDEX_op_sub_vec: 3812 case INDEX_op_mul_vec: 3813 case INDEX_op_and_vec: 3814 case INDEX_op_or_vec: 3815 case INDEX_op_xor_vec: 3816 case INDEX_op_andc_vec: 3817 case INDEX_op_orc_vec: 3818 case INDEX_op_nor_vec: 3819 case INDEX_op_eqv_vec: 3820 case INDEX_op_nand_vec: 3821 case INDEX_op_cmp_vec: 3822 case INDEX_op_ssadd_vec: 3823 case INDEX_op_sssub_vec: 3824 case INDEX_op_usadd_vec: 3825 case INDEX_op_ussub_vec: 3826 case INDEX_op_smax_vec: 3827 case INDEX_op_smin_vec: 3828 case INDEX_op_umax_vec: 3829 case INDEX_op_umin_vec: 3830 case INDEX_op_shlv_vec: 3831 case INDEX_op_shrv_vec: 3832 case INDEX_op_sarv_vec: 3833 case INDEX_op_rotlv_vec: 3834 case INDEX_op_rotrv_vec: 3835 case INDEX_op_ppc_mrgh_vec: 3836 case INDEX_op_ppc_mrgl_vec: 3837 case INDEX_op_ppc_muleu_vec: 3838 case INDEX_op_ppc_mulou_vec: 3839 case INDEX_op_ppc_pkum_vec: 3840 case INDEX_op_dup2_vec: 3841 return C_O1_I2(v, v, v); 3842 3843 case INDEX_op_not_vec: 3844 case INDEX_op_neg_vec: 3845 return C_O1_I1(v, v); 3846 3847 case INDEX_op_dup_vec: 3848 return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v); 3849 3850 case INDEX_op_ld_vec: 3851 case INDEX_op_dupm_vec: 3852 return C_O1_I1(v, r); 3853 3854 case INDEX_op_st_vec: 3855 return C_O0_I2(v, r); 3856 3857 case INDEX_op_bitsel_vec: 3858 case INDEX_op_ppc_msum_vec: 3859 return C_O1_I3(v, v, v, v); 3860 3861 default: 3862 g_assert_not_reached(); 3863 } 3864} 3865 3866static void tcg_target_init(TCGContext *s) 3867{ 3868 unsigned long hwcap = qemu_getauxval(AT_HWCAP); 3869 unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2); 3870 3871 have_isa = tcg_isa_base; 3872 if (hwcap & PPC_FEATURE_ARCH_2_06) { 3873 have_isa = tcg_isa_2_06; 3874 } 3875#ifdef PPC_FEATURE2_ARCH_2_07 3876 if (hwcap2 & PPC_FEATURE2_ARCH_2_07) { 3877 have_isa = tcg_isa_2_07; 3878 } 3879#endif 3880#ifdef PPC_FEATURE2_ARCH_3_00 3881 if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { 3882 have_isa = tcg_isa_3_00; 3883 } 3884#endif 3885#ifdef PPC_FEATURE2_ARCH_3_10 3886 if (hwcap2 & PPC_FEATURE2_ARCH_3_10) { 3887 have_isa = tcg_isa_3_10; 3888 } 3889#endif 3890 3891#ifdef PPC_FEATURE2_HAS_ISEL 3892 /* Prefer explicit instruction from the kernel. */ 3893 have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0; 3894#else 3895 /* Fall back to knowing Power7 (2.06) has ISEL. */ 3896 have_isel = have_isa_2_06; 3897#endif 3898 3899 if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { 3900 have_altivec = true; 3901 /* We only care about the portion of VSX that overlaps Altivec. 
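       That is, VSX registers 32-63, which overlay the 32 Altivec VRs;
       the backend only allocates those vector registers, so the plain
       HAS_VSX bit is sufficient here.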
*/ 3902 if (hwcap & PPC_FEATURE_HAS_VSX) { 3903 have_vsx = true; 3904 } 3905 } 3906 3907 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; 3908 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; 3909 if (have_altivec) { 3910 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 3911 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 3912 } 3913 3914 tcg_target_call_clobber_regs = 0; 3915 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); 3916 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); 3917 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); 3918 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); 3919 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); 3920 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); 3921 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7); 3922 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); 3923 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); 3924 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); 3925 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); 3926 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12); 3927 3928 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0); 3929 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1); 3930 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2); 3931 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3); 3932 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4); 3933 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5); 3934 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6); 3935 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7); 3936 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 3937 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 3938 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 3939 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 3940 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 3941 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 3942 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 3943 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 3944 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16); 3945 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17); 3946 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18); 3947 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19); 3948 3949 s->reserved_regs = 0; 3950 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ 3951 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ 3952#if defined(_CALL_SYSV) 3953 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ 3954#endif 3955#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 3956 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ 3957#endif 3958 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 3959 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 3960 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); 3961 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); 3962 if (USE_REG_TB) { 3963 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */ 3964 } 3965} 3966 3967#ifdef __ELF__ 3968typedef struct { 3969 DebugFrameCIE cie; 3970 DebugFrameFDEHeader fde; 3971 uint8_t fde_def_cfa[4]; 3972 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; 
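    /* The "+ 3" covers the DW_CFA_offset_extended_sf triple for LR;
       each callee-saved GPR then takes a 2-byte DW_CFA_offset pair,
       filled in by tcg_register_jit(). */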
3973} DebugFrame; 3974 3975/* We're expecting a 2 byte uleb128 encoded value. */ 3976QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 3977 3978#if TCG_TARGET_REG_BITS == 64 3979# define ELF_HOST_MACHINE EM_PPC64 3980#else 3981# define ELF_HOST_MACHINE EM_PPC 3982#endif 3983 3984static DebugFrame debug_frame = { 3985 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 3986 .cie.id = -1, 3987 .cie.version = 1, 3988 .cie.code_align = 1, 3989 .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ 3990 .cie.return_column = 65, 3991 3992 /* Total FDE size does not include the "len" member. */ 3993 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), 3994 3995 .fde_def_cfa = { 3996 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ 3997 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 3998 (FRAME_SIZE >> 7) 3999 }, 4000 .fde_reg_ofs = { 4001 /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ 4002 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, 4003 } 4004}; 4005 4006void tcg_register_jit(const void *buf, size_t buf_size) 4007{ 4008 uint8_t *p = &debug_frame.fde_reg_ofs[3]; 4009 int i; 4010 4011 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { 4012 p[0] = 0x80 + tcg_target_callee_save_regs[i]; 4013 p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; 4014 } 4015 4016 debug_frame.fde.func_start = (uintptr_t)buf; 4017 debug_frame.fde.func_len = buf_size; 4018 4019 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 4020} 4021#endif /* __ELF__ */ 4022#undef VMULEUB 4023#undef VMULEUH 4024#undef VMULEUW 4025#undef VMULOUB 4026#undef VMULOUH 4027#undef VMULOUW 4028#undef VMSUMUHM 4029