/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
 */
#if !defined(_CALL_SYSV) && \
    !defined(_CALL_DARWIN) && \
    !defined(_CALL_AIX) && \
    !defined(_CALL_ELF)
# if defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ALIGN_ARGS   1
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))
/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLOAD_REGS \
    (ALL_GENERAL_REGS & \
     ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
#define ALL_QSTORE_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
                          (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
#else
#define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
#define ALL_QSTORE_REGS ALL_QLOAD_REGS
#endif

TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4
};

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}
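/*
 * Direct branches (B, I-form) carry a 26-bit signed byte displacement
 * (the 24-bit LI field scaled by 4), and conditional branches (BC, B-form)
 * a 16-bit one (14-bit BD field scaled by 4); hence the
 * sextract64(..., 0, 26) range test above and the 0x3fffffc / 0xfffc masks
 * in the relocation helpers below.
 */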
static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}
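/*
 * Opcode building blocks: OPCD places the 6-bit primary opcode in the top
 * bits of the instruction word, and the XO/MD/MDS/VX macros below merge in
 * the extended opcode at the position each instruction form uses.  For
 * example XO31(266) builds the XO-form "add" (primary opcode 31, extended
 * opcode 266), shifted left by one to leave room for the low Rc bit.
 */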
#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD(  8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)
#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5)  /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)      /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)     /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)      /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)      /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)      /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)      /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)      /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)      /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)      /* v2.07 */
#define VCMPNEB    VX4(7)        /* v3.00 */
#define VCMPNEH    VX4(71)       /* v3.00 */
#define VCMPNEW    VX4(135)      /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)     /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)     /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)      /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)      /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)      /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)      /* v2.07 */
#define VMULUWM    VX4(137)      /* v2.07 */
#define VMULLD     VX4(457)      /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)     /* v2.07 */
#define VNAND      VX4(1412)     /* v2.07 */
#define VORC       VX4(1348)     /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) << 6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
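/*
 * For example, tcg_to_bc[TCG_COND_EQ] assembles "bc 12, 4*cr7+eq, target"
 * (i.e. beq cr7): BO(12) means "branch if the selected CR bit is set" and
 * BI(7, CR_EQ) selects the EQ bit of CR field 7, which tcg_out_cmp uses for
 * all scalar comparisons.  The 14-bit displacement is filled in by
 * reloc_pc14_val or patched later through an R_PPC_REL14 relocation.
 */
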
/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
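/*
 * The rl[dw]* helpers below emit PowerPC rotate-and-mask instructions.
 * The mask runs from bit MB to bit ME inclusive, in big-endian (IBM) bit
 * numbering, so e.g. rlwinm with mb=16, me=23 selects mask 0x0000ff00; a
 * rotate count plus a mask is enough to express shifts, zero-extensions
 * and the byte-swap steps further down.  tcg_out_rld also rearranges the
 * 6-bit shift and mask values into the split fields of the MD/MDS forms.
 */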
static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                        src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}
static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                        src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                        src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = tcg_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       tcg_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
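/*
 * In the worst case the fallback above costs five insns.  For example,
 * arg = 0x123456789abcdef0 first materialises the high half 0x12345678
 * with lis + ori, shifts it left by 32, then merges the low half with
 * oris 0x9abc and ori 0xdef0.
 */
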
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = tcg_tbrel_diff(s, NULL);
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}
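/*
 * As an example of the mask_operand() test above: c = 0x00000ff0 gives
 * lsb = 0x10 and test = 0x1000 (a power of two, so there is only one 0->1
 * and one 1->0 transition), yielding me = 27 and mb = 20, i.e. an rlwinm
 * mask covering big-endian bits 20..27, which is exactly 0x00000ff0.
 */
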
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        tcg_abort();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
{
    /* X != 0 implies X + -1 generates a carry.  Extra addition
       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
    if (dst != src) {
        tcg_out32(s, ADDIC | TAI(dst, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, dst, src));
    } else {
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    }
}
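/*
 * Worked example for the carry trick above, with src = 5: addic leaves
 * dst = 4 and CA = 1 (adding -1 only fails to carry when src is 0), and
 * subfe then computes src + ~dst + CA = 5 + ~4 + 1 = 1.  With src = 0:
 * addic leaves dst = -1 and CA = 0, and subfe yields 0 + ~(-1) + 0 = 0.
 */
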
static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        if (crop) {
            tcg_out32(s, crop);
        }
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
{
    if (l->has_value) {
        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out32(s, bc);
}

static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
{
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
    tcg_out_bc(s, tcg_to_bc[cond], l);
}

static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
                            TCGArg v2, bool const_c2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out32(s, tcg_to_bc[cond] | 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}

static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                         const int *const_args)
{
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond = args[4], cond2;
    TCGArg al, ah, bl, bh;
    int blconst, bhconst;
    int op, bit1, bit2;

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];
    blconst = const_args[2];
    bhconst = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args + 1, const_args + 1);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
}

static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args, const_args);
    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
}

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    uint32_t insn;

    if (a0 & TCG_MO_ST_LD) {
        insn = HWSYNC;
    } else {
        insn = LWSYNC;
    }

    tcg_out32(s, insn);
}

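/*
 * lwsync orders everything except store-vs-load, so it suffices for any
 * TCG_MO_* combination other than TCG_MO_ST_LD; only the latter requires
 * the full (and slower) hwsync barrier.
 */
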
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_insn_unit i1, i2;
        intptr_t tb_diff = addr - tc_ptr;
        intptr_t br_diff = addr - (jmp_rx + 4);
        uint64_t pair;

        /* This does not exercise the range of the branch, but we do
           still need to be able to load the new value of TCG_REG_TB.
           But this does still happen quite often.  */
        if (tb_diff == (int16_t)tb_diff) {
            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
            i2 = B | (br_diff & 0x3fffffc);
        } else {
            intptr_t lo = (int16_t)tb_diff;
            intptr_t hi = (int32_t)(tb_diff - lo);
            assert(tb_diff == hi + lo);
            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
        }
#if HOST_BIG_ENDIAN
        pair = (uint64_t)i1 << 32 | i2;
#else
        pair = (uint64_t)i2 << 32 | i1;
#endif

        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
           within qatomic_set that would fail to build a ppc32 host.  */
        qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
        flush_idcache_range(jmp_rx, jmp_rw, 8);
    } else {
        intptr_t diff = addr - jmp_rx;
        tcg_debug_assert(in_range_b(diff));
        qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
        flush_idcache_range(jmp_rx, jmp_rw, 4);
    }
}

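/*
 * Note that on ppc64 the two patched insns are written with a single
 * 64-bit qatomic_set and then exposed to instruction fetch by the icache
 * flush; the intent is that a cpu concurrently executing this TB observes
 * either the old or the new insn pair, never a torn mix.
 */
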
static void tcg_out_call_int(TCGContext *s, int lk,
                             const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the descriptor.  If the branch is in range, and we
       don't have to spend too much effort on building the toc.  */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, lk, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid r12 load if we
       knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, lk, target);
    } else {
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#else
    tcg_out_b(s, lk, target);
#endif
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
{
    tcg_out_call_int(s, LK, target);
}

static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_UQ] = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_UQ] = LDBRX,
};

static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_UQ] = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_UQ] = STDBRX,
};

static const uint32_t qemu_exts_opc[4] = {
    EXTSB, EXTSH, EXTSW, 0
};

#if defined (CONFIG_SOFTMMU)
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
 *                                 int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEUQ] = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
 *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEUQ] = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};

/* We expect to use a 16-bit negative offset from ENV.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);

*/ 2028 if (TCG_TARGET_REG_BITS == 32) { 2029 tcg_out_shri32(s, TCG_REG_TMP1, addrlo, 2030 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 2031 } else { 2032 tcg_out_shri64(s, TCG_REG_TMP1, addrlo, 2033 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 2034 } 2035 tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1)); 2036 2037 /* Load the TLB comparator. */ 2038 if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { 2039 uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32 2040 ? LWZUX : LDUX); 2041 tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4)); 2042 } else { 2043 tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4)); 2044 if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { 2045 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); 2046 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); 2047 } else { 2048 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); 2049 } 2050 } 2051 2052 /* Load the TLB addend for use on the fast path. Do this asap 2053 to minimize any load use delay. */ 2054 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, 2055 offsetof(CPUTLBEntry, addend)); 2056 2057 /* Clear the non-page, non-alignment bits from the address */ 2058 if (TCG_TARGET_REG_BITS == 32) { 2059 /* We don't support unaligned accesses on 32-bits. 2060 * Preserve the bottom bits and thus trigger a comparison 2061 * failure on unaligned accesses. 2062 */ 2063 if (a_bits < s_bits) { 2064 a_bits = s_bits; 2065 } 2066 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, 2067 (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); 2068 } else { 2069 TCGReg t = addrlo; 2070 2071 /* If the access is unaligned, we need to make sure we fail if we 2072 * cross a page boundary. The trick is to add the access size-1 2073 * to the address before masking the low bits. That will make the 2074 * address overflow to the next page if we cross a page boundary, 2075 * which will then force a mismatch of the TLB compare. 2076 */ 2077 if (a_bits < s_bits) { 2078 unsigned a_mask = (1 << a_bits) - 1; 2079 unsigned s_mask = (1 << s_bits) - 1; 2080 tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); 2081 t = TCG_REG_R0; 2082 } 2083 2084 /* Mask the address for the requested alignment. */ 2085 if (TARGET_LONG_BITS == 32) { 2086 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, 2087 (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); 2088 /* Zero-extend the address for use in the final address. */ 2089 tcg_out_ext32u(s, TCG_REG_R4, addrlo); 2090 addrlo = TCG_REG_R4; 2091 } else if (a_bits == 0) { 2092 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); 2093 } else { 2094 tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 2095 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); 2096 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); 2097 } 2098 } 2099 2100 if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { 2101 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, 2102 0, 7, TCG_TYPE_I32); 2103 tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); 2104 tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2105 } else { 2106 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, 2107 0, 7, TCG_TYPE_TL); 2108 } 2109 2110 return addrlo; 2111} 2112 2113/* Record the context of a call to the out of line helper code for the slow 2114 path for a load or store, so that we can later generate the correct 2115 helper code. 
*/ 2116static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, 2117 TCGReg datalo_reg, TCGReg datahi_reg, 2118 TCGReg addrlo_reg, TCGReg addrhi_reg, 2119 tcg_insn_unit *raddr, tcg_insn_unit *lptr) 2120{ 2121 TCGLabelQemuLdst *label = new_ldst_label(s); 2122 2123 label->is_ld = is_ld; 2124 label->oi = oi; 2125 label->datalo_reg = datalo_reg; 2126 label->datahi_reg = datahi_reg; 2127 label->addrlo_reg = addrlo_reg; 2128 label->addrhi_reg = addrhi_reg; 2129 label->raddr = tcg_splitwx_to_rx(raddr); 2130 label->label_ptr[0] = lptr; 2131} 2132 2133static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2134{ 2135 MemOpIdx oi = lb->oi; 2136 MemOp opc = get_memop(oi); 2137 TCGReg hi, lo, arg = TCG_REG_R3; 2138 2139 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2140 return false; 2141 } 2142 2143 tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); 2144 2145 lo = lb->addrlo_reg; 2146 hi = lb->addrhi_reg; 2147 if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { 2148#ifdef TCG_TARGET_CALL_ALIGN_ARGS 2149 arg |= 1; 2150#endif 2151 tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); 2152 tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); 2153 } else { 2154 /* If the address needed to be zero-extended, we'll have already 2155 placed it in R4. The only remaining case is 64-bit guest. */ 2156 tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); 2157 } 2158 2159 tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); 2160 tcg_out32(s, MFSPR | RT(arg) | LR); 2161 2162 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); 2163 2164 lo = lb->datalo_reg; 2165 hi = lb->datahi_reg; 2166 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2167 tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); 2168 tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); 2169 } else if (opc & MO_SIGN) { 2170 uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; 2171 tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3)); 2172 } else { 2173 tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3); 2174 } 2175 2176 tcg_out_b(s, 0, lb->raddr); 2177 return true; 2178} 2179 2180static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2181{ 2182 MemOpIdx oi = lb->oi; 2183 MemOp opc = get_memop(oi); 2184 MemOp s_bits = opc & MO_SIZE; 2185 TCGReg hi, lo, arg = TCG_REG_R3; 2186 2187 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2188 return false; 2189 } 2190 2191 tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); 2192 2193 lo = lb->addrlo_reg; 2194 hi = lb->addrhi_reg; 2195 if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { 2196#ifdef TCG_TARGET_CALL_ALIGN_ARGS 2197 arg |= 1; 2198#endif 2199 tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); 2200 tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); 2201 } else { 2202 /* If the address needed to be zero-extended, we'll have already 2203 placed it in R4. The only remaining case is 64-bit guest. 
*/ 2204 tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); 2205 } 2206 2207 lo = lb->datalo_reg; 2208 hi = lb->datahi_reg; 2209 if (TCG_TARGET_REG_BITS == 32) { 2210 switch (s_bits) { 2211 case MO_64: 2212#ifdef TCG_TARGET_CALL_ALIGN_ARGS 2213 arg |= 1; 2214#endif 2215 tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); 2216 /* FALLTHRU */ 2217 case MO_32: 2218 tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); 2219 break; 2220 default: 2221 tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31); 2222 break; 2223 } 2224 } else { 2225 if (s_bits == MO_64) { 2226 tcg_out_mov(s, TCG_TYPE_I64, arg++, lo); 2227 } else { 2228 tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits)); 2229 } 2230 } 2231 2232 tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); 2233 tcg_out32(s, MFSPR | RT(arg) | LR); 2234 2235 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); 2236 2237 tcg_out_b(s, 0, lb->raddr); 2238 return true; 2239} 2240#else 2241 2242static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, 2243 TCGReg addrhi, unsigned a_bits) 2244{ 2245 unsigned a_mask = (1 << a_bits) - 1; 2246 TCGLabelQemuLdst *label = new_ldst_label(s); 2247 2248 label->is_ld = is_ld; 2249 label->addrlo_reg = addrlo; 2250 label->addrhi_reg = addrhi; 2251 2252 /* We are expecting a_bits to max out at 7, much lower than ANDI. */ 2253 tcg_debug_assert(a_bits < 16); 2254 tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask)); 2255 2256 label->label_ptr[0] = s->code_ptr; 2257 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); 2258 2259 label->raddr = tcg_splitwx_to_rx(s->code_ptr); 2260} 2261 2262static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) 2263{ 2264 if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2265 return false; 2266 } 2267 2268 if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { 2269 TCGReg arg = TCG_REG_R4; 2270#ifdef TCG_TARGET_CALL_ALIGN_ARGS 2271 arg |= 1; 2272#endif 2273 if (l->addrlo_reg != arg) { 2274 tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); 2275 tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); 2276 } else if (l->addrhi_reg != arg + 1) { 2277 tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); 2278 tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); 2279 } else { 2280 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg); 2281 tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1); 2282 tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0); 2283 } 2284 } else { 2285 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg); 2286 } 2287 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0); 2288 2289 /* "Tail call" to the helper, with the return address back inline. */ 2290 tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld 2291 : helper_unaligned_st)); 2292 return true; 2293} 2294 2295static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 2296{ 2297 return tcg_out_fail_alignment(s, l); 2298} 2299 2300static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) 2301{ 2302 return tcg_out_fail_alignment(s, l); 2303} 2304 2305#endif /* SOFTMMU */ 2306 2307static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) 2308{ 2309 TCGReg datalo, datahi, addrlo, rbase; 2310 TCGReg addrhi __attribute__((unused)); 2311 MemOpIdx oi; 2312 MemOp opc, s_bits; 2313#ifdef CONFIG_SOFTMMU 2314 int mem_index; 2315 tcg_insn_unit *label_ptr; 2316#else 2317 unsigned a_bits; 2318#endif 2319 2320 datalo = *args++; 2321 datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); 2322 addrlo = *args++; 2323 addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? 
*args++ : 0); 2324 oi = *args++; 2325 opc = get_memop(oi); 2326 s_bits = opc & MO_SIZE; 2327 2328#ifdef CONFIG_SOFTMMU 2329 mem_index = get_mmuidx(oi); 2330 addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true); 2331 2332 /* Load a pointer into the current opcode w/conditional branch-link. */ 2333 label_ptr = s->code_ptr; 2334 tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); 2335 2336 rbase = TCG_REG_R3; 2337#else /* !CONFIG_SOFTMMU */ 2338 a_bits = get_alignment_bits(opc); 2339 if (a_bits) { 2340 tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); 2341 } 2342 rbase = guest_base ? TCG_GUEST_BASE_REG : 0; 2343 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { 2344 tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); 2345 addrlo = TCG_REG_TMP1; 2346 } 2347#endif 2348 2349 if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { 2350 if (opc & MO_BSWAP) { 2351 tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); 2352 tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); 2353 tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); 2354 } else if (rbase != 0) { 2355 tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); 2356 tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); 2357 tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); 2358 } else if (addrlo == datahi) { 2359 tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); 2360 tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); 2361 } else { 2362 tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); 2363 tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); 2364 } 2365 } else { 2366 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; 2367 if (!have_isa_2_06 && insn == LDBRX) { 2368 tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); 2369 tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); 2370 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); 2371 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); 2372 } else if (insn) { 2373 tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); 2374 } else { 2375 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; 2376 tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); 2377 insn = qemu_exts_opc[s_bits]; 2378 tcg_out32(s, insn | RA(datalo) | RS(datalo)); 2379 } 2380 } 2381 2382#ifdef CONFIG_SOFTMMU 2383 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, 2384 s->code_ptr, label_ptr); 2385#endif 2386} 2387 2388static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) 2389{ 2390 TCGReg datalo, datahi, addrlo, rbase; 2391 TCGReg addrhi __attribute__((unused)); 2392 MemOpIdx oi; 2393 MemOp opc, s_bits; 2394#ifdef CONFIG_SOFTMMU 2395 int mem_index; 2396 tcg_insn_unit *label_ptr; 2397#else 2398 unsigned a_bits; 2399#endif 2400 2401 datalo = *args++; 2402 datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); 2403 addrlo = *args++; 2404 addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); 2405 oi = *args++; 2406 opc = get_memop(oi); 2407 s_bits = opc & MO_SIZE; 2408 2409#ifdef CONFIG_SOFTMMU 2410 mem_index = get_mmuidx(oi); 2411 addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false); 2412 2413 /* Load a pointer into the current opcode w/conditional branch-link. */ 2414 label_ptr = s->code_ptr; 2415 tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); 2416 2417 rbase = TCG_REG_R3; 2418#else /* !CONFIG_SOFTMMU */ 2419 a_bits = get_alignment_bits(opc); 2420 if (a_bits) { 2421 tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); 2422 } 2423 rbase = guest_base ? 
TCG_GUEST_BASE_REG : 0; 2424 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { 2425 tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); 2426 addrlo = TCG_REG_TMP1; 2427 } 2428#endif 2429 2430 if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { 2431 if (opc & MO_BSWAP) { 2432 tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); 2433 tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); 2434 tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); 2435 } else if (rbase != 0) { 2436 tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); 2437 tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); 2438 tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); 2439 } else { 2440 tcg_out32(s, STW | TAI(datahi, addrlo, 0)); 2441 tcg_out32(s, STW | TAI(datalo, addrlo, 4)); 2442 } 2443 } else { 2444 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; 2445 if (!have_isa_2_06 && insn == STDBRX) { 2446 tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); 2447 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); 2448 tcg_out_shri64(s, TCG_REG_R0, datalo, 32); 2449 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1)); 2450 } else { 2451 tcg_out32(s, insn | SAB(datalo, rbase, addrlo)); 2452 } 2453 } 2454 2455#ifdef CONFIG_SOFTMMU 2456 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, 2457 s->code_ptr, label_ptr); 2458#endif 2459} 2460 2461static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2462{ 2463 int i; 2464 for (i = 0; i < count; ++i) { 2465 p[i] = NOP; 2466 } 2467} 2468 2469/* Parameters for function call generation, used in tcg.c. */ 2470#define TCG_TARGET_STACK_ALIGN 16 2471#define TCG_TARGET_EXTEND_ARGS 1 2472 2473#ifdef _CALL_AIX 2474# define LINK_AREA_SIZE (6 * SZR) 2475# define LR_OFFSET (1 * SZR) 2476# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) 2477#elif defined(_CALL_DARWIN) 2478# define LINK_AREA_SIZE (6 * SZR) 2479# define LR_OFFSET (2 * SZR) 2480#elif TCG_TARGET_REG_BITS == 64 2481# if defined(_CALL_ELF) && _CALL_ELF == 2 2482# define LINK_AREA_SIZE (4 * SZR) 2483# define LR_OFFSET (1 * SZR) 2484# endif 2485#else /* TCG_TARGET_REG_BITS == 32 */ 2486# if defined(_CALL_SYSV) 2487# define LINK_AREA_SIZE (2 * SZR) 2488# define LR_OFFSET (1 * SZR) 2489# endif 2490#endif 2491#ifndef LR_OFFSET 2492# error "Unhandled abi" 2493#endif 2494#ifndef TCG_TARGET_CALL_STACK_OFFSET 2495# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE 2496#endif 2497 2498#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2499#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) 2500 2501#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ 2502 + TCG_STATIC_CALL_ARGS_SIZE \ 2503 + CPU_TEMP_BUF_SIZE \ 2504 + REG_SAVE_SIZE \ 2505 + TCG_TARGET_STACK_ALIGN - 1) \ 2506 & -TCG_TARGET_STACK_ALIGN) 2507 2508#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) 2509 2510static void tcg_target_qemu_prologue(TCGContext *s) 2511{ 2512 int i; 2513 2514#ifdef _CALL_AIX 2515 const void **desc = (const void **)s->code_ptr; 2516 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ 2517 desc[1] = 0; /* environment pointer */ 2518 s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ 2519#endif 2520 2521 tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, 2522 CPU_TEMP_BUF_SIZE); 2523 2524 /* Prologue */ 2525 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); 2526 tcg_out32(s, (SZR == 8 ? 
STDU : STWU) 2527 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); 2528 2529 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2530 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2531 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2532 } 2533 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2534 2535#ifndef CONFIG_SOFTMMU 2536 if (guest_base) { 2537 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); 2538 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2539 } 2540#endif 2541 2542 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2543 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); 2544 if (USE_REG_TB) { 2545 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]); 2546 } 2547 tcg_out32(s, BCCTR | BO_ALWAYS); 2548 2549 /* Epilogue */ 2550 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2551 2552 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2553 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2554 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2555 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2556 } 2557 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); 2558 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); 2559 tcg_out32(s, BCLR | BO_ALWAYS); 2560} 2561 2562static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2563 const TCGArg args[TCG_MAX_OP_ARGS], 2564 const int const_args[TCG_MAX_OP_ARGS]) 2565{ 2566 TCGArg a0, a1, a2; 2567 2568 switch (opc) { 2569 case INDEX_op_exit_tb: 2570 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); 2571 tcg_out_b(s, 0, tcg_code_gen_epilogue); 2572 break; 2573 case INDEX_op_goto_tb: 2574 if (s->tb_jmp_insn_offset) { 2575 /* Direct jump. */ 2576 if (TCG_TARGET_REG_BITS == 64) { 2577 /* Ensure the next insns are 8-byte aligned. */ 2578 if ((uintptr_t)s->code_ptr & 7) { 2579 tcg_out32(s, NOP); 2580 } 2581 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); 2582 tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); 2583 tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); 2584 } else { 2585 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); 2586 tcg_out32(s, B); 2587 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); 2588 break; 2589 } 2590 } else { 2591 /* Indirect jump. */ 2592 tcg_debug_assert(s->tb_jmp_insn_offset == NULL); 2593 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0, 2594 (intptr_t)(s->tb_jmp_target_addr + args[0])); 2595 } 2596 tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); 2597 tcg_out32(s, BCCTR | BO_ALWAYS); 2598 set_jmp_reset_offset(s, args[0]); 2599 if (USE_REG_TB) { 2600 /* For the unlinked case, need to reset TCG_REG_TB.
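           At this point TCG_REG_TB holds whatever address was just branched
           to; in the unlinked case that is this very reset point, so
           subtracting the current code offset restores the start of the TB.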
*/ 2601 tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, 2602 -tcg_current_code_size(s)); 2603 } 2604 break; 2605 case INDEX_op_goto_ptr: 2606 tcg_out32(s, MTSPR | RS(args[0]) | CTR); 2607 if (USE_REG_TB) { 2608 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]); 2609 } 2610 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); 2611 tcg_out32(s, BCCTR | BO_ALWAYS); 2612 break; 2613 case INDEX_op_br: 2614 { 2615 TCGLabel *l = arg_label(args[0]); 2616 uint32_t insn = B; 2617 2618 if (l->has_value) { 2619 insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), 2620 l->u.value_ptr); 2621 } else { 2622 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); 2623 } 2624 tcg_out32(s, insn); 2625 } 2626 break; 2627 case INDEX_op_ld8u_i32: 2628 case INDEX_op_ld8u_i64: 2629 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2630 break; 2631 case INDEX_op_ld8s_i32: 2632 case INDEX_op_ld8s_i64: 2633 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2634 tcg_out_ext8s(s, args[0], args[0]); 2635 break; 2636 case INDEX_op_ld16u_i32: 2637 case INDEX_op_ld16u_i64: 2638 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); 2639 break; 2640 case INDEX_op_ld16s_i32: 2641 case INDEX_op_ld16s_i64: 2642 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); 2643 break; 2644 case INDEX_op_ld_i32: 2645 case INDEX_op_ld32u_i64: 2646 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); 2647 break; 2648 case INDEX_op_ld32s_i64: 2649 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); 2650 break; 2651 case INDEX_op_ld_i64: 2652 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); 2653 break; 2654 case INDEX_op_st8_i32: 2655 case INDEX_op_st8_i64: 2656 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); 2657 break; 2658 case INDEX_op_st16_i32: 2659 case INDEX_op_st16_i64: 2660 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); 2661 break; 2662 case INDEX_op_st_i32: 2663 case INDEX_op_st32_i64: 2664 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); 2665 break; 2666 case INDEX_op_st_i64: 2667 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); 2668 break; 2669 2670 case INDEX_op_add_i32: 2671 a0 = args[0], a1 = args[1], a2 = args[2]; 2672 if (const_args[2]) { 2673 do_addi_32: 2674 tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); 2675 } else { 2676 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2677 } 2678 break; 2679 case INDEX_op_sub_i32: 2680 a0 = args[0], a1 = args[1], a2 = args[2]; 2681 if (const_args[1]) { 2682 if (const_args[2]) { 2683 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); 2684 } else { 2685 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2686 } 2687 } else if (const_args[2]) { 2688 a2 = -a2; 2689 goto do_addi_32; 2690 } else { 2691 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2692 } 2693 break; 2694 2695 case INDEX_op_and_i32: 2696 a0 = args[0], a1 = args[1], a2 = args[2]; 2697 if (const_args[2]) { 2698 tcg_out_andi32(s, a0, a1, a2); 2699 } else { 2700 tcg_out32(s, AND | SAB(a1, a0, a2)); 2701 } 2702 break; 2703 case INDEX_op_and_i64: 2704 a0 = args[0], a1 = args[1], a2 = args[2]; 2705 if (const_args[2]) { 2706 tcg_out_andi64(s, a0, a1, a2); 2707 } else { 2708 tcg_out32(s, AND | SAB(a1, a0, a2)); 2709 } 2710 break; 2711 case INDEX_op_or_i64: 2712 case INDEX_op_or_i32: 2713 a0 = args[0], a1 = args[1], a2 = args[2]; 2714 if (const_args[2]) { 2715 tcg_out_ori32(s, a0, a1, a2); 2716 } else { 2717 tcg_out32(s, OR | SAB(a1, a0, a2)); 2718 } 2719 break; 2720 case INDEX_op_xor_i64: 2721 case INDEX_op_xor_i32: 2722 a0 = args[0], a1 = args[1], a2 = args[2]; 2723 if 
(const_args[2]) { 2724 tcg_out_xori32(s, a0, a1, a2); 2725 } else { 2726 tcg_out32(s, XOR | SAB(a1, a0, a2)); 2727 } 2728 break; 2729 case INDEX_op_andc_i32: 2730 a0 = args[0], a1 = args[1], a2 = args[2]; 2731 if (const_args[2]) { 2732 tcg_out_andi32(s, a0, a1, ~a2); 2733 } else { 2734 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2735 } 2736 break; 2737 case INDEX_op_andc_i64: 2738 a0 = args[0], a1 = args[1], a2 = args[2]; 2739 if (const_args[2]) { 2740 tcg_out_andi64(s, a0, a1, ~a2); 2741 } else { 2742 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2743 } 2744 break; 2745 case INDEX_op_orc_i32: 2746 if (const_args[2]) { 2747 tcg_out_ori32(s, args[0], args[1], ~args[2]); 2748 break; 2749 } 2750 /* FALLTHRU */ 2751 case INDEX_op_orc_i64: 2752 tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); 2753 break; 2754 case INDEX_op_eqv_i32: 2755 if (const_args[2]) { 2756 tcg_out_xori32(s, args[0], args[1], ~args[2]); 2757 break; 2758 } 2759 /* FALLTHRU */ 2760 case INDEX_op_eqv_i64: 2761 tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); 2762 break; 2763 case INDEX_op_nand_i32: 2764 case INDEX_op_nand_i64: 2765 tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); 2766 break; 2767 case INDEX_op_nor_i32: 2768 case INDEX_op_nor_i64: 2769 tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); 2770 break; 2771 2772 case INDEX_op_clz_i32: 2773 tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], 2774 args[2], const_args[2]); 2775 break; 2776 case INDEX_op_ctz_i32: 2777 tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], 2778 args[2], const_args[2]); 2779 break; 2780 case INDEX_op_ctpop_i32: 2781 tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); 2782 break; 2783 2784 case INDEX_op_clz_i64: 2785 tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], 2786 args[2], const_args[2]); 2787 break; 2788 case INDEX_op_ctz_i64: 2789 tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], 2790 args[2], const_args[2]); 2791 break; 2792 case INDEX_op_ctpop_i64: 2793 tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); 2794 break; 2795 2796 case INDEX_op_mul_i32: 2797 a0 = args[0], a1 = args[1], a2 = args[2]; 2798 if (const_args[2]) { 2799 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2800 } else { 2801 tcg_out32(s, MULLW | TAB(a0, a1, a2)); 2802 } 2803 break; 2804 2805 case INDEX_op_div_i32: 2806 tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); 2807 break; 2808 2809 case INDEX_op_divu_i32: 2810 tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); 2811 break; 2812 2813 case INDEX_op_rem_i32: 2814 tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); 2815 break; 2816 2817 case INDEX_op_remu_i32: 2818 tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); 2819 break; 2820 2821 case INDEX_op_shl_i32: 2822 if (const_args[2]) { 2823 /* Limit immediate shift count lest we create an illegal insn. */ 2824 tcg_out_shli32(s, args[0], args[1], args[2] & 31); 2825 } else { 2826 tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); 2827 } 2828 break; 2829 case INDEX_op_shr_i32: 2830 if (const_args[2]) { 2831 /* Limit immediate shift count lest we create an illegal insn. 
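             (TCG leaves the result of a shift by a count >= the operand
             width unspecified, so an oversized constant can reach the
             backend after constant folding; masking it keeps the SH field
             of the encoding valid.)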
*/ 2832 tcg_out_shri32(s, args[0], args[1], args[2] & 31); 2833 } else { 2834 tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); 2835 } 2836 break; 2837 case INDEX_op_sar_i32: 2838 if (const_args[2]) { 2839 tcg_out_sari32(s, args[0], args[1], args[2]); 2840 } else { 2841 tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); 2842 } 2843 break; 2844 case INDEX_op_rotl_i32: 2845 if (const_args[2]) { 2846 tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); 2847 } else { 2848 tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) 2849 | MB(0) | ME(31)); 2850 } 2851 break; 2852 case INDEX_op_rotr_i32: 2853 if (const_args[2]) { 2854 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); 2855 } else { 2856 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); 2857 tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) 2858 | MB(0) | ME(31)); 2859 } 2860 break; 2861 2862 case INDEX_op_brcond_i32: 2863 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 2864 arg_label(args[3]), TCG_TYPE_I32); 2865 break; 2866 case INDEX_op_brcond_i64: 2867 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 2868 arg_label(args[3]), TCG_TYPE_I64); 2869 break; 2870 case INDEX_op_brcond2_i32: 2871 tcg_out_brcond2(s, args, const_args); 2872 break; 2873 2874 case INDEX_op_neg_i32: 2875 case INDEX_op_neg_i64: 2876 tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); 2877 break; 2878 2879 case INDEX_op_not_i32: 2880 case INDEX_op_not_i64: 2881 tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); 2882 break; 2883 2884 case INDEX_op_add_i64: 2885 a0 = args[0], a1 = args[1], a2 = args[2]; 2886 if (const_args[2]) { 2887 do_addi_64: 2888 tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); 2889 } else { 2890 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2891 } 2892 break; 2893 case INDEX_op_sub_i64: 2894 a0 = args[0], a1 = args[1], a2 = args[2]; 2895 if (const_args[1]) { 2896 if (const_args[2]) { 2897 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); 2898 } else { 2899 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2900 } 2901 } else if (const_args[2]) { 2902 a2 = -a2; 2903 goto do_addi_64; 2904 } else { 2905 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2906 } 2907 break; 2908 2909 case INDEX_op_shl_i64: 2910 if (const_args[2]) { 2911 /* Limit immediate shift count lest we create an illegal insn. */ 2912 tcg_out_shli64(s, args[0], args[1], args[2] & 63); 2913 } else { 2914 tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); 2915 } 2916 break; 2917 case INDEX_op_shr_i64: 2918 if (const_args[2]) { 2919 /* Limit immediate shift count lest we create an illegal insn. 
*/ 2920 tcg_out_shri64(s, args[0], args[1], args[2] & 63); 2921 } else { 2922 tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); 2923 } 2924 break; 2925 case INDEX_op_sar_i64: 2926 if (const_args[2]) { 2927 tcg_out_sari64(s, args[0], args[1], args[2]); 2928 } else { 2929 tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); 2930 } 2931 break; 2932 case INDEX_op_rotl_i64: 2933 if (const_args[2]) { 2934 tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); 2935 } else { 2936 tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); 2937 } 2938 break; 2939 case INDEX_op_rotr_i64: 2940 if (const_args[2]) { 2941 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); 2942 } else { 2943 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); 2944 tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); 2945 } 2946 break; 2947 2948 case INDEX_op_mul_i64: 2949 a0 = args[0], a1 = args[1], a2 = args[2]; 2950 if (const_args[2]) { 2951 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2952 } else { 2953 tcg_out32(s, MULLD | TAB(a0, a1, a2)); 2954 } 2955 break; 2956 case INDEX_op_div_i64: 2957 tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); 2958 break; 2959 case INDEX_op_divu_i64: 2960 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); 2961 break; 2962 case INDEX_op_rem_i64: 2963 tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); 2964 break; 2965 case INDEX_op_remu_i64: 2966 tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); 2967 break; 2968 2969 case INDEX_op_qemu_ld_i32: 2970 tcg_out_qemu_ld(s, args, false); 2971 break; 2972 case INDEX_op_qemu_ld_i64: 2973 tcg_out_qemu_ld(s, args, true); 2974 break; 2975 case INDEX_op_qemu_st_i32: 2976 tcg_out_qemu_st(s, args, false); 2977 break; 2978 case INDEX_op_qemu_st_i64: 2979 tcg_out_qemu_st(s, args, true); 2980 break; 2981 2982 case INDEX_op_ext8s_i32: 2983 case INDEX_op_ext8s_i64: 2984 tcg_out_ext8s(s, args[0], args[1]); 2985 break; 2986 case INDEX_op_ext16s_i32: 2987 case INDEX_op_ext16s_i64: 2988 tcg_out_ext16s(s, args[0], args[1]); 2989 break; 2990 case INDEX_op_ext_i32_i64: 2991 case INDEX_op_ext32s_i64: 2992 tcg_out_ext32s(s, args[0], args[1]); 2993 break; 2994 case INDEX_op_extu_i32_i64: 2995 tcg_out_ext32u(s, args[0], args[1]); 2996 break; 2997 2998 case INDEX_op_setcond_i32: 2999 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3000 const_args[2]); 3001 break; 3002 case INDEX_op_setcond_i64: 3003 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3004 const_args[2]); 3005 break; 3006 case INDEX_op_setcond2_i32: 3007 tcg_out_setcond2(s, args, const_args); 3008 break; 3009 3010 case INDEX_op_bswap16_i32: 3011 case INDEX_op_bswap16_i64: 3012 tcg_out_bswap16(s, args[0], args[1], args[2]); 3013 break; 3014 case INDEX_op_bswap32_i32: 3015 tcg_out_bswap32(s, args[0], args[1], 0); 3016 break; 3017 case INDEX_op_bswap32_i64: 3018 tcg_out_bswap32(s, args[0], args[1], args[2]); 3019 break; 3020 case INDEX_op_bswap64_i64: 3021 tcg_out_bswap64(s, args[0], args[1]); 3022 break; 3023 3024 case INDEX_op_deposit_i32: 3025 if (const_args[2]) { 3026 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; 3027 tcg_out_andi32(s, args[0], args[0], ~mask); 3028 } else { 3029 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], 3030 32 - args[3] - args[4], 31 - args[3]); 3031 } 3032 break; 3033 case INDEX_op_deposit_i64: 3034 if (const_args[2]) { 3035 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; 3036 tcg_out_andi64(s, args[0], args[0], ~mask); 3037 } else { 3038 tcg_out_rld(s, RLDIMI, args[0], args[2], 
args[3], 3039 64 - args[3] - args[4]); 3040 } 3041 break; 3042 3043 case INDEX_op_extract_i32: 3044 tcg_out_rlw(s, RLWINM, args[0], args[1], 3045 32 - args[2], 32 - args[3], 31); 3046 break; 3047 case INDEX_op_extract_i64: 3048 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); 3049 break; 3050 3051 case INDEX_op_movcond_i32: 3052 tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], 3053 args[3], args[4], const_args[2]); 3054 break; 3055 case INDEX_op_movcond_i64: 3056 tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], 3057 args[3], args[4], const_args[2]); 3058 break; 3059 3060#if TCG_TARGET_REG_BITS == 64 3061 case INDEX_op_add2_i64: 3062#else 3063 case INDEX_op_add2_i32: 3064#endif 3065 /* Note that the CA bit is defined based on the word size of the 3066 environment. So in 64-bit mode it's always carry-out of bit 63. 3067 The fallback code using deposit works just as well for 32-bit. */ 3068 a0 = args[0], a1 = args[1]; 3069 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { 3070 a0 = TCG_REG_R0; 3071 } 3072 if (const_args[4]) { 3073 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); 3074 } else { 3075 tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); 3076 } 3077 if (const_args[5]) { 3078 tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); 3079 } else { 3080 tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); 3081 } 3082 if (a0 != args[0]) { 3083 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3084 } 3085 break; 3086 3087#if TCG_TARGET_REG_BITS == 64 3088 case INDEX_op_sub2_i64: 3089#else 3090 case INDEX_op_sub2_i32: 3091#endif 3092 a0 = args[0], a1 = args[1]; 3093 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { 3094 a0 = TCG_REG_R0; 3095 } 3096 if (const_args[2]) { 3097 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); 3098 } else { 3099 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); 3100 } 3101 if (const_args[3]) { 3102 tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); 3103 } else { 3104 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); 3105 } 3106 if (a0 != args[0]) { 3107 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3108 } 3109 break; 3110 3111 case INDEX_op_muluh_i32: 3112 tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); 3113 break; 3114 case INDEX_op_mulsh_i32: 3115 tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); 3116 break; 3117 case INDEX_op_muluh_i64: 3118 tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); 3119 break; 3120 case INDEX_op_mulsh_i64: 3121 tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); 3122 break; 3123 3124 case INDEX_op_mb: 3125 tcg_out_mb(s, args[0]); 3126 break; 3127 3128 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 3129 case INDEX_op_mov_i64: 3130 case INDEX_op_call: /* Always emitted via tcg_out_call. 
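                                  These opcodes are expanded by generic code
                                  in tcg.c and must never be passed to the
                                  backend; any that do fall into the abort
                                  below.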
*/ 3131 default: 3132 tcg_abort(); 3133 } 3134} 3135 3136int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3137{ 3138 switch (opc) { 3139 case INDEX_op_and_vec: 3140 case INDEX_op_or_vec: 3141 case INDEX_op_xor_vec: 3142 case INDEX_op_andc_vec: 3143 case INDEX_op_not_vec: 3144 case INDEX_op_nor_vec: 3145 case INDEX_op_eqv_vec: 3146 case INDEX_op_nand_vec: 3147 return 1; 3148 case INDEX_op_orc_vec: 3149 return have_isa_2_07; 3150 case INDEX_op_add_vec: 3151 case INDEX_op_sub_vec: 3152 case INDEX_op_smax_vec: 3153 case INDEX_op_smin_vec: 3154 case INDEX_op_umax_vec: 3155 case INDEX_op_umin_vec: 3156 case INDEX_op_shlv_vec: 3157 case INDEX_op_shrv_vec: 3158 case INDEX_op_sarv_vec: 3159 case INDEX_op_rotlv_vec: 3160 return vece <= MO_32 || have_isa_2_07; 3161 case INDEX_op_ssadd_vec: 3162 case INDEX_op_sssub_vec: 3163 case INDEX_op_usadd_vec: 3164 case INDEX_op_ussub_vec: 3165 return vece <= MO_32; 3166 case INDEX_op_cmp_vec: 3167 case INDEX_op_shli_vec: 3168 case INDEX_op_shri_vec: 3169 case INDEX_op_sari_vec: 3170 case INDEX_op_rotli_vec: 3171 return vece <= MO_32 || have_isa_2_07 ? -1 : 0; 3172 case INDEX_op_neg_vec: 3173 return vece >= MO_32 && have_isa_3_00; 3174 case INDEX_op_mul_vec: 3175 switch (vece) { 3176 case MO_8: 3177 case MO_16: 3178 return -1; 3179 case MO_32: 3180 return have_isa_2_07 ? 1 : -1; 3181 case MO_64: 3182 return have_isa_3_10; 3183 } 3184 return 0; 3185 case INDEX_op_bitsel_vec: 3186 return have_vsx; 3187 case INDEX_op_rotrv_vec: 3188 return -1; 3189 default: 3190 return 0; 3191 } 3192} 3193 3194static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 3195 TCGReg dst, TCGReg src) 3196{ 3197 tcg_debug_assert(dst >= TCG_REG_V0); 3198 3199 /* Splat from integer reg allowed via constraints for v3.00. */ 3200 if (src < TCG_REG_V0) { 3201 tcg_debug_assert(have_isa_3_00); 3202 switch (vece) { 3203 case MO_64: 3204 tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); 3205 return true; 3206 case MO_32: 3207 tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); 3208 return true; 3209 default: 3210 /* Fail, so that we fall back on either dupm or mov+dup. */ 3211 return false; 3212 } 3213 } 3214 3215 /* 3216 * Recall we use (or emulate) VSX integer loads, so the integer is 3217 * right justified within the left (zero-index) double-word. 
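 * Hence the element to splat is the last one of that first doubleword,
 * in big-endian element numbering: byte 7, halfword 3 or word 1, which
 * is what the UIM fields below select.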
3218 */ 3219 switch (vece) { 3220 case MO_8: 3221 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); 3222 break; 3223 case MO_16: 3224 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); 3225 break; 3226 case MO_32: 3227 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); 3228 break; 3229 case MO_64: 3230 if (have_vsx) { 3231 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); 3232 break; 3233 } 3234 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); 3235 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); 3236 break; 3237 default: 3238 g_assert_not_reached(); 3239 } 3240 return true; 3241} 3242 3243static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 3244 TCGReg out, TCGReg base, intptr_t offset) 3245{ 3246 int elt; 3247 3248 tcg_debug_assert(out >= TCG_REG_V0); 3249 switch (vece) { 3250 case MO_8: 3251 if (have_isa_3_00) { 3252 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); 3253 } else { 3254 tcg_out_mem_long(s, 0, LVEBX, out, base, offset); 3255 } 3256 elt = extract32(offset, 0, 4); 3257#if !HOST_BIG_ENDIAN 3258 elt ^= 15; 3259#endif 3260 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); 3261 break; 3262 case MO_16: 3263 tcg_debug_assert((offset & 1) == 0); 3264 if (have_isa_3_00) { 3265 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); 3266 } else { 3267 tcg_out_mem_long(s, 0, LVEHX, out, base, offset); 3268 } 3269 elt = extract32(offset, 1, 3); 3270#if !HOST_BIG_ENDIAN 3271 elt ^= 7; 3272#endif 3273 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); 3274 break; 3275 case MO_32: 3276 if (have_isa_3_00) { 3277 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); 3278 break; 3279 } 3280 tcg_debug_assert((offset & 3) == 0); 3281 tcg_out_mem_long(s, 0, LVEWX, out, base, offset); 3282 elt = extract32(offset, 2, 2); 3283#if !HOST_BIG_ENDIAN 3284 elt ^= 3; 3285#endif 3286 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); 3287 break; 3288 case MO_64: 3289 if (have_vsx) { 3290 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); 3291 break; 3292 } 3293 tcg_debug_assert((offset & 7) == 0); 3294 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); 3295 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); 3296 elt = extract32(offset, 3, 1); 3297#if !HOST_BIG_ENDIAN 3298 elt = !elt; 3299#endif 3300 if (elt) { 3301 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); 3302 } else { 3303 tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); 3304 } 3305 break; 3306 default: 3307 g_assert_not_reached(); 3308 } 3309 return true; 3310} 3311 3312static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3313 unsigned vecl, unsigned vece, 3314 const TCGArg args[TCG_MAX_OP_ARGS], 3315 const int const_args[TCG_MAX_OP_ARGS]) 3316{ 3317 static const uint32_t 3318 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, 3319 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, 3320 mul_op[4] = { 0, 0, VMULUWM, VMULLD }, 3321 neg_op[4] = { 0, 0, VNEGW, VNEGD }, 3322 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, 3323 ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, 3324 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, 3325 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, 3326 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, 3327 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, 3328 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, 3329 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, 3330 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 3331 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 3332 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 3333 smax_op[4] = { 
VMAXSB, VMAXSH, VMAXSW, VMAXSD }, 3334 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, 3335 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, 3336 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, 3337 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, 3338 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, 3339 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, 3340 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, 3341 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, 3342 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; 3343 3344 TCGType type = vecl + TCG_TYPE_V64; 3345 TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; 3346 uint32_t insn; 3347 3348 switch (opc) { 3349 case INDEX_op_ld_vec: 3350 tcg_out_ld(s, type, a0, a1, a2); 3351 return; 3352 case INDEX_op_st_vec: 3353 tcg_out_st(s, type, a0, a1, a2); 3354 return; 3355 case INDEX_op_dupm_vec: 3356 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 3357 return; 3358 3359 case INDEX_op_add_vec: 3360 insn = add_op[vece]; 3361 break; 3362 case INDEX_op_sub_vec: 3363 insn = sub_op[vece]; 3364 break; 3365 case INDEX_op_neg_vec: 3366 insn = neg_op[vece]; 3367 a2 = a1; 3368 a1 = 0; 3369 break; 3370 case INDEX_op_mul_vec: 3371 insn = mul_op[vece]; 3372 break; 3373 case INDEX_op_ssadd_vec: 3374 insn = ssadd_op[vece]; 3375 break; 3376 case INDEX_op_sssub_vec: 3377 insn = sssub_op[vece]; 3378 break; 3379 case INDEX_op_usadd_vec: 3380 insn = usadd_op[vece]; 3381 break; 3382 case INDEX_op_ussub_vec: 3383 insn = ussub_op[vece]; 3384 break; 3385 case INDEX_op_smin_vec: 3386 insn = smin_op[vece]; 3387 break; 3388 case INDEX_op_umin_vec: 3389 insn = umin_op[vece]; 3390 break; 3391 case INDEX_op_smax_vec: 3392 insn = smax_op[vece]; 3393 break; 3394 case INDEX_op_umax_vec: 3395 insn = umax_op[vece]; 3396 break; 3397 case INDEX_op_shlv_vec: 3398 insn = shlv_op[vece]; 3399 break; 3400 case INDEX_op_shrv_vec: 3401 insn = shrv_op[vece]; 3402 break; 3403 case INDEX_op_sarv_vec: 3404 insn = sarv_op[vece]; 3405 break; 3406 case INDEX_op_and_vec: 3407 insn = VAND; 3408 break; 3409 case INDEX_op_or_vec: 3410 insn = VOR; 3411 break; 3412 case INDEX_op_xor_vec: 3413 insn = VXOR; 3414 break; 3415 case INDEX_op_andc_vec: 3416 insn = VANDC; 3417 break; 3418 case INDEX_op_not_vec: 3419 insn = VNOR; 3420 a2 = a1; 3421 break; 3422 case INDEX_op_orc_vec: 3423 insn = VORC; 3424 break; 3425 case INDEX_op_nand_vec: 3426 insn = VNAND; 3427 break; 3428 case INDEX_op_nor_vec: 3429 insn = VNOR; 3430 break; 3431 case INDEX_op_eqv_vec: 3432 insn = VEQV; 3433 break; 3434 3435 case INDEX_op_cmp_vec: 3436 switch (args[3]) { 3437 case TCG_COND_EQ: 3438 insn = eq_op[vece]; 3439 break; 3440 case TCG_COND_NE: 3441 insn = ne_op[vece]; 3442 break; 3443 case TCG_COND_GT: 3444 insn = gts_op[vece]; 3445 break; 3446 case TCG_COND_GTU: 3447 insn = gtu_op[vece]; 3448 break; 3449 default: 3450 g_assert_not_reached(); 3451 } 3452 break; 3453 3454 case INDEX_op_bitsel_vec: 3455 tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); 3456 return; 3457 3458 case INDEX_op_dup2_vec: 3459 assert(TCG_TARGET_REG_BITS == 32); 3460 /* With inputs a1 = xLxx, a2 = xHxx */ 3461 tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ 3462 tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ 3463 tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ 3464 return; 3465 3466 case INDEX_op_ppc_mrgh_vec: 3467 insn = mrgh_op[vece]; 3468 break; 3469 case INDEX_op_ppc_mrgl_vec: 3470 insn = mrgl_op[vece]; 3471 break; 3472 case INDEX_op_ppc_muleu_vec: 3473 insn = muleu_op[vece]; 3474 break; 3475 case INDEX_op_ppc_mulou_vec: 3476 insn = 
mulou_op[vece]; 3477 break; 3478 case INDEX_op_ppc_pkum_vec: 3479 insn = pkum_op[vece]; 3480 break; 3481 case INDEX_op_rotlv_vec: 3482 insn = rotl_op[vece]; 3483 break; 3484 case INDEX_op_ppc_msum_vec: 3485 tcg_debug_assert(vece == MO_16); 3486 tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); 3487 return; 3488 3489 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3490 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 3491 default: 3492 g_assert_not_reached(); 3493 } 3494 3495 tcg_debug_assert(insn != 0); 3496 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 3497} 3498 3499static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, 3500 TCGv_vec v1, TCGArg imm, TCGOpcode opci) 3501{ 3502 TCGv_vec t1; 3503 3504 if (vece == MO_32) { 3505 /* 3506 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3507 * So using negative numbers gets us the 4th bit easily. 3508 */ 3509 imm = sextract32(imm, 0, 5); 3510 } else { 3511 imm &= (8 << vece) - 1; 3512 } 3513 3514 /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */ 3515 t1 = tcg_constant_vec(type, MO_8, imm); 3516 vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), 3517 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3518} 3519 3520static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, 3521 TCGv_vec v1, TCGv_vec v2, TCGCond cond) 3522{ 3523 bool need_swap = false, need_inv = false; 3524 3525 tcg_debug_assert(vece <= MO_32 || have_isa_2_07); 3526 3527 switch (cond) { 3528 case TCG_COND_EQ: 3529 case TCG_COND_GT: 3530 case TCG_COND_GTU: 3531 break; 3532 case TCG_COND_NE: 3533 if (have_isa_3_00 && vece <= MO_32) { 3534 break; 3535 } 3536 /* fall through */ 3537 case TCG_COND_LE: 3538 case TCG_COND_LEU: 3539 need_inv = true; 3540 break; 3541 case TCG_COND_LT: 3542 case TCG_COND_LTU: 3543 need_swap = true; 3544 break; 3545 case TCG_COND_GE: 3546 case TCG_COND_GEU: 3547 need_swap = need_inv = true; 3548 break; 3549 default: 3550 g_assert_not_reached(); 3551 } 3552 3553 if (need_inv) { 3554 cond = tcg_invert_cond(cond); 3555 } 3556 if (need_swap) { 3557 TCGv_vec t1; 3558 t1 = v1, v1 = v2, v2 = t1; 3559 cond = tcg_swap_cond(cond); 3560 } 3561 3562 vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), 3563 tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); 3564 3565 if (need_inv) { 3566 tcg_gen_not_vec(vece, v0, v0); 3567 } 3568} 3569 3570static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, 3571 TCGv_vec v1, TCGv_vec v2) 3572{ 3573 TCGv_vec t1 = tcg_temp_new_vec(type); 3574 TCGv_vec t2 = tcg_temp_new_vec(type); 3575 TCGv_vec c0, c16; 3576 3577 switch (vece) { 3578 case MO_8: 3579 case MO_16: 3580 vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), 3581 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3582 vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), 3583 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3584 vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), 3585 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3586 vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), 3587 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3588 vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), 3589 tcgv_vec_arg(v0), tcgv_vec_arg(t1)); 3590 break; 3591 3592 case MO_32: 3593 tcg_debug_assert(!have_isa_2_07); 3594 /* 3595 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3596 * So using -16 is a quick way to represent 16. 
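 *
 * The 32-bit product is built from 16-bit halves:
 *     v1 * v2 = lo(v1)*lo(v2) + ((hi(v1)*lo(v2) + lo(v1)*hi(v2)) << 16)
 * vmulouh supplies the low-half products, the rotate by 16 plus vmsumuhm
 * supplies the sum of the cross products, and the final shift and add
 * combine them; the hi*hi term overflows out of 32 bits entirely.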
3597 */ 3598 c16 = tcg_constant_vec(type, MO_8, -16); 3599 c0 = tcg_constant_vec(type, MO_8, 0); 3600 3601 vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1), 3602 tcgv_vec_arg(v2), tcgv_vec_arg(c16)); 3603 vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), 3604 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3605 vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1), 3606 tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0)); 3607 vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1), 3608 tcgv_vec_arg(t1), tcgv_vec_arg(c16)); 3609 tcg_gen_add_vec(MO_32, v0, t1, t2); 3610 break; 3611 3612 default: 3613 g_assert_not_reached(); 3614 } 3615 tcg_temp_free_vec(t1); 3616 tcg_temp_free_vec(t2); 3617} 3618 3619void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 3620 TCGArg a0, ...) 3621{ 3622 va_list va; 3623 TCGv_vec v0, v1, v2, t0; 3624 TCGArg a2; 3625 3626 va_start(va, a0); 3627 v0 = temp_tcgv_vec(arg_temp(a0)); 3628 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 3629 a2 = va_arg(va, TCGArg); 3630 3631 switch (opc) { 3632 case INDEX_op_shli_vec: 3633 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); 3634 break; 3635 case INDEX_op_shri_vec: 3636 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); 3637 break; 3638 case INDEX_op_sari_vec: 3639 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); 3640 break; 3641 case INDEX_op_rotli_vec: 3642 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec); 3643 break; 3644 case INDEX_op_cmp_vec: 3645 v2 = temp_tcgv_vec(arg_temp(a2)); 3646 expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); 3647 break; 3648 case INDEX_op_mul_vec: 3649 v2 = temp_tcgv_vec(arg_temp(a2)); 3650 expand_vec_mul(type, vece, v0, v1, v2); 3651 break; 3652 case INDEX_op_rotlv_vec: 3653 v2 = temp_tcgv_vec(arg_temp(a2)); 3654 t0 = tcg_temp_new_vec(type); 3655 tcg_gen_neg_vec(vece, t0, v2); 3656 tcg_gen_rotlv_vec(vece, v0, v1, t0); 3657 tcg_temp_free_vec(t0); 3658 break; 3659 default: 3660 g_assert_not_reached(); 3661 } 3662 va_end(va); 3663} 3664 3665static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 3666{ 3667 switch (op) { 3668 case INDEX_op_goto_ptr: 3669 return C_O0_I1(r); 3670 3671 case INDEX_op_ld8u_i32: 3672 case INDEX_op_ld8s_i32: 3673 case INDEX_op_ld16u_i32: 3674 case INDEX_op_ld16s_i32: 3675 case INDEX_op_ld_i32: 3676 case INDEX_op_ctpop_i32: 3677 case INDEX_op_neg_i32: 3678 case INDEX_op_not_i32: 3679 case INDEX_op_ext8s_i32: 3680 case INDEX_op_ext16s_i32: 3681 case INDEX_op_bswap16_i32: 3682 case INDEX_op_bswap32_i32: 3683 case INDEX_op_extract_i32: 3684 case INDEX_op_ld8u_i64: 3685 case INDEX_op_ld8s_i64: 3686 case INDEX_op_ld16u_i64: 3687 case INDEX_op_ld16s_i64: 3688 case INDEX_op_ld32u_i64: 3689 case INDEX_op_ld32s_i64: 3690 case INDEX_op_ld_i64: 3691 case INDEX_op_ctpop_i64: 3692 case INDEX_op_neg_i64: 3693 case INDEX_op_not_i64: 3694 case INDEX_op_ext8s_i64: 3695 case INDEX_op_ext16s_i64: 3696 case INDEX_op_ext32s_i64: 3697 case INDEX_op_ext_i32_i64: 3698 case INDEX_op_extu_i32_i64: 3699 case INDEX_op_bswap16_i64: 3700 case INDEX_op_bswap32_i64: 3701 case INDEX_op_bswap64_i64: 3702 case INDEX_op_extract_i64: 3703 return C_O1_I1(r, r); 3704 3705 case INDEX_op_st8_i32: 3706 case INDEX_op_st16_i32: 3707 case INDEX_op_st_i32: 3708 case INDEX_op_st8_i64: 3709 case INDEX_op_st16_i64: 3710 case INDEX_op_st32_i64: 3711 case INDEX_op_st_i64: 3712 return C_O0_I2(r, r); 3713 3714 case INDEX_op_add_i32: 3715 case INDEX_op_and_i32: 3716 case INDEX_op_or_i32: 3717 case INDEX_op_xor_i32: 3718 
case INDEX_op_andc_i32: 3719 case INDEX_op_orc_i32: 3720 case INDEX_op_eqv_i32: 3721 case INDEX_op_shl_i32: 3722 case INDEX_op_shr_i32: 3723 case INDEX_op_sar_i32: 3724 case INDEX_op_rotl_i32: 3725 case INDEX_op_rotr_i32: 3726 case INDEX_op_setcond_i32: 3727 case INDEX_op_and_i64: 3728 case INDEX_op_andc_i64: 3729 case INDEX_op_shl_i64: 3730 case INDEX_op_shr_i64: 3731 case INDEX_op_sar_i64: 3732 case INDEX_op_rotl_i64: 3733 case INDEX_op_rotr_i64: 3734 case INDEX_op_setcond_i64: 3735 return C_O1_I2(r, r, ri); 3736 3737 case INDEX_op_mul_i32: 3738 case INDEX_op_mul_i64: 3739 return C_O1_I2(r, r, rI); 3740 3741 case INDEX_op_div_i32: 3742 case INDEX_op_divu_i32: 3743 case INDEX_op_rem_i32: 3744 case INDEX_op_remu_i32: 3745 case INDEX_op_nand_i32: 3746 case INDEX_op_nor_i32: 3747 case INDEX_op_muluh_i32: 3748 case INDEX_op_mulsh_i32: 3749 case INDEX_op_orc_i64: 3750 case INDEX_op_eqv_i64: 3751 case INDEX_op_nand_i64: 3752 case INDEX_op_nor_i64: 3753 case INDEX_op_div_i64: 3754 case INDEX_op_divu_i64: 3755 case INDEX_op_rem_i64: 3756 case INDEX_op_remu_i64: 3757 case INDEX_op_mulsh_i64: 3758 case INDEX_op_muluh_i64: 3759 return C_O1_I2(r, r, r); 3760 3761 case INDEX_op_sub_i32: 3762 return C_O1_I2(r, rI, ri); 3763 case INDEX_op_add_i64: 3764 return C_O1_I2(r, r, rT); 3765 case INDEX_op_or_i64: 3766 case INDEX_op_xor_i64: 3767 return C_O1_I2(r, r, rU); 3768 case INDEX_op_sub_i64: 3769 return C_O1_I2(r, rI, rT); 3770 case INDEX_op_clz_i32: 3771 case INDEX_op_ctz_i32: 3772 case INDEX_op_clz_i64: 3773 case INDEX_op_ctz_i64: 3774 return C_O1_I2(r, r, rZW); 3775 3776 case INDEX_op_brcond_i32: 3777 case INDEX_op_brcond_i64: 3778 return C_O0_I2(r, ri); 3779 3780 case INDEX_op_movcond_i32: 3781 case INDEX_op_movcond_i64: 3782 return C_O1_I4(r, r, ri, rZ, rZ); 3783 case INDEX_op_deposit_i32: 3784 case INDEX_op_deposit_i64: 3785 return C_O1_I2(r, 0, rZ); 3786 case INDEX_op_brcond2_i32: 3787 return C_O0_I4(r, r, ri, ri); 3788 case INDEX_op_setcond2_i32: 3789 return C_O1_I4(r, r, r, ri, ri); 3790 case INDEX_op_add2_i64: 3791 case INDEX_op_add2_i32: 3792 return C_O2_I4(r, r, r, r, rI, rZM); 3793 case INDEX_op_sub2_i64: 3794 case INDEX_op_sub2_i32: 3795 return C_O2_I4(r, r, rI, rZM, r, r); 3796 3797 case INDEX_op_qemu_ld_i32: 3798 return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 3799 ? C_O1_I1(r, L) 3800 : C_O1_I2(r, L, L)); 3801 3802 case INDEX_op_qemu_st_i32: 3803 return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 3804 ? C_O0_I2(S, S) 3805 : C_O0_I3(S, S, S)); 3806 3807 case INDEX_op_qemu_ld_i64: 3808 return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) 3809 : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L) 3810 : C_O2_I2(L, L, L, L)); 3811 3812 case INDEX_op_qemu_st_i64: 3813 return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S) 3814 : TARGET_LONG_BITS == 32 ? 
C_O0_I3(S, S, S) 3815 : C_O0_I4(S, S, S, S)); 3816 3817 case INDEX_op_add_vec: 3818 case INDEX_op_sub_vec: 3819 case INDEX_op_mul_vec: 3820 case INDEX_op_and_vec: 3821 case INDEX_op_or_vec: 3822 case INDEX_op_xor_vec: 3823 case INDEX_op_andc_vec: 3824 case INDEX_op_orc_vec: 3825 case INDEX_op_nor_vec: 3826 case INDEX_op_eqv_vec: 3827 case INDEX_op_nand_vec: 3828 case INDEX_op_cmp_vec: 3829 case INDEX_op_ssadd_vec: 3830 case INDEX_op_sssub_vec: 3831 case INDEX_op_usadd_vec: 3832 case INDEX_op_ussub_vec: 3833 case INDEX_op_smax_vec: 3834 case INDEX_op_smin_vec: 3835 case INDEX_op_umax_vec: 3836 case INDEX_op_umin_vec: 3837 case INDEX_op_shlv_vec: 3838 case INDEX_op_shrv_vec: 3839 case INDEX_op_sarv_vec: 3840 case INDEX_op_rotlv_vec: 3841 case INDEX_op_rotrv_vec: 3842 case INDEX_op_ppc_mrgh_vec: 3843 case INDEX_op_ppc_mrgl_vec: 3844 case INDEX_op_ppc_muleu_vec: 3845 case INDEX_op_ppc_mulou_vec: 3846 case INDEX_op_ppc_pkum_vec: 3847 case INDEX_op_dup2_vec: 3848 return C_O1_I2(v, v, v); 3849 3850 case INDEX_op_not_vec: 3851 case INDEX_op_neg_vec: 3852 return C_O1_I1(v, v); 3853 3854 case INDEX_op_dup_vec: 3855 return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v); 3856 3857 case INDEX_op_ld_vec: 3858 case INDEX_op_dupm_vec: 3859 return C_O1_I1(v, r); 3860 3861 case INDEX_op_st_vec: 3862 return C_O0_I2(v, r); 3863 3864 case INDEX_op_bitsel_vec: 3865 case INDEX_op_ppc_msum_vec: 3866 return C_O1_I3(v, v, v, v); 3867 3868 default: 3869 g_assert_not_reached(); 3870 } 3871} 3872 3873static void tcg_target_init(TCGContext *s) 3874{ 3875 unsigned long hwcap = qemu_getauxval(AT_HWCAP); 3876 unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2); 3877 3878 have_isa = tcg_isa_base; 3879 if (hwcap & PPC_FEATURE_ARCH_2_06) { 3880 have_isa = tcg_isa_2_06; 3881 } 3882#ifdef PPC_FEATURE2_ARCH_2_07 3883 if (hwcap2 & PPC_FEATURE2_ARCH_2_07) { 3884 have_isa = tcg_isa_2_07; 3885 } 3886#endif 3887#ifdef PPC_FEATURE2_ARCH_3_00 3888 if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { 3889 have_isa = tcg_isa_3_00; 3890 } 3891#endif 3892#ifdef PPC_FEATURE2_ARCH_3_10 3893 if (hwcap2 & PPC_FEATURE2_ARCH_3_10) { 3894 have_isa = tcg_isa_3_10; 3895 } 3896#endif 3897 3898#ifdef PPC_FEATURE2_HAS_ISEL 3899 /* Prefer explicit instruction from the kernel. */ 3900 have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0; 3901#else 3902 /* Fall back to knowing Power7 (2.06) has ISEL. */ 3903 have_isel = have_isa_2_06; 3904#endif 3905 3906 if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { 3907 have_altivec = true; 3908 /* We only care about the portion of VSX that overlaps Altivec. 
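       The backend only allocates V0-V31, i.e. VSX registers 32-63, so
       VSX is only advertised when Altivec is present as well.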
*/ 3909 if (hwcap & PPC_FEATURE_HAS_VSX) { 3910 have_vsx = true; 3911 } 3912 } 3913 3914 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; 3915 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; 3916 if (have_altivec) { 3917 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 3918 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 3919 } 3920 3921 tcg_target_call_clobber_regs = 0; 3922 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); 3923 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); 3924 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); 3925 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); 3926 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); 3927 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); 3928 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7); 3929 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); 3930 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); 3931 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); 3932 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); 3933 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12); 3934 3935 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0); 3936 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1); 3937 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2); 3938 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3); 3939 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4); 3940 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5); 3941 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6); 3942 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7); 3943 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 3944 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 3945 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 3946 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 3947 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 3948 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 3949 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 3950 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 3951 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16); 3952 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17); 3953 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18); 3954 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19); 3955 3956 s->reserved_regs = 0; 3957 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ 3958 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ 3959#if defined(_CALL_SYSV) 3960 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ 3961#endif 3962#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 3963 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ 3964#endif 3965 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ 3966 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); 3967 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); 3968 if (USE_REG_TB) { 3969 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */ 3970 } 3971} 3972 3973#ifdef __ELF__ 3974typedef struct { 3975 DebugFrameCIE cie; 3976 DebugFrameFDEHeader fde; 3977 uint8_t fde_def_cfa[4]; 3978 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; 3979} DebugFrame; 3980 3981/* We're 
expecting a 2 byte uleb128 encoded value. */ 3982QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 3983 3984#if TCG_TARGET_REG_BITS == 64 3985# define ELF_HOST_MACHINE EM_PPC64 3986#else 3987# define ELF_HOST_MACHINE EM_PPC 3988#endif 3989 3990static DebugFrame debug_frame = { 3991 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 3992 .cie.id = -1, 3993 .cie.version = 1, 3994 .cie.code_align = 1, 3995 .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ 3996 .cie.return_column = 65, 3997 3998 /* Total FDE size does not include the "len" member. */ 3999 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), 4000 4001 .fde_def_cfa = { 4002 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ 4003 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 4004 (FRAME_SIZE >> 7) 4005 }, 4006 .fde_reg_ofs = { 4007 /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ 4008 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, 4009 } 4010}; 4011 4012void tcg_register_jit(const void *buf, size_t buf_size) 4013{ 4014 uint8_t *p = &debug_frame.fde_reg_ofs[3]; 4015 int i; 4016 4017 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { 4018 p[0] = 0x80 + tcg_target_callee_save_regs[i]; 4019 p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; 4020 } 4021 4022 debug_frame.fde.func_start = (uintptr_t)buf; 4023 debug_frame.fde.func_len = buf_size; 4024 4025 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 4026} 4027#endif /* __ELF__ */ 4028#undef VMULEUB 4029#undef VMULEUH 4030#undef VMULEUW 4031#undef VMULOUB 4032#undef VMULOUH 4033#undef VMULOUW 4034#undef VMSUMUHM 4035