1/* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25#include "elf.h" 26#include "../tcg-pool.c.inc" 27#include "../tcg-ldst.c.inc" 28 29/* 30 * Standardize on the _CALL_FOO symbols used by GCC: 31 * Apple XCode does not define _CALL_DARWIN. 32 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX. 33 */ 34#if TCG_TARGET_REG_BITS == 64 35# ifdef _CALL_AIX 36 /* ok */ 37# elif defined(_CALL_ELF) && _CALL_ELF == 1 38# define _CALL_AIX 39# elif defined(_CALL_ELF) && _CALL_ELF == 2 40 /* ok */ 41# else 42# error "Unknown ABI" 43# endif 44#else 45# if defined(_CALL_SYSV) || defined(_CALL_DARWIN) 46 /* ok */ 47# elif defined(__APPLE__) 48# define _CALL_DARWIN 49# elif defined(__ELF__) 50# define _CALL_SYSV 51# else 52# error "Unknown ABI" 53# endif 54#endif 55 56#if TCG_TARGET_REG_BITS == 64 57# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND 58# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL 59#else 60# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL 61# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF 62#endif 63#ifdef _CALL_SYSV 64# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN 65# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF 66#else 67# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL 68# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL 69#endif 70 71/* For some memory operations, we need a scratch that isn't R0. For the AIX 72 calling convention, we can re-use the TOC register since we'll be reloading 73 it at every call. Otherwise R12 will do nicely as neither a call-saved 74 register nor a parameter register. */ 75#ifdef _CALL_AIX 76# define TCG_REG_TMP1 TCG_REG_R2 77#else 78# define TCG_REG_TMP1 TCG_REG_R12 79#endif 80#define TCG_REG_TMP2 TCG_REG_R11 81 82#define TCG_VEC_TMP1 TCG_REG_V0 83#define TCG_VEC_TMP2 TCG_REG_V1 84 85#define TCG_REG_TB TCG_REG_R31 86#define USE_REG_TB (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00) 87 88/* Shorthand for size of a pointer. Avoid promotion to unsigned. */ 89#define SZP ((int)sizeof(void *)) 90 91/* Shorthand for size of a register. */ 92#define SZR (TCG_TARGET_REG_BITS / 8) 93 94#define TCG_CT_CONST_S16 0x100 95#define TCG_CT_CONST_U16 0x200 96#define TCG_CT_CONST_S32 0x400 97#define TCG_CT_CONST_U32 0x800 98#define TCG_CT_CONST_ZERO 0x1000 99#define TCG_CT_CONST_MONE 0x2000 100#define TCG_CT_CONST_WSZ 0x4000 101#define TCG_CT_CONST_CMP 0x8000 102 103#define ALL_GENERAL_REGS 0xffffffffu 104#define ALL_VECTOR_REGS 0xffffffff00000000ull 105 106#ifndef R_PPC64_PCREL34 107#define R_PPC64_PCREL34 132 108#endif 109 110#define have_isel (cpuinfo & CPUINFO_ISEL) 111 112#define TCG_GUEST_BASE_REG TCG_REG_R30 113 114#ifdef CONFIG_DEBUG_TCG 115static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = { 116 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", 117 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 118 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", 119 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", 120 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 121 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 122 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 123 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", 124}; 125#endif 126 127static const int tcg_target_reg_alloc_order[] = { 128 TCG_REG_R14, /* call saved registers */ 129 TCG_REG_R15, 130 TCG_REG_R16, 131 TCG_REG_R17, 132 TCG_REG_R18, 133 TCG_REG_R19, 134 TCG_REG_R20, 135 TCG_REG_R21, 136 TCG_REG_R22, 137 TCG_REG_R23, 138 TCG_REG_R24, 139 TCG_REG_R25, 140 TCG_REG_R26, 141 TCG_REG_R27, 142 TCG_REG_R28, 143 TCG_REG_R29, 144 TCG_REG_R30, 145 TCG_REG_R31, 146 TCG_REG_R12, /* call clobbered, non-arguments */ 147 TCG_REG_R11, 148 TCG_REG_R2, 149 TCG_REG_R13, 150 TCG_REG_R10, /* call clobbered, arguments */ 151 TCG_REG_R9, 152 TCG_REG_R8, 153 TCG_REG_R7, 154 TCG_REG_R6, 155 TCG_REG_R5, 156 TCG_REG_R4, 157 TCG_REG_R3, 158 159 /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */ 160 TCG_REG_V2, /* call clobbered, vectors */ 161 TCG_REG_V3, 162 TCG_REG_V4, 163 TCG_REG_V5, 164 TCG_REG_V6, 165 TCG_REG_V7, 166 TCG_REG_V8, 167 TCG_REG_V9, 168 TCG_REG_V10, 169 TCG_REG_V11, 170 TCG_REG_V12, 171 TCG_REG_V13, 172 TCG_REG_V14, 173 TCG_REG_V15, 174 TCG_REG_V16, 175 TCG_REG_V17, 176 TCG_REG_V18, 177 TCG_REG_V19, 178}; 179 180static const int tcg_target_call_iarg_regs[] = { 181 TCG_REG_R3, 182 TCG_REG_R4, 183 TCG_REG_R5, 184 TCG_REG_R6, 185 TCG_REG_R7, 186 TCG_REG_R8, 187 TCG_REG_R9, 188 TCG_REG_R10 189}; 190 191static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) 192{ 193 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); 194 tcg_debug_assert(slot >= 0 && slot <= 1); 195 return TCG_REG_R3 + slot; 196} 197 198static const int tcg_target_callee_save_regs[] = { 199#ifdef _CALL_DARWIN 200 TCG_REG_R11, 201#endif 202 TCG_REG_R14, 203 TCG_REG_R15, 204 TCG_REG_R16, 205 TCG_REG_R17, 206 TCG_REG_R18, 207 TCG_REG_R19, 208 TCG_REG_R20, 209 TCG_REG_R21, 210 TCG_REG_R22, 211 TCG_REG_R23, 212 TCG_REG_R24, 213 TCG_REG_R25, 214 TCG_REG_R26, 215 TCG_REG_R27, /* currently used for the global env */ 216 TCG_REG_R28, 217 TCG_REG_R29, 218 TCG_REG_R30, 219 TCG_REG_R31 220}; 221 222/* For PPC, we use TB+4 instead of TB as the base. */ 223static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target) 224{ 225 return tcg_tbrel_diff(s, target) - 4; 226} 227 228static inline bool in_range_b(tcg_target_long target) 229{ 230 return target == sextract64(target, 0, 26); 231} 232 233static uint32_t reloc_pc24_val(const tcg_insn_unit *pc, 234 const tcg_insn_unit *target) 235{ 236 ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); 237 tcg_debug_assert(in_range_b(disp)); 238 return disp & 0x3fffffc; 239} 240 241static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 242{ 243 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 244 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 245 246 if (in_range_b(disp)) { 247 *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc); 248 return true; 249 } 250 return false; 251} 252 253static uint16_t reloc_pc14_val(const tcg_insn_unit *pc, 254 const tcg_insn_unit *target) 255{ 256 ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); 257 tcg_debug_assert(disp == (int16_t) disp); 258 return disp & 0xfffc; 259} 260 261static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 262{ 263 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 264 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 265 266 if (disp == (int16_t) disp) { 267 *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc); 268 return true; 269 } 270 return false; 271} 272 273static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 274{ 275 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 276 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 277 278 if (disp == sextract64(disp, 0, 34)) { 279 src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff); 280 src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff); 281 return true; 282 } 283 return false; 284} 285 286static bool mask_operand(uint32_t c, int *mb, int *me); 287static bool mask64_operand(uint64_t c, int *mb, int *me); 288 289/* test if a constant matches the constraint */ 290static bool tcg_target_const_match(int64_t sval, int ct, 291 TCGType type, TCGCond cond, int vece) 292{ 293 uint64_t uval = sval; 294 int mb, me; 295 296 if (ct & TCG_CT_CONST) { 297 return 1; 298 } 299 300 if (type == TCG_TYPE_I32) { 301 uval = (uint32_t)sval; 302 sval = (int32_t)sval; 303 } 304 305 if (ct & TCG_CT_CONST_CMP) { 306 switch (cond) { 307 case TCG_COND_EQ: 308 case TCG_COND_NE: 309 ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16; 310 break; 311 case TCG_COND_LT: 312 case TCG_COND_GE: 313 case TCG_COND_LE: 314 case TCG_COND_GT: 315 ct |= TCG_CT_CONST_S16; 316 break; 317 case TCG_COND_LTU: 318 case TCG_COND_GEU: 319 case TCG_COND_LEU: 320 case TCG_COND_GTU: 321 ct |= TCG_CT_CONST_U16; 322 break; 323 case TCG_COND_TSTEQ: 324 case TCG_COND_TSTNE: 325 if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) { 326 return 1; 327 } 328 if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) { 329 return 1; 330 } 331 if (TCG_TARGET_REG_BITS == 64 && 332 mask64_operand(uval << clz64(uval), &mb, &me)) { 333 return 1; 334 } 335 return 0; 336 default: 337 g_assert_not_reached(); 338 } 339 } 340 341 if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) { 342 return 1; 343 } 344 if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) { 345 return 1; 346 } 347 if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) { 348 return 1; 349 } 350 if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) { 351 return 1; 352 } 353 if ((ct & TCG_CT_CONST_ZERO) && sval == 0) { 354 return 1; 355 } 356 if ((ct & TCG_CT_CONST_MONE) && sval == -1) { 357 return 1; 358 } 359 if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) { 360 return 1; 361 } 362 return 0; 363} 364 365#define OPCD(opc) ((opc)<<26) 366#define XO19(opc) (OPCD(19)|((opc)<<1)) 367#define MD30(opc) (OPCD(30)|((opc)<<2)) 368#define MDS30(opc) (OPCD(30)|((opc)<<1)) 369#define XO31(opc) (OPCD(31)|((opc)<<1)) 370#define XO58(opc) (OPCD(58)|(opc)) 371#define XO62(opc) (OPCD(62)|(opc)) 372#define VX4(opc) (OPCD(4)|(opc)) 373 374#define B OPCD( 18) 375#define BC OPCD( 16) 376 377#define LBZ OPCD( 34) 378#define LHZ OPCD( 40) 379#define LHA OPCD( 42) 380#define LWZ OPCD( 32) 381#define LWZUX XO31( 55) 382#define LD XO58( 0) 383#define LDX XO31( 21) 384#define LDU XO58( 1) 385#define LDUX XO31( 53) 386#define LWA XO58( 2) 387#define LWAX XO31(341) 388#define LQ OPCD( 56) 389 390#define STB OPCD( 38) 391#define STH OPCD( 44) 392#define STW OPCD( 36) 393#define STD XO62( 0) 394#define STDU XO62( 1) 395#define STDX XO31(149) 396#define STQ XO62( 2) 397 398#define PLWA OPCD( 41) 399#define PLD OPCD( 57) 400#define PLXSD OPCD( 42) 401#define PLXV OPCD(25 * 2 + 1) /* force tx=1 */ 402 403#define PSTD OPCD( 61) 404#define PSTXSD OPCD( 46) 405#define PSTXV OPCD(27 * 2 + 1) /* force sx=1 */ 406 407#define ADDIC OPCD( 12) 408#define ADDI OPCD( 14) 409#define ADDIS OPCD( 15) 410#define ORI OPCD( 24) 411#define ORIS OPCD( 25) 412#define XORI OPCD( 26) 413#define XORIS OPCD( 27) 414#define ANDI OPCD( 28) 415#define ANDIS OPCD( 29) 416#define MULLI OPCD( 7) 417#define CMPLI OPCD( 10) 418#define CMPI OPCD( 11) 419#define SUBFIC OPCD( 8) 420 421#define LWZU OPCD( 33) 422#define STWU OPCD( 37) 423 424#define RLWIMI OPCD( 20) 425#define RLWINM OPCD( 21) 426#define RLWNM OPCD( 23) 427 428#define RLDICL MD30( 0) 429#define RLDICR MD30( 1) 430#define RLDIMI MD30( 3) 431#define RLDCL MDS30( 8) 432 433#define BCLR XO19( 16) 434#define BCCTR XO19(528) 435#define CRAND XO19(257) 436#define CRANDC XO19(129) 437#define CRNAND XO19(225) 438#define CROR XO19(449) 439#define CRNOR XO19( 33) 440#define ADDPCIS XO19( 2) 441 442#define EXTSB XO31(954) 443#define EXTSH XO31(922) 444#define EXTSW XO31(986) 445#define ADD XO31(266) 446#define ADDE XO31(138) 447#define ADDME XO31(234) 448#define ADDZE XO31(202) 449#define ADDC XO31( 10) 450#define AND XO31( 28) 451#define SUBF XO31( 40) 452#define SUBFC XO31( 8) 453#define SUBFE XO31(136) 454#define SUBFME XO31(232) 455#define SUBFZE XO31(200) 456#define OR XO31(444) 457#define XOR XO31(316) 458#define MULLW XO31(235) 459#define MULHW XO31( 75) 460#define MULHWU XO31( 11) 461#define DIVW XO31(491) 462#define DIVWU XO31(459) 463#define MODSW XO31(779) 464#define MODUW XO31(267) 465#define CMP XO31( 0) 466#define CMPL XO31( 32) 467#define LHBRX XO31(790) 468#define LWBRX XO31(534) 469#define LDBRX XO31(532) 470#define STHBRX XO31(918) 471#define STWBRX XO31(662) 472#define STDBRX XO31(660) 473#define MFSPR XO31(339) 474#define MTSPR XO31(467) 475#define SRAWI XO31(824) 476#define NEG XO31(104) 477#define MFCR XO31( 19) 478#define MFOCRF (MFCR | (1u << 20)) 479#define NOR XO31(124) 480#define CNTLZW XO31( 26) 481#define CNTLZD XO31( 58) 482#define CNTTZW XO31(538) 483#define CNTTZD XO31(570) 484#define CNTPOPW XO31(378) 485#define CNTPOPD XO31(506) 486#define ANDC XO31( 60) 487#define ORC XO31(412) 488#define EQV XO31(284) 489#define NAND XO31(476) 490#define ISEL XO31( 15) 491 492#define MULLD XO31(233) 493#define MULHD XO31( 73) 494#define MULHDU XO31( 9) 495#define DIVD XO31(489) 496#define DIVDU XO31(457) 497#define MODSD XO31(777) 498#define MODUD XO31(265) 499 500#define LBZX XO31( 87) 501#define LHZX XO31(279) 502#define LHAX XO31(343) 503#define LWZX XO31( 23) 504#define STBX XO31(215) 505#define STHX XO31(407) 506#define STWX XO31(151) 507 508#define EIEIO XO31(854) 509#define HWSYNC XO31(598) 510#define LWSYNC (HWSYNC | (1u << 21)) 511 512#define SPR(a, b) ((((a)<<5)|(b))<<11) 513#define LR SPR(8, 0) 514#define CTR SPR(9, 0) 515 516#define SLW XO31( 24) 517#define SRW XO31(536) 518#define SRAW XO31(792) 519 520#define SLD XO31( 27) 521#define SRD XO31(539) 522#define SRAD XO31(794) 523#define SRADI XO31(413<<1) 524 525#define BRH XO31(219) 526#define BRW XO31(155) 527#define BRD XO31(187) 528 529#define TW XO31( 4) 530#define TRAP (TW | TO(31)) 531 532#define SETBC XO31(384) /* v3.10 */ 533#define SETBCR XO31(416) /* v3.10 */ 534#define SETNBC XO31(448) /* v3.10 */ 535#define SETNBCR XO31(480) /* v3.10 */ 536 537#define NOP ORI /* ori 0,0,0 */ 538 539#define LVX XO31(103) 540#define LVEBX XO31(7) 541#define LVEHX XO31(39) 542#define LVEWX XO31(71) 543#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ 544#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ 545#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ 546#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */ 547#define LXSD (OPCD(57) | 2) /* v3.00 */ 548#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */ 549 550#define STVX XO31(231) 551#define STVEWX XO31(199) 552#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ 553#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ 554#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */ 555#define STXSD (OPCD(61) | 2) /* v3.00 */ 556 557#define VADDSBS VX4(768) 558#define VADDUBS VX4(512) 559#define VADDUBM VX4(0) 560#define VADDSHS VX4(832) 561#define VADDUHS VX4(576) 562#define VADDUHM VX4(64) 563#define VADDSWS VX4(896) 564#define VADDUWS VX4(640) 565#define VADDUWM VX4(128) 566#define VADDUDM VX4(192) /* v2.07 */ 567 568#define VSUBSBS VX4(1792) 569#define VSUBUBS VX4(1536) 570#define VSUBUBM VX4(1024) 571#define VSUBSHS VX4(1856) 572#define VSUBUHS VX4(1600) 573#define VSUBUHM VX4(1088) 574#define VSUBSWS VX4(1920) 575#define VSUBUWS VX4(1664) 576#define VSUBUWM VX4(1152) 577#define VSUBUDM VX4(1216) /* v2.07 */ 578 579#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */ 580#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */ 581 582#define VMAXSB VX4(258) 583#define VMAXSH VX4(322) 584#define VMAXSW VX4(386) 585#define VMAXSD VX4(450) /* v2.07 */ 586#define VMAXUB VX4(2) 587#define VMAXUH VX4(66) 588#define VMAXUW VX4(130) 589#define VMAXUD VX4(194) /* v2.07 */ 590#define VMINSB VX4(770) 591#define VMINSH VX4(834) 592#define VMINSW VX4(898) 593#define VMINSD VX4(962) /* v2.07 */ 594#define VMINUB VX4(514) 595#define VMINUH VX4(578) 596#define VMINUW VX4(642) 597#define VMINUD VX4(706) /* v2.07 */ 598 599#define VCMPEQUB VX4(6) 600#define VCMPEQUH VX4(70) 601#define VCMPEQUW VX4(134) 602#define VCMPEQUD VX4(199) /* v2.07 */ 603#define VCMPGTSB VX4(774) 604#define VCMPGTSH VX4(838) 605#define VCMPGTSW VX4(902) 606#define VCMPGTSD VX4(967) /* v2.07 */ 607#define VCMPGTUB VX4(518) 608#define VCMPGTUH VX4(582) 609#define VCMPGTUW VX4(646) 610#define VCMPGTUD VX4(711) /* v2.07 */ 611#define VCMPNEB VX4(7) /* v3.00 */ 612#define VCMPNEH VX4(71) /* v3.00 */ 613#define VCMPNEW VX4(135) /* v3.00 */ 614 615#define VSLB VX4(260) 616#define VSLH VX4(324) 617#define VSLW VX4(388) 618#define VSLD VX4(1476) /* v2.07 */ 619#define VSRB VX4(516) 620#define VSRH VX4(580) 621#define VSRW VX4(644) 622#define VSRD VX4(1732) /* v2.07 */ 623#define VSRAB VX4(772) 624#define VSRAH VX4(836) 625#define VSRAW VX4(900) 626#define VSRAD VX4(964) /* v2.07 */ 627#define VRLB VX4(4) 628#define VRLH VX4(68) 629#define VRLW VX4(132) 630#define VRLD VX4(196) /* v2.07 */ 631 632#define VMULEUB VX4(520) 633#define VMULEUH VX4(584) 634#define VMULEUW VX4(648) /* v2.07 */ 635#define VMULOUB VX4(8) 636#define VMULOUH VX4(72) 637#define VMULOUW VX4(136) /* v2.07 */ 638#define VMULUWM VX4(137) /* v2.07 */ 639#define VMULLD VX4(457) /* v3.10 */ 640#define VMSUMUHM VX4(38) 641 642#define VMRGHB VX4(12) 643#define VMRGHH VX4(76) 644#define VMRGHW VX4(140) 645#define VMRGLB VX4(268) 646#define VMRGLH VX4(332) 647#define VMRGLW VX4(396) 648 649#define VPKUHUM VX4(14) 650#define VPKUWUM VX4(78) 651 652#define VAND VX4(1028) 653#define VANDC VX4(1092) 654#define VNOR VX4(1284) 655#define VOR VX4(1156) 656#define VXOR VX4(1220) 657#define VEQV VX4(1668) /* v2.07 */ 658#define VNAND VX4(1412) /* v2.07 */ 659#define VORC VX4(1348) /* v2.07 */ 660 661#define VSPLTB VX4(524) 662#define VSPLTH VX4(588) 663#define VSPLTW VX4(652) 664#define VSPLTISB VX4(780) 665#define VSPLTISH VX4(844) 666#define VSPLTISW VX4(908) 667 668#define VSLDOI VX4(44) 669 670#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ 671#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ 672#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */ 673 674#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ 675#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ 676#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ 677#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ 678#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */ 679#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */ 680 681#define RT(r) ((r)<<21) 682#define RS(r) ((r)<<21) 683#define RA(r) ((r)<<16) 684#define RB(r) ((r)<<11) 685#define TO(t) ((t)<<21) 686#define SH(s) ((s)<<11) 687#define MB(b) ((b)<<6) 688#define ME(e) ((e)<<1) 689#define BO(o) ((o)<<21) 690#define MB64(b) ((b)<<5) 691#define FXM(b) (1 << (19 - (b))) 692 693#define VRT(r) (((r) & 31) << 21) 694#define VRA(r) (((r) & 31) << 16) 695#define VRB(r) (((r) & 31) << 11) 696#define VRC(r) (((r) & 31) << 6) 697 698#define LK 1 699 700#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) 701#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) 702#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) 703#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) 704 705#define BF(n) ((n)<<23) 706#define BI(n, c) (((c)+((n)*4))<<16) 707#define BT(n, c) (((c)+((n)*4))<<21) 708#define BA(n, c) (((c)+((n)*4))<<16) 709#define BB(n, c) (((c)+((n)*4))<<11) 710#define BC_(n, c) (((c)+((n)*4))<<6) 711 712#define BO_COND_TRUE BO(12) 713#define BO_COND_FALSE BO( 4) 714#define BO_ALWAYS BO(20) 715 716enum { 717 CR_LT, 718 CR_GT, 719 CR_EQ, 720 CR_SO 721}; 722 723static const uint32_t tcg_to_bc[16] = { 724 [TCG_COND_EQ] = BC | BI(0, CR_EQ) | BO_COND_TRUE, 725 [TCG_COND_NE] = BC | BI(0, CR_EQ) | BO_COND_FALSE, 726 [TCG_COND_TSTEQ] = BC | BI(0, CR_EQ) | BO_COND_TRUE, 727 [TCG_COND_TSTNE] = BC | BI(0, CR_EQ) | BO_COND_FALSE, 728 [TCG_COND_LT] = BC | BI(0, CR_LT) | BO_COND_TRUE, 729 [TCG_COND_GE] = BC | BI(0, CR_LT) | BO_COND_FALSE, 730 [TCG_COND_LE] = BC | BI(0, CR_GT) | BO_COND_FALSE, 731 [TCG_COND_GT] = BC | BI(0, CR_GT) | BO_COND_TRUE, 732 [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE, 733 [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE, 734 [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE, 735 [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE, 736}; 737 738/* The low bit here is set if the RA and RB fields must be inverted. */ 739static const uint32_t tcg_to_isel[16] = { 740 [TCG_COND_EQ] = ISEL | BC_(0, CR_EQ), 741 [TCG_COND_NE] = ISEL | BC_(0, CR_EQ) | 1, 742 [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ), 743 [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1, 744 [TCG_COND_LT] = ISEL | BC_(0, CR_LT), 745 [TCG_COND_GE] = ISEL | BC_(0, CR_LT) | 1, 746 [TCG_COND_LE] = ISEL | BC_(0, CR_GT) | 1, 747 [TCG_COND_GT] = ISEL | BC_(0, CR_GT), 748 [TCG_COND_LTU] = ISEL | BC_(0, CR_LT), 749 [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1, 750 [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1, 751 [TCG_COND_GTU] = ISEL | BC_(0, CR_GT), 752}; 753 754static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 755 intptr_t value, intptr_t addend) 756{ 757 const tcg_insn_unit *target; 758 int16_t lo; 759 int32_t hi; 760 761 value += addend; 762 target = (const tcg_insn_unit *)value; 763 764 switch (type) { 765 case R_PPC_REL14: 766 return reloc_pc14(code_ptr, target); 767 case R_PPC_REL24: 768 return reloc_pc24(code_ptr, target); 769 case R_PPC64_PCREL34: 770 return reloc_pc34(code_ptr, target); 771 case R_PPC_ADDR16: 772 /* 773 * We are (slightly) abusing this relocation type. In particular, 774 * assert that the low 2 bits are zero, and do not modify them. 775 * That way we can use this with LD et al that have opcode bits 776 * in the low 2 bits of the insn. 777 */ 778 if ((value & 3) || value != (int16_t)value) { 779 return false; 780 } 781 *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); 782 break; 783 case R_PPC_ADDR32: 784 /* 785 * We are abusing this relocation type. Again, this points to 786 * a pair of insns, lis + load. This is an absolute address 787 * relocation for PPC32 so the lis cannot be removed. 788 */ 789 lo = value; 790 hi = value - lo; 791 if (hi + lo != value) { 792 return false; 793 } 794 code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); 795 code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); 796 break; 797 default: 798 g_assert_not_reached(); 799 } 800 return true; 801} 802 803/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */ 804static bool tcg_out_need_prefix_align(TCGContext *s) 805{ 806 return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c; 807} 808 809static void tcg_out_prefix_align(TCGContext *s) 810{ 811 if (tcg_out_need_prefix_align(s)) { 812 tcg_out32(s, NOP); 813 } 814} 815 816static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target) 817{ 818 return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0); 819} 820 821/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */ 822static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt, 823 unsigned ra, tcg_target_long imm, bool r) 824{ 825 tcg_insn_unit p, i; 826 827 p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff); 828 i = opc | TAI(rt, ra, imm); 829 830 tcg_out_prefix_align(s); 831 tcg_out32(s, p); 832 tcg_out32(s, i); 833} 834 835/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */ 836static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt, 837 unsigned ra, tcg_target_long imm, bool r) 838{ 839 tcg_insn_unit p, i; 840 841 p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff); 842 i = opc | TAI(rt, ra, imm); 843 844 tcg_out_prefix_align(s); 845 tcg_out32(s, p); 846 tcg_out32(s, i); 847} 848 849static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, 850 TCGReg base, tcg_target_long offset); 851 852static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 853{ 854 if (ret == arg) { 855 return true; 856 } 857 switch (type) { 858 case TCG_TYPE_I64: 859 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 860 /* fallthru */ 861 case TCG_TYPE_I32: 862 if (ret < TCG_REG_V0) { 863 if (arg < TCG_REG_V0) { 864 tcg_out32(s, OR | SAB(arg, ret, arg)); 865 break; 866 } else if (have_isa_2_07) { 867 tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD) 868 | VRT(arg) | RA(ret)); 869 break; 870 } else { 871 /* Altivec does not support vector->integer moves. */ 872 return false; 873 } 874 } else if (arg < TCG_REG_V0) { 875 if (have_isa_2_07) { 876 tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD) 877 | VRT(ret) | RA(arg)); 878 break; 879 } else { 880 /* Altivec does not support integer->vector moves. */ 881 return false; 882 } 883 } 884 /* fallthru */ 885 case TCG_TYPE_V64: 886 case TCG_TYPE_V128: 887 tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0); 888 tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg)); 889 break; 890 default: 891 g_assert_not_reached(); 892 } 893 return true; 894} 895 896static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs, 897 int sh, int mb, bool rc) 898{ 899 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 900 sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); 901 mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); 902 tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc); 903} 904 905static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, 906 int sh, int mb) 907{ 908 tcg_out_rld_rc(s, op, ra, rs, sh, mb, false); 909} 910 911static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs, 912 int sh, int mb, int me, bool rc) 913{ 914 tcg_debug_assert((mb & 0x1f) == mb); 915 tcg_debug_assert((me & 0x1f) == me); 916 tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc); 917} 918 919static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, 920 int sh, int mb, int me) 921{ 922 tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false); 923} 924 925static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) 926{ 927 tcg_out32(s, EXTSB | RA(dst) | RS(src)); 928} 929 930static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src) 931{ 932 tcg_out32(s, ANDI | SAI(src, dst, 0xff)); 933} 934 935static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) 936{ 937 tcg_out32(s, EXTSH | RA(dst) | RS(src)); 938} 939 940static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src) 941{ 942 tcg_out32(s, ANDI | SAI(src, dst, 0xffff)); 943} 944 945static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src) 946{ 947 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 948 tcg_out32(s, EXTSW | RA(dst) | RS(src)); 949} 950 951static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) 952{ 953 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 954 tcg_out_rld(s, RLDICL, dst, src, 0, 32); 955} 956 957static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src) 958{ 959 tcg_out_ext32s(s, dst, src); 960} 961 962static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src) 963{ 964 tcg_out_ext32u(s, dst, src); 965} 966 967static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) 968{ 969 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 970 tcg_out_mov(s, TCG_TYPE_I32, rd, rn); 971} 972 973static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) 974{ 975 tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); 976} 977 978static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) 979{ 980 tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); 981} 982 983static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c) 984{ 985 /* Limit immediate shift count lest we create an illegal insn. */ 986 tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31)); 987} 988 989static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) 990{ 991 tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); 992} 993 994static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) 995{ 996 tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); 997} 998 999static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c) 1000{ 1001 tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2)); 1002} 1003 1004static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm) 1005{ 1006 uint32_t d0, d1, d2; 1007 1008 tcg_debug_assert((imm & 0xffff) == 0); 1009 tcg_debug_assert(imm == (int32_t)imm); 1010 1011 d2 = extract32(imm, 16, 1); 1012 d1 = extract32(imm, 17, 5); 1013 d0 = extract32(imm, 22, 10); 1014 tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2); 1015} 1016 1017static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags) 1018{ 1019 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 1020 1021 if (have_isa_3_10) { 1022 tcg_out32(s, BRH | RA(dst) | RS(src)); 1023 if (flags & TCG_BSWAP_OS) { 1024 tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); 1025 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 1026 tcg_out_ext16u(s, dst, dst); 1027 } 1028 return; 1029 } 1030 1031 /* 1032 * In the following, 1033 * dep(a, b, m) -> (a & ~m) | (b & m) 1034 * 1035 * Begin with: src = xxxxabcd 1036 */ 1037 /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ 1038 tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); 1039 /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ 1040 tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); 1041 1042 if (flags & TCG_BSWAP_OS) { 1043 tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); 1044 } else { 1045 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 1046 } 1047} 1048 1049static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) 1050{ 1051 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 1052 1053 if (have_isa_3_10) { 1054 tcg_out32(s, BRW | RA(dst) | RS(src)); 1055 if (flags & TCG_BSWAP_OS) { 1056 tcg_out_ext32s(s, dst, dst); 1057 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 1058 tcg_out_ext32u(s, dst, dst); 1059 } 1060 return; 1061 } 1062 1063 /* 1064 * Stolen from gcc's builtin_bswap32. 1065 * In the following, 1066 * dep(a, b, m) -> (a & ~m) | (b & m) 1067 * 1068 * Begin with: src = xxxxabcd 1069 */ 1070 /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ 1071 tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); 1072 /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ 1073 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); 1074 /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ 1075 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); 1076 1077 if (flags & TCG_BSWAP_OS) { 1078 tcg_out_ext32s(s, dst, tmp); 1079 } else { 1080 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 1081 } 1082} 1083 1084static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) 1085{ 1086 TCGReg t0 = dst == src ? TCG_REG_R0 : dst; 1087 TCGReg t1 = dst == src ? dst : TCG_REG_R0; 1088 1089 if (have_isa_3_10) { 1090 tcg_out32(s, BRD | RA(dst) | RS(src)); 1091 return; 1092 } 1093 1094 /* 1095 * In the following, 1096 * dep(a, b, m) -> (a & ~m) | (b & m) 1097 * 1098 * Begin with: src = abcdefgh 1099 */ 1100 /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ 1101 tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); 1102 /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ 1103 tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); 1104 /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ 1105 tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); 1106 1107 /* t0 = rol64(t0, 32) = hgfe0000 */ 1108 tcg_out_rld(s, RLDICL, t0, t0, 32, 0); 1109 /* t1 = rol64(src, 32) = efghabcd */ 1110 tcg_out_rld(s, RLDICL, t1, src, 32, 0); 1111 1112 /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ 1113 tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); 1114 /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ 1115 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); 1116 /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ 1117 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); 1118 1119 tcg_out_mov(s, TCG_TYPE_REG, dst, t0); 1120} 1121 1122/* Emit a move into ret of arg, if it can be done in one insn. */ 1123static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) 1124{ 1125 if (arg == (int16_t)arg) { 1126 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1127 return true; 1128 } 1129 if (arg == (int32_t)arg && (arg & 0xffff) == 0) { 1130 tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); 1131 return true; 1132 } 1133 return false; 1134} 1135 1136static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, 1137 tcg_target_long arg, bool in_prologue) 1138{ 1139 intptr_t tb_diff; 1140 tcg_target_long tmp; 1141 int shift; 1142 1143 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1144 1145 if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { 1146 arg = (int32_t)arg; 1147 } 1148 1149 /* Load 16-bit immediates with one insn. */ 1150 if (tcg_out_movi_one(s, ret, arg)) { 1151 return; 1152 } 1153 1154 /* Load addresses within the TB with one insn. */ 1155 tb_diff = ppc_tbrel_diff(s, (void *)arg); 1156 if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) { 1157 tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff)); 1158 return; 1159 } 1160 1161 /* 1162 * Load values up to 34 bits, and pc-relative addresses, 1163 * with one prefixed insn. 1164 */ 1165 if (have_isa_3_10) { 1166 if (arg == sextract64(arg, 0, 34)) { 1167 /* pli ret,value = paddi ret,0,value,0 */ 1168 tcg_out_mls_d(s, ADDI, ret, 0, arg, 0); 1169 return; 1170 } 1171 1172 tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg); 1173 if (tmp == sextract64(tmp, 0, 34)) { 1174 /* pla ret,value = paddi ret,0,value,1 */ 1175 tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1); 1176 return; 1177 } 1178 } 1179 1180 /* Load 32-bit immediates with two insns. Note that we've already 1181 eliminated bare ADDIS, so we know both insns are required. */ 1182 if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) { 1183 tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); 1184 tcg_out32(s, ORI | SAI(ret, ret, arg)); 1185 return; 1186 } 1187 if (arg == (uint32_t)arg && !(arg & 0x8000)) { 1188 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1189 tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); 1190 return; 1191 } 1192 1193 /* Load masked 16-bit value. */ 1194 if (arg > 0 && (arg & 0x8000)) { 1195 tmp = arg | 0x7fff; 1196 if ((tmp & (tmp + 1)) == 0) { 1197 int mb = clz64(tmp + 1) + 1; 1198 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1199 tcg_out_rld(s, RLDICL, ret, ret, 0, mb); 1200 return; 1201 } 1202 } 1203 1204 /* Load common masks with 2 insns. */ 1205 shift = ctz64(arg); 1206 tmp = arg >> shift; 1207 if (tmp == (int16_t)tmp) { 1208 tcg_out32(s, ADDI | TAI(ret, 0, tmp)); 1209 tcg_out_shli64(s, ret, ret, shift); 1210 return; 1211 } 1212 shift = clz64(arg); 1213 if (tcg_out_movi_one(s, ret, arg << shift)) { 1214 tcg_out_shri64(s, ret, ret, shift); 1215 return; 1216 } 1217 1218 /* Load addresses within 2GB with 2 insns. */ 1219 if (have_isa_3_00) { 1220 intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4; 1221 int16_t lo = hi; 1222 1223 hi -= lo; 1224 if (hi == (int32_t)hi) { 1225 tcg_out_addpcis(s, TCG_REG_TMP2, hi); 1226 tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo)); 1227 return; 1228 } 1229 } 1230 1231 /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */ 1232 if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) { 1233 tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff); 1234 return; 1235 } 1236 1237 /* Use the constant pool, if possible. */ 1238 if (!in_prologue && USE_REG_TB) { 1239 new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr, 1240 ppc_tbrel_diff(s, NULL)); 1241 tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0)); 1242 return; 1243 } 1244 if (have_isa_3_10) { 1245 tcg_out_8ls_d(s, PLD, ret, 0, 0, 1); 1246 new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0); 1247 return; 1248 } 1249 if (have_isa_3_00) { 1250 tcg_out_addpcis(s, TCG_REG_TMP2, 0); 1251 new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0); 1252 tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0)); 1253 return; 1254 } 1255 1256 tmp = arg >> 31 >> 1; 1257 tcg_out_movi(s, TCG_TYPE_I32, ret, tmp); 1258 if (tmp) { 1259 tcg_out_shli64(s, ret, ret, 32); 1260 } 1261 if (arg & 0xffff0000) { 1262 tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); 1263 } 1264 if (arg & 0xffff) { 1265 tcg_out32(s, ORI | SAI(ret, ret, arg)); 1266 } 1267} 1268 1269static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 1270 TCGReg ret, int64_t val) 1271{ 1272 uint32_t load_insn; 1273 int rel, low; 1274 intptr_t add; 1275 1276 switch (vece) { 1277 case MO_8: 1278 low = (int8_t)val; 1279 if (low >= -16 && low < 16) { 1280 tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); 1281 return; 1282 } 1283 if (have_isa_3_00) { 1284 tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); 1285 return; 1286 } 1287 break; 1288 1289 case MO_16: 1290 low = (int16_t)val; 1291 if (low >= -16 && low < 16) { 1292 tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); 1293 return; 1294 } 1295 break; 1296 1297 case MO_32: 1298 low = (int32_t)val; 1299 if (low >= -16 && low < 16) { 1300 tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); 1301 return; 1302 } 1303 break; 1304 } 1305 1306 /* 1307 * Otherwise we must load the value from the constant pool. 1308 */ 1309 if (USE_REG_TB) { 1310 rel = R_PPC_ADDR16; 1311 add = ppc_tbrel_diff(s, NULL); 1312 } else if (have_isa_3_10) { 1313 if (type == TCG_TYPE_V64) { 1314 tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1); 1315 new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0); 1316 } else { 1317 tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1); 1318 new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val); 1319 } 1320 return; 1321 } else if (have_isa_3_00) { 1322 tcg_out_addpcis(s, TCG_REG_TMP1, 0); 1323 rel = R_PPC_REL14; 1324 add = 0; 1325 } else { 1326 rel = R_PPC_ADDR32; 1327 add = 0; 1328 } 1329 1330 if (have_vsx) { 1331 load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX; 1332 load_insn |= VRT(ret) | RB(TCG_REG_TMP1); 1333 if (TCG_TARGET_REG_BITS == 64) { 1334 new_pool_label(s, val, rel, s->code_ptr, add); 1335 } else { 1336 new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val); 1337 } 1338 } else { 1339 load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); 1340 if (TCG_TARGET_REG_BITS == 64) { 1341 new_pool_l2(s, rel, s->code_ptr, add, val, val); 1342 } else { 1343 new_pool_l4(s, rel, s->code_ptr, add, 1344 val >> 32, val, val >> 32, val); 1345 } 1346 } 1347 1348 if (USE_REG_TB) { 1349 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0)); 1350 load_insn |= RA(TCG_REG_TB); 1351 } else if (have_isa_3_00) { 1352 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); 1353 } else { 1354 tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0)); 1355 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); 1356 } 1357 tcg_out32(s, load_insn); 1358} 1359 1360static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, 1361 tcg_target_long arg) 1362{ 1363 switch (type) { 1364 case TCG_TYPE_I32: 1365 case TCG_TYPE_I64: 1366 tcg_debug_assert(ret < TCG_REG_V0); 1367 tcg_out_movi_int(s, type, ret, arg, false); 1368 break; 1369 1370 default: 1371 g_assert_not_reached(); 1372 } 1373} 1374 1375static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1376{ 1377 return false; 1378} 1379 1380static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1381 tcg_target_long imm) 1382{ 1383 /* This function is only used for passing structs by reference. */ 1384 g_assert_not_reached(); 1385} 1386 1387static bool mask_operand(uint32_t c, int *mb, int *me) 1388{ 1389 uint32_t lsb, test; 1390 1391 /* Accept a bit pattern like: 1392 0....01....1 1393 1....10....0 1394 0..01..10..0 1395 Keep track of the transitions. */ 1396 if (c == 0 || c == -1) { 1397 return false; 1398 } 1399 test = c; 1400 lsb = test & -test; 1401 test += lsb; 1402 if (test & (test - 1)) { 1403 return false; 1404 } 1405 1406 *me = clz32(lsb); 1407 *mb = test ? clz32(test & -test) + 1 : 0; 1408 return true; 1409} 1410 1411static bool mask64_operand(uint64_t c, int *mb, int *me) 1412{ 1413 uint64_t lsb; 1414 1415 if (c == 0) { 1416 return false; 1417 } 1418 1419 lsb = c & -c; 1420 /* Accept 1..10..0. */ 1421 if (c == -lsb) { 1422 *mb = 0; 1423 *me = clz64(lsb); 1424 return true; 1425 } 1426 /* Accept 0..01..1. */ 1427 if (lsb == 1 && (c & (c + 1)) == 0) { 1428 *mb = clz64(c + 1) + 1; 1429 *me = 63; 1430 return true; 1431 } 1432 return false; 1433} 1434 1435static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1436{ 1437 int mb, me; 1438 1439 if (mask_operand(c, &mb, &me)) { 1440 tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); 1441 } else if ((c & 0xffff) == c) { 1442 tcg_out32(s, ANDI | SAI(src, dst, c)); 1443 return; 1444 } else if ((c & 0xffff0000) == c) { 1445 tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); 1446 return; 1447 } else { 1448 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); 1449 tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); 1450 } 1451} 1452 1453static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) 1454{ 1455 int mb, me; 1456 1457 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1458 if (mask64_operand(c, &mb, &me)) { 1459 if (mb == 0) { 1460 tcg_out_rld(s, RLDICR, dst, src, 0, me); 1461 } else { 1462 tcg_out_rld(s, RLDICL, dst, src, 0, mb); 1463 } 1464 } else if ((c & 0xffff) == c) { 1465 tcg_out32(s, ANDI | SAI(src, dst, c)); 1466 return; 1467 } else if ((c & 0xffff0000) == c) { 1468 tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); 1469 return; 1470 } else { 1471 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); 1472 tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); 1473 } 1474} 1475 1476static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, 1477 int op_lo, int op_hi) 1478{ 1479 if (c >> 16) { 1480 tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); 1481 src = dst; 1482 } 1483 if (c & 0xffff) { 1484 tcg_out32(s, op_lo | SAI(src, dst, c)); 1485 src = dst; 1486 } 1487} 1488 1489static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1490{ 1491 tcg_out_zori32(s, dst, src, c, ORI, ORIS); 1492} 1493 1494static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1495{ 1496 tcg_out_zori32(s, dst, src, c, XORI, XORIS); 1497} 1498 1499static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target) 1500{ 1501 ptrdiff_t disp = tcg_pcrel_diff(s, target); 1502 if (in_range_b(disp)) { 1503 tcg_out32(s, B | (disp & 0x3fffffc) | mask); 1504 } else { 1505 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); 1506 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); 1507 tcg_out32(s, BCCTR | BO_ALWAYS | mask); 1508 } 1509} 1510 1511static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, 1512 TCGReg base, tcg_target_long offset) 1513{ 1514 tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; 1515 bool is_int_store = false; 1516 TCGReg rs = TCG_REG_TMP1; 1517 1518 switch (opi) { 1519 case LD: case LWA: 1520 align = 3; 1521 /* FALLTHRU */ 1522 default: 1523 if (rt > TCG_REG_R0 && rt < TCG_REG_V0) { 1524 rs = rt; 1525 break; 1526 } 1527 break; 1528 case LXSD: 1529 case STXSD: 1530 align = 3; 1531 break; 1532 case LXV: 1533 case STXV: 1534 align = 15; 1535 break; 1536 case STD: 1537 align = 3; 1538 /* FALLTHRU */ 1539 case STB: case STH: case STW: 1540 is_int_store = true; 1541 break; 1542 } 1543 1544 /* For unaligned or large offsets, use the prefixed form. */ 1545 if (have_isa_3_10 1546 && (offset != (int16_t)offset || (offset & align)) 1547 && offset == sextract64(offset, 0, 34)) { 1548 /* 1549 * Note that the MLS:D insns retain their un-prefixed opcode, 1550 * while the 8LS:D insns use a different opcode space. 1551 */ 1552 switch (opi) { 1553 case LBZ: 1554 case LHZ: 1555 case LHA: 1556 case LWZ: 1557 case STB: 1558 case STH: 1559 case STW: 1560 case ADDI: 1561 tcg_out_mls_d(s, opi, rt, base, offset, 0); 1562 return; 1563 case LWA: 1564 tcg_out_8ls_d(s, PLWA, rt, base, offset, 0); 1565 return; 1566 case LD: 1567 tcg_out_8ls_d(s, PLD, rt, base, offset, 0); 1568 return; 1569 case STD: 1570 tcg_out_8ls_d(s, PSTD, rt, base, offset, 0); 1571 return; 1572 case LXSD: 1573 tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0); 1574 return; 1575 case STXSD: 1576 tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0); 1577 return; 1578 case LXV: 1579 tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0); 1580 return; 1581 case STXV: 1582 tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0); 1583 return; 1584 } 1585 } 1586 1587 /* For unaligned, or very large offsets, use the indexed form. */ 1588 if (offset & align || offset != (int32_t)offset || opi == 0) { 1589 if (rs == base) { 1590 rs = TCG_REG_R0; 1591 } 1592 tcg_debug_assert(!is_int_store || rs != rt); 1593 tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); 1594 tcg_out32(s, opx | TAB(rt & 31, base, rs)); 1595 return; 1596 } 1597 1598 l0 = (int16_t)offset; 1599 offset = (offset - l0) >> 16; 1600 l1 = (int16_t)offset; 1601 1602 if (l1 < 0 && orig >= 0) { 1603 extra = 0x4000; 1604 l1 = (int16_t)(offset - 0x4000); 1605 } 1606 if (l1) { 1607 tcg_out32(s, ADDIS | TAI(rs, base, l1)); 1608 base = rs; 1609 } 1610 if (extra) { 1611 tcg_out32(s, ADDIS | TAI(rs, base, extra)); 1612 base = rs; 1613 } 1614 if (opi != ADDI || base != rt || l0 != 0) { 1615 tcg_out32(s, opi | TAI(rt & 31, base, l0)); 1616 } 1617} 1618 1619static void tcg_out_vsldoi(TCGContext *s, TCGReg ret, 1620 TCGReg va, TCGReg vb, int shb) 1621{ 1622 tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6)); 1623} 1624 1625static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1626 TCGReg base, intptr_t offset) 1627{ 1628 int shift; 1629 1630 switch (type) { 1631 case TCG_TYPE_I32: 1632 if (ret < TCG_REG_V0) { 1633 tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); 1634 break; 1635 } 1636 if (have_isa_2_07 && have_vsx) { 1637 tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset); 1638 break; 1639 } 1640 tcg_debug_assert((offset & 3) == 0); 1641 tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); 1642 shift = (offset - 4) & 0xc; 1643 if (shift) { 1644 tcg_out_vsldoi(s, ret, ret, ret, shift); 1645 } 1646 break; 1647 case TCG_TYPE_I64: 1648 if (ret < TCG_REG_V0) { 1649 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1650 tcg_out_mem_long(s, LD, LDX, ret, base, offset); 1651 break; 1652 } 1653 /* fallthru */ 1654 case TCG_TYPE_V64: 1655 tcg_debug_assert(ret >= TCG_REG_V0); 1656 if (have_vsx) { 1657 tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX, 1658 ret, base, offset); 1659 break; 1660 } 1661 tcg_debug_assert((offset & 7) == 0); 1662 tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); 1663 if (offset & 8) { 1664 tcg_out_vsldoi(s, ret, ret, ret, 8); 1665 } 1666 break; 1667 case TCG_TYPE_V128: 1668 tcg_debug_assert(ret >= TCG_REG_V0); 1669 tcg_debug_assert((offset & 15) == 0); 1670 tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0, 1671 LVX, ret, base, offset); 1672 break; 1673 default: 1674 g_assert_not_reached(); 1675 } 1676} 1677 1678static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 1679 TCGReg base, intptr_t offset) 1680{ 1681 int shift; 1682 1683 switch (type) { 1684 case TCG_TYPE_I32: 1685 if (arg < TCG_REG_V0) { 1686 tcg_out_mem_long(s, STW, STWX, arg, base, offset); 1687 break; 1688 } 1689 if (have_isa_2_07 && have_vsx) { 1690 tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset); 1691 break; 1692 } 1693 assert((offset & 3) == 0); 1694 tcg_debug_assert((offset & 3) == 0); 1695 shift = (offset - 4) & 0xc; 1696 if (shift) { 1697 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift); 1698 arg = TCG_VEC_TMP1; 1699 } 1700 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); 1701 break; 1702 case TCG_TYPE_I64: 1703 if (arg < TCG_REG_V0) { 1704 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1705 tcg_out_mem_long(s, STD, STDX, arg, base, offset); 1706 break; 1707 } 1708 /* fallthru */ 1709 case TCG_TYPE_V64: 1710 tcg_debug_assert(arg >= TCG_REG_V0); 1711 if (have_vsx) { 1712 tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0, 1713 STXSDX, arg, base, offset); 1714 break; 1715 } 1716 tcg_debug_assert((offset & 7) == 0); 1717 if (offset & 8) { 1718 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); 1719 arg = TCG_VEC_TMP1; 1720 } 1721 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); 1722 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4); 1723 break; 1724 case TCG_TYPE_V128: 1725 tcg_debug_assert(arg >= TCG_REG_V0); 1726 tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0, 1727 STVX, arg, base, offset); 1728 break; 1729 default: 1730 g_assert_not_reached(); 1731 } 1732} 1733 1734static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1735 TCGReg base, intptr_t ofs) 1736{ 1737 return false; 1738} 1739 1740/* 1741 * Set dest non-zero if and only if (arg1 & arg2) is non-zero. 1742 * If RC, then also set RC0. 1743 */ 1744static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2, 1745 bool const_arg2, TCGType type, bool rc) 1746{ 1747 int mb, me; 1748 1749 if (!const_arg2) { 1750 tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc); 1751 return; 1752 } 1753 1754 if (type == TCG_TYPE_I32) { 1755 arg2 = (uint32_t)arg2; 1756 } 1757 1758 if ((arg2 & ~0xffff) == 0) { 1759 tcg_out32(s, ANDI | SAI(arg1, dest, arg2)); 1760 return; 1761 } 1762 if ((arg2 & ~0xffff0000ull) == 0) { 1763 tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16)); 1764 return; 1765 } 1766 if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) { 1767 tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc); 1768 return; 1769 } 1770 if (TCG_TARGET_REG_BITS == 64) { 1771 int sh = clz64(arg2); 1772 if (mask64_operand(arg2 << sh, &mb, &me)) { 1773 tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc); 1774 return; 1775 } 1776 } 1777 /* Constraints should satisfy this. */ 1778 g_assert_not_reached(); 1779} 1780 1781static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, 1782 int const_arg2, int cr, TCGType type) 1783{ 1784 int imm; 1785 uint32_t op; 1786 1787 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1788 1789 /* 1790 * Simplify the comparisons below wrt CMPI. 1791 * All of the tests are 16-bit, so a 32-bit sign extend always works. 1792 */ 1793 if (type == TCG_TYPE_I32) { 1794 arg2 = (int32_t)arg2; 1795 } 1796 1797 switch (cond) { 1798 case TCG_COND_EQ: 1799 case TCG_COND_NE: 1800 if (const_arg2) { 1801 if ((int16_t) arg2 == arg2) { 1802 op = CMPI; 1803 imm = 1; 1804 break; 1805 } else if ((uint16_t) arg2 == arg2) { 1806 op = CMPLI; 1807 imm = 1; 1808 break; 1809 } 1810 } 1811 op = CMPL; 1812 imm = 0; 1813 break; 1814 1815 case TCG_COND_TSTEQ: 1816 case TCG_COND_TSTNE: 1817 tcg_debug_assert(cr == 0); 1818 tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true); 1819 return; 1820 1821 case TCG_COND_LT: 1822 case TCG_COND_GE: 1823 case TCG_COND_LE: 1824 case TCG_COND_GT: 1825 if (const_arg2) { 1826 if ((int16_t) arg2 == arg2) { 1827 op = CMPI; 1828 imm = 1; 1829 break; 1830 } 1831 } 1832 op = CMP; 1833 imm = 0; 1834 break; 1835 1836 case TCG_COND_LTU: 1837 case TCG_COND_GEU: 1838 case TCG_COND_LEU: 1839 case TCG_COND_GTU: 1840 if (const_arg2) { 1841 if ((uint16_t) arg2 == arg2) { 1842 op = CMPLI; 1843 imm = 1; 1844 break; 1845 } 1846 } 1847 op = CMPL; 1848 imm = 0; 1849 break; 1850 1851 default: 1852 g_assert_not_reached(); 1853 } 1854 op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); 1855 1856 if (imm) { 1857 tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); 1858 } else { 1859 if (const_arg2) { 1860 tcg_out_movi(s, type, TCG_REG_R0, arg2); 1861 arg2 = TCG_REG_R0; 1862 } 1863 tcg_out32(s, op | RA(arg1) | RB(arg2)); 1864 } 1865} 1866 1867static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, 1868 TCGReg dst, TCGReg src, bool neg) 1869{ 1870 if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { 1871 /* 1872 * X != 0 implies X + -1 generates a carry. 1873 * RT = (~X + X) + CA 1874 * = -1 + CA 1875 * = CA ? 0 : -1 1876 */ 1877 tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); 1878 tcg_out32(s, SUBFE | TAB(dst, src, src)); 1879 return; 1880 } 1881 1882 if (type == TCG_TYPE_I32) { 1883 tcg_out32(s, CNTLZW | RS(src) | RA(dst)); 1884 tcg_out_shri32(s, dst, dst, 5); 1885 } else { 1886 tcg_out32(s, CNTLZD | RS(src) | RA(dst)); 1887 tcg_out_shri64(s, dst, dst, 6); 1888 } 1889 if (neg) { 1890 tcg_out32(s, NEG | RT(dst) | RA(dst)); 1891 } 1892} 1893 1894static void tcg_out_setcond_ne0(TCGContext *s, TCGType type, 1895 TCGReg dst, TCGReg src, bool neg) 1896{ 1897 if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { 1898 /* 1899 * X != 0 implies X + -1 generates a carry. Extra addition 1900 * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. 1901 */ 1902 tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); 1903 tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); 1904 return; 1905 } 1906 tcg_out_setcond_eq0(s, type, dst, src, false); 1907 if (neg) { 1908 tcg_out32(s, ADDI | TAI(dst, dst, -1)); 1909 } else { 1910 tcg_out_xori32(s, dst, dst, 1); 1911 } 1912} 1913 1914static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, 1915 bool const_arg2) 1916{ 1917 if (const_arg2) { 1918 if ((uint32_t)arg2 == arg2) { 1919 tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); 1920 } else { 1921 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); 1922 tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); 1923 } 1924 } else { 1925 tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); 1926 } 1927 return TCG_REG_R0; 1928} 1929 1930static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, 1931 TCGArg arg0, TCGArg arg1, TCGArg arg2, 1932 int const_arg2, bool neg) 1933{ 1934 int sh; 1935 bool inv; 1936 1937 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1938 1939 /* Ignore high bits of a potential constant arg2. */ 1940 if (type == TCG_TYPE_I32) { 1941 arg2 = (uint32_t)arg2; 1942 } 1943 1944 /* With SETBC/SETBCR, we can always implement with 2 insns. */ 1945 if (have_isa_3_10) { 1946 tcg_insn_unit bi, opc; 1947 1948 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type); 1949 1950 /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */ 1951 bi = tcg_to_bc[cond] & (0x1f << 16); 1952 if (tcg_to_bc[cond] & BO(8)) { 1953 opc = neg ? SETNBC : SETBC; 1954 } else { 1955 opc = neg ? SETNBCR : SETBCR; 1956 } 1957 tcg_out32(s, opc | RT(arg0) | bi); 1958 return; 1959 } 1960 1961 /* Handle common and trivial cases before handling anything else. */ 1962 if (arg2 == 0) { 1963 switch (cond) { 1964 case TCG_COND_EQ: 1965 tcg_out_setcond_eq0(s, type, arg0, arg1, neg); 1966 return; 1967 case TCG_COND_NE: 1968 tcg_out_setcond_ne0(s, type, arg0, arg1, neg); 1969 return; 1970 case TCG_COND_GE: 1971 tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); 1972 arg1 = arg0; 1973 /* FALLTHRU */ 1974 case TCG_COND_LT: 1975 /* Extract the sign bit. */ 1976 if (type == TCG_TYPE_I32) { 1977 if (neg) { 1978 tcg_out_sari32(s, arg0, arg1, 31); 1979 } else { 1980 tcg_out_shri32(s, arg0, arg1, 31); 1981 } 1982 } else { 1983 if (neg) { 1984 tcg_out_sari64(s, arg0, arg1, 63); 1985 } else { 1986 tcg_out_shri64(s, arg0, arg1, 63); 1987 } 1988 } 1989 return; 1990 default: 1991 break; 1992 } 1993 } 1994 1995 /* If we have ISEL, we can implement everything with 3 or 4 insns. 1996 All other cases below are also at least 3 insns, so speed up the 1997 code generator by not considering them and always using ISEL. */ 1998 if (have_isel) { 1999 int isel, tab; 2000 2001 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type); 2002 2003 isel = tcg_to_isel[cond]; 2004 2005 tcg_out_movi(s, type, arg0, neg ? -1 : 1); 2006 if (isel & 1) { 2007 /* arg0 = (bc ? 0 : 1) */ 2008 tab = TAB(arg0, 0, arg0); 2009 isel &= ~1; 2010 } else { 2011 /* arg0 = (bc ? 1 : 0) */ 2012 tcg_out_movi(s, type, TCG_REG_R0, 0); 2013 tab = TAB(arg0, arg0, TCG_REG_R0); 2014 } 2015 tcg_out32(s, isel | tab); 2016 return; 2017 } 2018 2019 inv = false; 2020 switch (cond) { 2021 case TCG_COND_EQ: 2022 arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); 2023 tcg_out_setcond_eq0(s, type, arg0, arg1, neg); 2024 break; 2025 2026 case TCG_COND_NE: 2027 arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); 2028 tcg_out_setcond_ne0(s, type, arg0, arg1, neg); 2029 break; 2030 2031 case TCG_COND_TSTEQ: 2032 tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false); 2033 tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg); 2034 break; 2035 2036 case TCG_COND_TSTNE: 2037 tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false); 2038 tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg); 2039 break; 2040 2041 case TCG_COND_LE: 2042 case TCG_COND_LEU: 2043 inv = true; 2044 /* fall through */ 2045 case TCG_COND_GT: 2046 case TCG_COND_GTU: 2047 sh = 30; /* CR7 CR_GT */ 2048 goto crtest; 2049 2050 case TCG_COND_GE: 2051 case TCG_COND_GEU: 2052 inv = true; 2053 /* fall through */ 2054 case TCG_COND_LT: 2055 case TCG_COND_LTU: 2056 sh = 29; /* CR7 CR_LT */ 2057 goto crtest; 2058 2059 crtest: 2060 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 2061 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); 2062 tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); 2063 if (neg && inv) { 2064 tcg_out32(s, ADDI | TAI(arg0, arg0, -1)); 2065 } else if (neg) { 2066 tcg_out32(s, NEG | RT(arg0) | RA(arg0)); 2067 } else if (inv) { 2068 tcg_out_xori32(s, arg0, arg0, 1); 2069 } 2070 break; 2071 2072 default: 2073 g_assert_not_reached(); 2074 } 2075} 2076 2077static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd) 2078{ 2079 tcg_out32(s, tcg_to_bc[cond] | bd); 2080} 2081 2082static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l) 2083{ 2084 int bd = 0; 2085 if (l->has_value) { 2086 bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); 2087 } else { 2088 tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); 2089 } 2090 tcg_out_bc(s, cond, bd); 2091} 2092 2093static void tcg_out_brcond(TCGContext *s, TCGCond cond, 2094 TCGArg arg1, TCGArg arg2, int const_arg2, 2095 TCGLabel *l, TCGType type) 2096{ 2097 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type); 2098 tcg_out_bc_lab(s, cond, l); 2099} 2100 2101static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, 2102 TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, 2103 TCGArg v2, bool const_c2) 2104{ 2105 /* If for some reason both inputs are zero, don't produce bad code. */ 2106 if (v1 == 0 && v2 == 0) { 2107 tcg_out_movi(s, type, dest, 0); 2108 return; 2109 } 2110 2111 tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type); 2112 2113 if (have_isel) { 2114 int isel = tcg_to_isel[cond]; 2115 2116 /* Swap the V operands if the operation indicates inversion. */ 2117 if (isel & 1) { 2118 int t = v1; 2119 v1 = v2; 2120 v2 = t; 2121 isel &= ~1; 2122 } 2123 /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ 2124 if (v2 == 0) { 2125 tcg_out_movi(s, type, TCG_REG_R0, 0); 2126 } 2127 tcg_out32(s, isel | TAB(dest, v1, v2)); 2128 } else { 2129 if (dest == v2) { 2130 cond = tcg_invert_cond(cond); 2131 v2 = v1; 2132 } else if (dest != v1) { 2133 if (v1 == 0) { 2134 tcg_out_movi(s, type, dest, 0); 2135 } else { 2136 tcg_out_mov(s, type, dest, v1); 2137 } 2138 } 2139 /* Branch forward over one insn */ 2140 tcg_out_bc(s, cond, 8); 2141 if (v2 == 0) { 2142 tcg_out_movi(s, type, dest, 0); 2143 } else { 2144 tcg_out_mov(s, type, dest, v2); 2145 } 2146 } 2147} 2148 2149static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, 2150 TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2) 2151{ 2152 if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) { 2153 tcg_out32(s, opc | RA(a0) | RS(a1)); 2154 } else { 2155 tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type); 2156 /* Note that the only other valid constant for a2 is 0. */ 2157 if (have_isel) { 2158 tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1)); 2159 tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0)); 2160 } else if (!const_a2 && a0 == a2) { 2161 tcg_out_bc(s, TCG_COND_EQ, 8); 2162 tcg_out32(s, opc | RA(a0) | RS(a1)); 2163 } else { 2164 tcg_out32(s, opc | RA(a0) | RS(a1)); 2165 tcg_out_bc(s, TCG_COND_NE, 8); 2166 if (const_a2) { 2167 tcg_out_movi(s, type, a0, 0); 2168 } else { 2169 tcg_out_mov(s, type, a0, a2); 2170 } 2171 } 2172 } 2173} 2174 2175static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, 2176 const int *const_args) 2177{ 2178 static const struct { uint8_t bit1, bit2; } bits[] = { 2179 [TCG_COND_LT ] = { CR_LT, CR_LT }, 2180 [TCG_COND_LE ] = { CR_LT, CR_GT }, 2181 [TCG_COND_GT ] = { CR_GT, CR_GT }, 2182 [TCG_COND_GE ] = { CR_GT, CR_LT }, 2183 [TCG_COND_LTU] = { CR_LT, CR_LT }, 2184 [TCG_COND_LEU] = { CR_LT, CR_GT }, 2185 [TCG_COND_GTU] = { CR_GT, CR_GT }, 2186 [TCG_COND_GEU] = { CR_GT, CR_LT }, 2187 }; 2188 2189 TCGCond cond = args[4], cond2; 2190 TCGArg al, ah, bl, bh; 2191 int blconst, bhconst; 2192 int op, bit1, bit2; 2193 2194 al = args[0]; 2195 ah = args[1]; 2196 bl = args[2]; 2197 bh = args[3]; 2198 blconst = const_args[2]; 2199 bhconst = const_args[3]; 2200 2201 switch (cond) { 2202 case TCG_COND_EQ: 2203 op = CRAND; 2204 goto do_equality; 2205 case TCG_COND_NE: 2206 op = CRNAND; 2207 do_equality: 2208 tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); 2209 tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); 2210 tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2211 break; 2212 2213 case TCG_COND_TSTEQ: 2214 case TCG_COND_TSTNE: 2215 if (blconst) { 2216 tcg_out_andi32(s, TCG_REG_R0, al, bl); 2217 } else { 2218 tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl)); 2219 } 2220 if (bhconst) { 2221 tcg_out_andi32(s, TCG_REG_TMP1, ah, bh); 2222 } else { 2223 tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh)); 2224 } 2225 tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1); 2226 break; 2227 2228 case TCG_COND_LT: 2229 case TCG_COND_LE: 2230 case TCG_COND_GT: 2231 case TCG_COND_GE: 2232 case TCG_COND_LTU: 2233 case TCG_COND_LEU: 2234 case TCG_COND_GTU: 2235 case TCG_COND_GEU: 2236 bit1 = bits[cond].bit1; 2237 bit2 = bits[cond].bit2; 2238 op = (bit1 != bit2 ? CRANDC : CRAND); 2239 cond2 = tcg_unsigned_cond(cond); 2240 2241 tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); 2242 tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); 2243 tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); 2244 tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ)); 2245 break; 2246 2247 default: 2248 g_assert_not_reached(); 2249 } 2250} 2251 2252static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, 2253 const int *const_args) 2254{ 2255 tcg_out_cmp2(s, args + 1, const_args + 1); 2256 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0)); 2257 tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); 2258} 2259 2260static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, 2261 const int *const_args) 2262{ 2263 tcg_out_cmp2(s, args, const_args); 2264 tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5])); 2265} 2266 2267static void tcg_out_mb(TCGContext *s, TCGArg a0) 2268{ 2269 uint32_t insn; 2270 2271 if (a0 & TCG_MO_ST_LD) { 2272 insn = HWSYNC; 2273 } else { 2274 insn = LWSYNC; 2275 } 2276 2277 tcg_out32(s, insn); 2278} 2279 2280static void tcg_out_call_int(TCGContext *s, int lk, 2281 const tcg_insn_unit *target) 2282{ 2283#ifdef _CALL_AIX 2284 /* Look through the descriptor. If the branch is in range, and we 2285 don't have to spend too much effort on building the toc. */ 2286 const void *tgt = ((const void * const *)target)[0]; 2287 uintptr_t toc = ((const uintptr_t *)target)[1]; 2288 intptr_t diff = tcg_pcrel_diff(s, tgt); 2289 2290 if (in_range_b(diff) && toc == (uint32_t)toc) { 2291 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); 2292 tcg_out_b(s, lk, tgt); 2293 } else { 2294 /* Fold the low bits of the constant into the addresses below. */ 2295 intptr_t arg = (intptr_t)target; 2296 int ofs = (int16_t)arg; 2297 2298 if (ofs + 8 < 0x8000) { 2299 arg -= ofs; 2300 } else { 2301 ofs = 0; 2302 } 2303 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); 2304 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); 2305 tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); 2306 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); 2307 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2308 } 2309#elif defined(_CALL_ELF) && _CALL_ELF == 2 2310 intptr_t diff; 2311 2312 /* In the ELFv2 ABI, we have to set up r12 to contain the destination 2313 address, which the callee uses to compute its TOC address. */ 2314 /* FIXME: when the branch is in range, we could avoid r12 load if we 2315 knew that the destination uses the same TOC, and what its local 2316 entry point offset is. */ 2317 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); 2318 2319 diff = tcg_pcrel_diff(s, target); 2320 if (in_range_b(diff)) { 2321 tcg_out_b(s, lk, target); 2322 } else { 2323 tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); 2324 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2325 } 2326#else 2327 tcg_out_b(s, lk, target); 2328#endif 2329} 2330 2331static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 2332 const TCGHelperInfo *info) 2333{ 2334 tcg_out_call_int(s, LK, target); 2335} 2336 2337static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = { 2338 [MO_UB] = LBZX, 2339 [MO_UW] = LHZX, 2340 [MO_UL] = LWZX, 2341 [MO_UQ] = LDX, 2342 [MO_SW] = LHAX, 2343 [MO_SL] = LWAX, 2344 [MO_BSWAP | MO_UB] = LBZX, 2345 [MO_BSWAP | MO_UW] = LHBRX, 2346 [MO_BSWAP | MO_UL] = LWBRX, 2347 [MO_BSWAP | MO_UQ] = LDBRX, 2348}; 2349 2350static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = { 2351 [MO_UB] = STBX, 2352 [MO_UW] = STHX, 2353 [MO_UL] = STWX, 2354 [MO_UQ] = STDX, 2355 [MO_BSWAP | MO_UB] = STBX, 2356 [MO_BSWAP | MO_UW] = STHBRX, 2357 [MO_BSWAP | MO_UL] = STWBRX, 2358 [MO_BSWAP | MO_UQ] = STDBRX, 2359}; 2360 2361static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) 2362{ 2363 if (arg < 0) { 2364 arg = TCG_REG_TMP1; 2365 } 2366 tcg_out32(s, MFSPR | RT(arg) | LR); 2367 return arg; 2368} 2369 2370/* 2371 * For the purposes of ppc32 sorting 4 input registers into 4 argument 2372 * registers, there is an outside chance we would require 3 temps. 2373 */ 2374static const TCGLdstHelperParam ldst_helper_param = { 2375 .ra_gen = ldst_ra_gen, 2376 .ntmp = 3, 2377 .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 } 2378}; 2379 2380static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2381{ 2382 MemOp opc = get_memop(lb->oi); 2383 2384 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2385 return false; 2386 } 2387 2388 tcg_out_ld_helper_args(s, lb, &ldst_helper_param); 2389 tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]); 2390 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); 2391 2392 tcg_out_b(s, 0, lb->raddr); 2393 return true; 2394} 2395 2396static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2397{ 2398 MemOp opc = get_memop(lb->oi); 2399 2400 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2401 return false; 2402 } 2403 2404 tcg_out_st_helper_args(s, lb, &ldst_helper_param); 2405 tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]); 2406 2407 tcg_out_b(s, 0, lb->raddr); 2408 return true; 2409} 2410 2411typedef struct { 2412 TCGReg base; 2413 TCGReg index; 2414 TCGAtomAlign aa; 2415} HostAddress; 2416 2417bool tcg_target_has_memory_bswap(MemOp memop) 2418{ 2419 TCGAtomAlign aa; 2420 2421 if ((memop & MO_SIZE) <= MO_64) { 2422 return true; 2423 } 2424 2425 /* 2426 * Reject 16-byte memop with 16-byte atomicity, 2427 * but do allow a pair of 64-bit operations. 2428 */ 2429 aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); 2430 return aa.atom <= MO_64; 2431} 2432 2433/* We expect to use a 16-bit negative offset from ENV. */ 2434#define MIN_TLB_MASK_TABLE_OFS -32768 2435 2436/* 2437 * For system-mode, perform the TLB load and compare. 2438 * For user-mode, perform any required alignment tests. 2439 * In both cases, return a TCGLabelQemuLdst structure if the slow path 2440 * is required and fill in @h with the host address for the fast path. 2441 */ 2442static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 2443 TCGReg addrlo, TCGReg addrhi, 2444 MemOpIdx oi, bool is_ld) 2445{ 2446 TCGType addr_type = s->addr_type; 2447 TCGLabelQemuLdst *ldst = NULL; 2448 MemOp opc = get_memop(oi); 2449 MemOp a_bits, s_bits; 2450 2451 /* 2452 * Book II, Section 1.4, Single-Copy Atomicity, specifies: 2453 * 2454 * Before 3.0, "An access that is not atomic is performed as a set of 2455 * smaller disjoint atomic accesses. In general, the number and alignment 2456 * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN. 2457 * 2458 * As of 3.0, "the non-atomic access is performed as described in 2459 * the corresponding list", which matches MO_ATOM_SUBALIGN. 2460 */ 2461 s_bits = opc & MO_SIZE; 2462 h->aa = atom_and_align_for_opc(s, opc, 2463 have_isa_3_00 ? MO_ATOM_SUBALIGN 2464 : MO_ATOM_IFALIGN, 2465 s_bits == MO_128); 2466 a_bits = h->aa.align; 2467 2468 if (tcg_use_softmmu) { 2469 int mem_index = get_mmuidx(oi); 2470 int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) 2471 : offsetof(CPUTLBEntry, addr_write); 2472 int fast_off = tlb_mask_table_ofs(s, mem_index); 2473 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); 2474 int table_off = fast_off + offsetof(CPUTLBDescFast, table); 2475 2476 ldst = new_ldst_label(s); 2477 ldst->is_ld = is_ld; 2478 ldst->oi = oi; 2479 ldst->addrlo_reg = addrlo; 2480 ldst->addrhi_reg = addrhi; 2481 2482 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ 2483 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); 2484 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); 2485 2486 /* Extract the page index, shifted into place for tlb index. */ 2487 if (TCG_TARGET_REG_BITS == 32) { 2488 tcg_out_shri32(s, TCG_REG_R0, addrlo, 2489 s->page_bits - CPU_TLB_ENTRY_BITS); 2490 } else { 2491 tcg_out_shri64(s, TCG_REG_R0, addrlo, 2492 s->page_bits - CPU_TLB_ENTRY_BITS); 2493 } 2494 tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); 2495 2496 /* 2497 * Load the (low part) TLB comparator into TMP2. 2498 * For 64-bit host, always load the entire 64-bit slot for simplicity. 2499 * We will ignore the high bits with tcg_out_cmp(..., addr_type). 2500 */ 2501 if (TCG_TARGET_REG_BITS == 64) { 2502 if (cmp_off == 0) { 2503 tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, 2504 TCG_REG_TMP1, TCG_REG_TMP2)); 2505 } else { 2506 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, 2507 TCG_REG_TMP1, TCG_REG_TMP2)); 2508 tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, 2509 TCG_REG_TMP1, cmp_off); 2510 } 2511 } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) { 2512 tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, 2513 TCG_REG_TMP1, TCG_REG_TMP2)); 2514 } else { 2515 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); 2516 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2517 cmp_off + 4 * HOST_BIG_ENDIAN); 2518 } 2519 2520 /* 2521 * Load the TLB addend for use on the fast path. 2522 * Do this asap to minimize any load use delay. 2523 */ 2524 if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) { 2525 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2526 offsetof(CPUTLBEntry, addend)); 2527 } 2528 2529 /* Clear the non-page, non-alignment bits from the address in R0. */ 2530 if (TCG_TARGET_REG_BITS == 32) { 2531 /* 2532 * We don't support unaligned accesses on 32-bits. 2533 * Preserve the bottom bits and thus trigger a comparison 2534 * failure on unaligned accesses. 2535 */ 2536 if (a_bits < s_bits) { 2537 a_bits = s_bits; 2538 } 2539 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, 2540 (32 - a_bits) & 31, 31 - s->page_bits); 2541 } else { 2542 TCGReg t = addrlo; 2543 2544 /* 2545 * If the access is unaligned, we need to make sure we fail if we 2546 * cross a page boundary. The trick is to add the access size-1 2547 * to the address before masking the low bits. That will make the 2548 * address overflow to the next page if we cross a page boundary, 2549 * which will then force a mismatch of the TLB compare. 2550 */ 2551 if (a_bits < s_bits) { 2552 unsigned a_mask = (1 << a_bits) - 1; 2553 unsigned s_mask = (1 << s_bits) - 1; 2554 tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); 2555 t = TCG_REG_R0; 2556 } 2557 2558 /* Mask the address for the requested alignment. */ 2559 if (addr_type == TCG_TYPE_I32) { 2560 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, 2561 (32 - a_bits) & 31, 31 - s->page_bits); 2562 } else if (a_bits == 0) { 2563 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); 2564 } else { 2565 tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 2566 64 - s->page_bits, s->page_bits - a_bits); 2567 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); 2568 } 2569 } 2570 2571 if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) { 2572 /* Low part comparison into cr7. */ 2573 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2574 0, 7, TCG_TYPE_I32); 2575 2576 /* Load the high part TLB comparator into TMP2. */ 2577 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2578 cmp_off + 4 * !HOST_BIG_ENDIAN); 2579 2580 /* Load addend, deferred for this case. */ 2581 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2582 offsetof(CPUTLBEntry, addend)); 2583 2584 /* High part comparison into cr6. */ 2585 tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 2586 0, 6, TCG_TYPE_I32); 2587 2588 /* Combine comparisons into cr0. */ 2589 tcg_out32(s, CRAND | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2590 } else { 2591 /* Full comparison into cr0. */ 2592 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2593 0, 0, addr_type); 2594 } 2595 2596 /* Load a pointer into the current opcode w/conditional branch-link. */ 2597 ldst->label_ptr[0] = s->code_ptr; 2598 tcg_out_bc(s, TCG_COND_NE, LK); 2599 2600 h->base = TCG_REG_TMP1; 2601 } else { 2602 if (a_bits) { 2603 ldst = new_ldst_label(s); 2604 ldst->is_ld = is_ld; 2605 ldst->oi = oi; 2606 ldst->addrlo_reg = addrlo; 2607 ldst->addrhi_reg = addrhi; 2608 2609 /* We are expecting a_bits to max out at 7, much lower than ANDI. */ 2610 tcg_debug_assert(a_bits < 16); 2611 tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1)); 2612 2613 ldst->label_ptr[0] = s->code_ptr; 2614 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); 2615 } 2616 2617 h->base = guest_base ? TCG_GUEST_BASE_REG : 0; 2618 } 2619 2620 if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) { 2621 /* Zero-extend the guest address for use in the host address. */ 2622 tcg_out_ext32u(s, TCG_REG_TMP2, addrlo); 2623 h->index = TCG_REG_TMP2; 2624 } else { 2625 h->index = addrlo; 2626 } 2627 2628 return ldst; 2629} 2630 2631static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, 2632 TCGReg addrlo, TCGReg addrhi, 2633 MemOpIdx oi, TCGType data_type) 2634{ 2635 MemOp opc = get_memop(oi); 2636 TCGLabelQemuLdst *ldst; 2637 HostAddress h; 2638 2639 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); 2640 2641 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2642 if (opc & MO_BSWAP) { 2643 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2644 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2645 tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0)); 2646 } else if (h.base != 0) { 2647 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2648 tcg_out32(s, LWZX | TAB(datahi, h.base, h.index)); 2649 tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0)); 2650 } else if (h.index == datahi) { 2651 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2652 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2653 } else { 2654 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2655 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2656 } 2657 } else { 2658 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; 2659 if (!have_isa_2_06 && insn == LDBRX) { 2660 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2661 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2662 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0)); 2663 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); 2664 } else if (insn) { 2665 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2666 } else { 2667 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; 2668 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2669 tcg_out_movext(s, TCG_TYPE_REG, datalo, 2670 TCG_TYPE_REG, opc & MO_SSIZE, datalo); 2671 } 2672 } 2673 2674 if (ldst) { 2675 ldst->type = data_type; 2676 ldst->datalo_reg = datalo; 2677 ldst->datahi_reg = datahi; 2678 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2679 } 2680} 2681 2682static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, 2683 TCGReg addrlo, TCGReg addrhi, 2684 MemOpIdx oi, TCGType data_type) 2685{ 2686 MemOp opc = get_memop(oi); 2687 TCGLabelQemuLdst *ldst; 2688 HostAddress h; 2689 2690 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); 2691 2692 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2693 if (opc & MO_BSWAP) { 2694 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2695 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2696 tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0)); 2697 } else if (h.base != 0) { 2698 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2699 tcg_out32(s, STWX | SAB(datahi, h.base, h.index)); 2700 tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0)); 2701 } else { 2702 tcg_out32(s, STW | TAI(datahi, h.index, 0)); 2703 tcg_out32(s, STW | TAI(datalo, h.index, 4)); 2704 } 2705 } else { 2706 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; 2707 if (!have_isa_2_06 && insn == STDBRX) { 2708 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2709 tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4)); 2710 tcg_out_shri64(s, TCG_REG_R0, datalo, 32); 2711 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2)); 2712 } else { 2713 tcg_out32(s, insn | SAB(datalo, h.base, h.index)); 2714 } 2715 } 2716 2717 if (ldst) { 2718 ldst->type = data_type; 2719 ldst->datalo_reg = datalo; 2720 ldst->datahi_reg = datahi; 2721 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2722 } 2723} 2724 2725static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, 2726 TCGReg addr_reg, MemOpIdx oi, bool is_ld) 2727{ 2728 TCGLabelQemuLdst *ldst; 2729 HostAddress h; 2730 bool need_bswap; 2731 uint32_t insn; 2732 TCGReg index; 2733 2734 ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld); 2735 2736 /* Compose the final address, as LQ/STQ have no indexing. */ 2737 index = h.index; 2738 if (h.base != 0) { 2739 index = TCG_REG_TMP1; 2740 tcg_out32(s, ADD | TAB(index, h.base, h.index)); 2741 } 2742 need_bswap = get_memop(oi) & MO_BSWAP; 2743 2744 if (h.aa.atom == MO_128) { 2745 tcg_debug_assert(!need_bswap); 2746 tcg_debug_assert(datalo & 1); 2747 tcg_debug_assert(datahi == datalo - 1); 2748 tcg_debug_assert(!is_ld || datahi != index); 2749 insn = is_ld ? LQ : STQ; 2750 tcg_out32(s, insn | TAI(datahi, index, 0)); 2751 } else { 2752 TCGReg d1, d2; 2753 2754 if (HOST_BIG_ENDIAN ^ need_bswap) { 2755 d1 = datahi, d2 = datalo; 2756 } else { 2757 d1 = datalo, d2 = datahi; 2758 } 2759 2760 if (need_bswap) { 2761 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); 2762 insn = is_ld ? LDBRX : STDBRX; 2763 tcg_out32(s, insn | TAB(d1, 0, index)); 2764 tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); 2765 } else { 2766 insn = is_ld ? LD : STD; 2767 tcg_out32(s, insn | TAI(d1, index, 0)); 2768 tcg_out32(s, insn | TAI(d2, index, 8)); 2769 } 2770 } 2771 2772 if (ldst) { 2773 ldst->type = TCG_TYPE_I128; 2774 ldst->datalo_reg = datalo; 2775 ldst->datahi_reg = datahi; 2776 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2777 } 2778} 2779 2780static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2781{ 2782 int i; 2783 for (i = 0; i < count; ++i) { 2784 p[i] = NOP; 2785 } 2786} 2787 2788/* Parameters for function call generation, used in tcg.c. */ 2789#define TCG_TARGET_STACK_ALIGN 16 2790 2791#ifdef _CALL_AIX 2792# define LINK_AREA_SIZE (6 * SZR) 2793# define LR_OFFSET (1 * SZR) 2794# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) 2795#elif defined(_CALL_DARWIN) 2796# define LINK_AREA_SIZE (6 * SZR) 2797# define LR_OFFSET (2 * SZR) 2798#elif TCG_TARGET_REG_BITS == 64 2799# if defined(_CALL_ELF) && _CALL_ELF == 2 2800# define LINK_AREA_SIZE (4 * SZR) 2801# define LR_OFFSET (1 * SZR) 2802# endif 2803#else /* TCG_TARGET_REG_BITS == 32 */ 2804# if defined(_CALL_SYSV) 2805# define LINK_AREA_SIZE (2 * SZR) 2806# define LR_OFFSET (1 * SZR) 2807# endif 2808#endif 2809#ifndef LR_OFFSET 2810# error "Unhandled abi" 2811#endif 2812#ifndef TCG_TARGET_CALL_STACK_OFFSET 2813# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE 2814#endif 2815 2816#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2817#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) 2818 2819#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ 2820 + TCG_STATIC_CALL_ARGS_SIZE \ 2821 + CPU_TEMP_BUF_SIZE \ 2822 + REG_SAVE_SIZE \ 2823 + TCG_TARGET_STACK_ALIGN - 1) \ 2824 & -TCG_TARGET_STACK_ALIGN) 2825 2826#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) 2827 2828static void tcg_target_qemu_prologue(TCGContext *s) 2829{ 2830 int i; 2831 2832#ifdef _CALL_AIX 2833 const void **desc = (const void **)s->code_ptr; 2834 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ 2835 desc[1] = 0; /* environment pointer */ 2836 s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ 2837#endif 2838 2839 tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, 2840 CPU_TEMP_BUF_SIZE); 2841 2842 /* Prologue */ 2843 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); 2844 tcg_out32(s, (SZR == 8 ? STDU : STWU) 2845 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); 2846 2847 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2848 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2849 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2850 } 2851 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2852 2853 if (!tcg_use_softmmu && guest_base) { 2854 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); 2855 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2856 } 2857 2858 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2859 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); 2860 tcg_out32(s, BCCTR | BO_ALWAYS); 2861 2862 /* Epilogue */ 2863 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2864 2865 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2866 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2867 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2868 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2869 } 2870 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); 2871 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); 2872 tcg_out32(s, BCLR | BO_ALWAYS); 2873} 2874 2875static void tcg_out_tb_start(TCGContext *s) 2876{ 2877 /* Load TCG_REG_TB. */ 2878 if (USE_REG_TB) { 2879 if (have_isa_3_00) { 2880 /* lnia REG_TB */ 2881 tcg_out_addpcis(s, TCG_REG_TB, 0); 2882 } else { 2883 /* bcl 20,31,$+4 (preferred form for getting nia) */ 2884 tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK); 2885 tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR); 2886 } 2887 } 2888} 2889 2890static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) 2891{ 2892 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); 2893 tcg_out_b(s, 0, tcg_code_gen_epilogue); 2894} 2895 2896static void tcg_out_goto_tb(TCGContext *s, int which) 2897{ 2898 uintptr_t ptr = get_jmp_target_addr(s, which); 2899 int16_t lo; 2900 2901 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2902 set_jmp_insn_offset(s, which); 2903 tcg_out32(s, NOP); 2904 2905 /* When branch is out of range, fall through to indirect. */ 2906 if (USE_REG_TB) { 2907 ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr); 2908 tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset); 2909 } else if (have_isa_3_10) { 2910 ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr); 2911 tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1); 2912 } else if (have_isa_3_00) { 2913 ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4; 2914 lo = offset; 2915 tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo); 2916 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2917 } else { 2918 lo = ptr; 2919 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo); 2920 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2921 } 2922 2923 tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); 2924 tcg_out32(s, BCCTR | BO_ALWAYS); 2925 set_jmp_reset_offset(s, which); 2926} 2927 2928void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2929 uintptr_t jmp_rx, uintptr_t jmp_rw) 2930{ 2931 uintptr_t addr = tb->jmp_target_addr[n]; 2932 intptr_t diff = addr - jmp_rx; 2933 tcg_insn_unit insn; 2934 2935 if (in_range_b(diff)) { 2936 insn = B | (diff & 0x3fffffc); 2937 } else { 2938 insn = NOP; 2939 } 2940 2941 qatomic_set((uint32_t *)jmp_rw, insn); 2942 flush_idcache_range(jmp_rx, jmp_rw, 4); 2943} 2944 2945static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2946 const TCGArg args[TCG_MAX_OP_ARGS], 2947 const int const_args[TCG_MAX_OP_ARGS]) 2948{ 2949 TCGArg a0, a1, a2; 2950 2951 switch (opc) { 2952 case INDEX_op_goto_ptr: 2953 tcg_out32(s, MTSPR | RS(args[0]) | CTR); 2954 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); 2955 tcg_out32(s, BCCTR | BO_ALWAYS); 2956 break; 2957 case INDEX_op_br: 2958 { 2959 TCGLabel *l = arg_label(args[0]); 2960 uint32_t insn = B; 2961 2962 if (l->has_value) { 2963 insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), 2964 l->u.value_ptr); 2965 } else { 2966 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); 2967 } 2968 tcg_out32(s, insn); 2969 } 2970 break; 2971 case INDEX_op_ld8u_i32: 2972 case INDEX_op_ld8u_i64: 2973 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2974 break; 2975 case INDEX_op_ld8s_i32: 2976 case INDEX_op_ld8s_i64: 2977 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2978 tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); 2979 break; 2980 case INDEX_op_ld16u_i32: 2981 case INDEX_op_ld16u_i64: 2982 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); 2983 break; 2984 case INDEX_op_ld16s_i32: 2985 case INDEX_op_ld16s_i64: 2986 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); 2987 break; 2988 case INDEX_op_ld_i32: 2989 case INDEX_op_ld32u_i64: 2990 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); 2991 break; 2992 case INDEX_op_ld32s_i64: 2993 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); 2994 break; 2995 case INDEX_op_ld_i64: 2996 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); 2997 break; 2998 case INDEX_op_st8_i32: 2999 case INDEX_op_st8_i64: 3000 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); 3001 break; 3002 case INDEX_op_st16_i32: 3003 case INDEX_op_st16_i64: 3004 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); 3005 break; 3006 case INDEX_op_st_i32: 3007 case INDEX_op_st32_i64: 3008 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); 3009 break; 3010 case INDEX_op_st_i64: 3011 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); 3012 break; 3013 3014 case INDEX_op_add_i32: 3015 a0 = args[0], a1 = args[1], a2 = args[2]; 3016 if (const_args[2]) { 3017 do_addi_32: 3018 tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); 3019 } else { 3020 tcg_out32(s, ADD | TAB(a0, a1, a2)); 3021 } 3022 break; 3023 case INDEX_op_sub_i32: 3024 a0 = args[0], a1 = args[1], a2 = args[2]; 3025 if (const_args[1]) { 3026 if (const_args[2]) { 3027 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); 3028 } else { 3029 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 3030 } 3031 } else if (const_args[2]) { 3032 a2 = -a2; 3033 goto do_addi_32; 3034 } else { 3035 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 3036 } 3037 break; 3038 3039 case INDEX_op_and_i32: 3040 a0 = args[0], a1 = args[1], a2 = args[2]; 3041 if (const_args[2]) { 3042 tcg_out_andi32(s, a0, a1, a2); 3043 } else { 3044 tcg_out32(s, AND | SAB(a1, a0, a2)); 3045 } 3046 break; 3047 case INDEX_op_and_i64: 3048 a0 = args[0], a1 = args[1], a2 = args[2]; 3049 if (const_args[2]) { 3050 tcg_out_andi64(s, a0, a1, a2); 3051 } else { 3052 tcg_out32(s, AND | SAB(a1, a0, a2)); 3053 } 3054 break; 3055 case INDEX_op_or_i64: 3056 case INDEX_op_or_i32: 3057 a0 = args[0], a1 = args[1], a2 = args[2]; 3058 if (const_args[2]) { 3059 tcg_out_ori32(s, a0, a1, a2); 3060 } else { 3061 tcg_out32(s, OR | SAB(a1, a0, a2)); 3062 } 3063 break; 3064 case INDEX_op_xor_i64: 3065 case INDEX_op_xor_i32: 3066 a0 = args[0], a1 = args[1], a2 = args[2]; 3067 if (const_args[2]) { 3068 tcg_out_xori32(s, a0, a1, a2); 3069 } else { 3070 tcg_out32(s, XOR | SAB(a1, a0, a2)); 3071 } 3072 break; 3073 case INDEX_op_andc_i32: 3074 a0 = args[0], a1 = args[1], a2 = args[2]; 3075 if (const_args[2]) { 3076 tcg_out_andi32(s, a0, a1, ~a2); 3077 } else { 3078 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 3079 } 3080 break; 3081 case INDEX_op_andc_i64: 3082 a0 = args[0], a1 = args[1], a2 = args[2]; 3083 if (const_args[2]) { 3084 tcg_out_andi64(s, a0, a1, ~a2); 3085 } else { 3086 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 3087 } 3088 break; 3089 case INDEX_op_orc_i32: 3090 if (const_args[2]) { 3091 tcg_out_ori32(s, args[0], args[1], ~args[2]); 3092 break; 3093 } 3094 /* FALLTHRU */ 3095 case INDEX_op_orc_i64: 3096 tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); 3097 break; 3098 case INDEX_op_eqv_i32: 3099 if (const_args[2]) { 3100 tcg_out_xori32(s, args[0], args[1], ~args[2]); 3101 break; 3102 } 3103 /* FALLTHRU */ 3104 case INDEX_op_eqv_i64: 3105 tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); 3106 break; 3107 case INDEX_op_nand_i32: 3108 case INDEX_op_nand_i64: 3109 tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); 3110 break; 3111 case INDEX_op_nor_i32: 3112 case INDEX_op_nor_i64: 3113 tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); 3114 break; 3115 3116 case INDEX_op_clz_i32: 3117 tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], 3118 args[2], const_args[2]); 3119 break; 3120 case INDEX_op_ctz_i32: 3121 tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], 3122 args[2], const_args[2]); 3123 break; 3124 case INDEX_op_ctpop_i32: 3125 tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); 3126 break; 3127 3128 case INDEX_op_clz_i64: 3129 tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], 3130 args[2], const_args[2]); 3131 break; 3132 case INDEX_op_ctz_i64: 3133 tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], 3134 args[2], const_args[2]); 3135 break; 3136 case INDEX_op_ctpop_i64: 3137 tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); 3138 break; 3139 3140 case INDEX_op_mul_i32: 3141 a0 = args[0], a1 = args[1], a2 = args[2]; 3142 if (const_args[2]) { 3143 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 3144 } else { 3145 tcg_out32(s, MULLW | TAB(a0, a1, a2)); 3146 } 3147 break; 3148 3149 case INDEX_op_div_i32: 3150 tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); 3151 break; 3152 3153 case INDEX_op_divu_i32: 3154 tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); 3155 break; 3156 3157 case INDEX_op_rem_i32: 3158 tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); 3159 break; 3160 3161 case INDEX_op_remu_i32: 3162 tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); 3163 break; 3164 3165 case INDEX_op_shl_i32: 3166 if (const_args[2]) { 3167 /* Limit immediate shift count lest we create an illegal insn. */ 3168 tcg_out_shli32(s, args[0], args[1], args[2] & 31); 3169 } else { 3170 tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); 3171 } 3172 break; 3173 case INDEX_op_shr_i32: 3174 if (const_args[2]) { 3175 /* Limit immediate shift count lest we create an illegal insn. */ 3176 tcg_out_shri32(s, args[0], args[1], args[2] & 31); 3177 } else { 3178 tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); 3179 } 3180 break; 3181 case INDEX_op_sar_i32: 3182 if (const_args[2]) { 3183 tcg_out_sari32(s, args[0], args[1], args[2]); 3184 } else { 3185 tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); 3186 } 3187 break; 3188 case INDEX_op_rotl_i32: 3189 if (const_args[2]) { 3190 tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); 3191 } else { 3192 tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) 3193 | MB(0) | ME(31)); 3194 } 3195 break; 3196 case INDEX_op_rotr_i32: 3197 if (const_args[2]) { 3198 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); 3199 } else { 3200 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); 3201 tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) 3202 | MB(0) | ME(31)); 3203 } 3204 break; 3205 3206 case INDEX_op_brcond_i32: 3207 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 3208 arg_label(args[3]), TCG_TYPE_I32); 3209 break; 3210 case INDEX_op_brcond_i64: 3211 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 3212 arg_label(args[3]), TCG_TYPE_I64); 3213 break; 3214 case INDEX_op_brcond2_i32: 3215 tcg_out_brcond2(s, args, const_args); 3216 break; 3217 3218 case INDEX_op_neg_i32: 3219 case INDEX_op_neg_i64: 3220 tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); 3221 break; 3222 3223 case INDEX_op_not_i32: 3224 case INDEX_op_not_i64: 3225 tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); 3226 break; 3227 3228 case INDEX_op_add_i64: 3229 a0 = args[0], a1 = args[1], a2 = args[2]; 3230 if (const_args[2]) { 3231 do_addi_64: 3232 tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); 3233 } else { 3234 tcg_out32(s, ADD | TAB(a0, a1, a2)); 3235 } 3236 break; 3237 case INDEX_op_sub_i64: 3238 a0 = args[0], a1 = args[1], a2 = args[2]; 3239 if (const_args[1]) { 3240 if (const_args[2]) { 3241 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); 3242 } else { 3243 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 3244 } 3245 } else if (const_args[2]) { 3246 a2 = -a2; 3247 goto do_addi_64; 3248 } else { 3249 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 3250 } 3251 break; 3252 3253 case INDEX_op_shl_i64: 3254 if (const_args[2]) { 3255 /* Limit immediate shift count lest we create an illegal insn. */ 3256 tcg_out_shli64(s, args[0], args[1], args[2] & 63); 3257 } else { 3258 tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); 3259 } 3260 break; 3261 case INDEX_op_shr_i64: 3262 if (const_args[2]) { 3263 /* Limit immediate shift count lest we create an illegal insn. */ 3264 tcg_out_shri64(s, args[0], args[1], args[2] & 63); 3265 } else { 3266 tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); 3267 } 3268 break; 3269 case INDEX_op_sar_i64: 3270 if (const_args[2]) { 3271 tcg_out_sari64(s, args[0], args[1], args[2]); 3272 } else { 3273 tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); 3274 } 3275 break; 3276 case INDEX_op_rotl_i64: 3277 if (const_args[2]) { 3278 tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); 3279 } else { 3280 tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); 3281 } 3282 break; 3283 case INDEX_op_rotr_i64: 3284 if (const_args[2]) { 3285 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); 3286 } else { 3287 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); 3288 tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); 3289 } 3290 break; 3291 3292 case INDEX_op_mul_i64: 3293 a0 = args[0], a1 = args[1], a2 = args[2]; 3294 if (const_args[2]) { 3295 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 3296 } else { 3297 tcg_out32(s, MULLD | TAB(a0, a1, a2)); 3298 } 3299 break; 3300 case INDEX_op_div_i64: 3301 tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); 3302 break; 3303 case INDEX_op_divu_i64: 3304 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); 3305 break; 3306 case INDEX_op_rem_i64: 3307 tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); 3308 break; 3309 case INDEX_op_remu_i64: 3310 tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); 3311 break; 3312 3313 case INDEX_op_qemu_ld_a64_i32: 3314 if (TCG_TARGET_REG_BITS == 32) { 3315 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], 3316 args[3], TCG_TYPE_I32); 3317 break; 3318 } 3319 /* fall through */ 3320 case INDEX_op_qemu_ld_a32_i32: 3321 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 3322 break; 3323 case INDEX_op_qemu_ld_a32_i64: 3324 if (TCG_TARGET_REG_BITS == 64) { 3325 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 3326 args[2], TCG_TYPE_I64); 3327 } else { 3328 tcg_out_qemu_ld(s, args[0], args[1], args[2], -1, 3329 args[3], TCG_TYPE_I64); 3330 } 3331 break; 3332 case INDEX_op_qemu_ld_a64_i64: 3333 if (TCG_TARGET_REG_BITS == 64) { 3334 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 3335 args[2], TCG_TYPE_I64); 3336 } else { 3337 tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], 3338 args[4], TCG_TYPE_I64); 3339 } 3340 break; 3341 case INDEX_op_qemu_ld_a32_i128: 3342 case INDEX_op_qemu_ld_a64_i128: 3343 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3344 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); 3345 break; 3346 3347 case INDEX_op_qemu_st_a64_i32: 3348 if (TCG_TARGET_REG_BITS == 32) { 3349 tcg_out_qemu_st(s, args[0], -1, args[1], args[2], 3350 args[3], TCG_TYPE_I32); 3351 break; 3352 } 3353 /* fall through */ 3354 case INDEX_op_qemu_st_a32_i32: 3355 tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 3356 break; 3357 case INDEX_op_qemu_st_a32_i64: 3358 if (TCG_TARGET_REG_BITS == 64) { 3359 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 3360 args[2], TCG_TYPE_I64); 3361 } else { 3362 tcg_out_qemu_st(s, args[0], args[1], args[2], -1, 3363 args[3], TCG_TYPE_I64); 3364 } 3365 break; 3366 case INDEX_op_qemu_st_a64_i64: 3367 if (TCG_TARGET_REG_BITS == 64) { 3368 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 3369 args[2], TCG_TYPE_I64); 3370 } else { 3371 tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], 3372 args[4], TCG_TYPE_I64); 3373 } 3374 break; 3375 case INDEX_op_qemu_st_a32_i128: 3376 case INDEX_op_qemu_st_a64_i128: 3377 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3378 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); 3379 break; 3380 3381 case INDEX_op_setcond_i32: 3382 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3383 const_args[2], false); 3384 break; 3385 case INDEX_op_setcond_i64: 3386 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3387 const_args[2], false); 3388 break; 3389 case INDEX_op_negsetcond_i32: 3390 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3391 const_args[2], true); 3392 break; 3393 case INDEX_op_negsetcond_i64: 3394 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3395 const_args[2], true); 3396 break; 3397 case INDEX_op_setcond2_i32: 3398 tcg_out_setcond2(s, args, const_args); 3399 break; 3400 3401 case INDEX_op_bswap16_i32: 3402 case INDEX_op_bswap16_i64: 3403 tcg_out_bswap16(s, args[0], args[1], args[2]); 3404 break; 3405 case INDEX_op_bswap32_i32: 3406 tcg_out_bswap32(s, args[0], args[1], 0); 3407 break; 3408 case INDEX_op_bswap32_i64: 3409 tcg_out_bswap32(s, args[0], args[1], args[2]); 3410 break; 3411 case INDEX_op_bswap64_i64: 3412 tcg_out_bswap64(s, args[0], args[1]); 3413 break; 3414 3415 case INDEX_op_deposit_i32: 3416 if (const_args[2]) { 3417 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; 3418 tcg_out_andi32(s, args[0], args[0], ~mask); 3419 } else { 3420 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], 3421 32 - args[3] - args[4], 31 - args[3]); 3422 } 3423 break; 3424 case INDEX_op_deposit_i64: 3425 if (const_args[2]) { 3426 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; 3427 tcg_out_andi64(s, args[0], args[0], ~mask); 3428 } else { 3429 tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], 3430 64 - args[3] - args[4]); 3431 } 3432 break; 3433 3434 case INDEX_op_extract_i32: 3435 tcg_out_rlw(s, RLWINM, args[0], args[1], 3436 32 - args[2], 32 - args[3], 31); 3437 break; 3438 case INDEX_op_extract_i64: 3439 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); 3440 break; 3441 3442 case INDEX_op_movcond_i32: 3443 tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], 3444 args[3], args[4], const_args[2]); 3445 break; 3446 case INDEX_op_movcond_i64: 3447 tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], 3448 args[3], args[4], const_args[2]); 3449 break; 3450 3451#if TCG_TARGET_REG_BITS == 64 3452 case INDEX_op_add2_i64: 3453#else 3454 case INDEX_op_add2_i32: 3455#endif 3456 /* Note that the CA bit is defined based on the word size of the 3457 environment. So in 64-bit mode it's always carry-out of bit 63. 3458 The fallback code using deposit works just as well for 32-bit. */ 3459 a0 = args[0], a1 = args[1]; 3460 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { 3461 a0 = TCG_REG_R0; 3462 } 3463 if (const_args[4]) { 3464 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); 3465 } else { 3466 tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); 3467 } 3468 if (const_args[5]) { 3469 tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); 3470 } else { 3471 tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); 3472 } 3473 if (a0 != args[0]) { 3474 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3475 } 3476 break; 3477 3478#if TCG_TARGET_REG_BITS == 64 3479 case INDEX_op_sub2_i64: 3480#else 3481 case INDEX_op_sub2_i32: 3482#endif 3483 a0 = args[0], a1 = args[1]; 3484 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { 3485 a0 = TCG_REG_R0; 3486 } 3487 if (const_args[2]) { 3488 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); 3489 } else { 3490 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); 3491 } 3492 if (const_args[3]) { 3493 tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); 3494 } else { 3495 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); 3496 } 3497 if (a0 != args[0]) { 3498 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3499 } 3500 break; 3501 3502 case INDEX_op_muluh_i32: 3503 tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); 3504 break; 3505 case INDEX_op_mulsh_i32: 3506 tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); 3507 break; 3508 case INDEX_op_muluh_i64: 3509 tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); 3510 break; 3511 case INDEX_op_mulsh_i64: 3512 tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); 3513 break; 3514 3515 case INDEX_op_mb: 3516 tcg_out_mb(s, args[0]); 3517 break; 3518 3519 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 3520 case INDEX_op_mov_i64: 3521 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 3522 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 3523 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 3524 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 3525 case INDEX_op_ext8s_i64: 3526 case INDEX_op_ext8u_i32: 3527 case INDEX_op_ext8u_i64: 3528 case INDEX_op_ext16s_i32: 3529 case INDEX_op_ext16s_i64: 3530 case INDEX_op_ext16u_i32: 3531 case INDEX_op_ext16u_i64: 3532 case INDEX_op_ext32s_i64: 3533 case INDEX_op_ext32u_i64: 3534 case INDEX_op_ext_i32_i64: 3535 case INDEX_op_extu_i32_i64: 3536 case INDEX_op_extrl_i64_i32: 3537 default: 3538 g_assert_not_reached(); 3539 } 3540} 3541 3542int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3543{ 3544 switch (opc) { 3545 case INDEX_op_and_vec: 3546 case INDEX_op_or_vec: 3547 case INDEX_op_xor_vec: 3548 case INDEX_op_andc_vec: 3549 case INDEX_op_not_vec: 3550 case INDEX_op_nor_vec: 3551 case INDEX_op_eqv_vec: 3552 case INDEX_op_nand_vec: 3553 return 1; 3554 case INDEX_op_orc_vec: 3555 return have_isa_2_07; 3556 case INDEX_op_add_vec: 3557 case INDEX_op_sub_vec: 3558 case INDEX_op_smax_vec: 3559 case INDEX_op_smin_vec: 3560 case INDEX_op_umax_vec: 3561 case INDEX_op_umin_vec: 3562 case INDEX_op_shlv_vec: 3563 case INDEX_op_shrv_vec: 3564 case INDEX_op_sarv_vec: 3565 case INDEX_op_rotlv_vec: 3566 return vece <= MO_32 || have_isa_2_07; 3567 case INDEX_op_ssadd_vec: 3568 case INDEX_op_sssub_vec: 3569 case INDEX_op_usadd_vec: 3570 case INDEX_op_ussub_vec: 3571 return vece <= MO_32; 3572 case INDEX_op_shli_vec: 3573 case INDEX_op_shri_vec: 3574 case INDEX_op_sari_vec: 3575 case INDEX_op_rotli_vec: 3576 return vece <= MO_32 || have_isa_2_07 ? -1 : 0; 3577 case INDEX_op_cmp_vec: 3578 case INDEX_op_cmpsel_vec: 3579 return vece <= MO_32 || have_isa_2_07 ? 1 : 0; 3580 case INDEX_op_neg_vec: 3581 return vece >= MO_32 && have_isa_3_00; 3582 case INDEX_op_mul_vec: 3583 switch (vece) { 3584 case MO_8: 3585 case MO_16: 3586 return -1; 3587 case MO_32: 3588 return have_isa_2_07 ? 1 : -1; 3589 case MO_64: 3590 return have_isa_3_10; 3591 } 3592 return 0; 3593 case INDEX_op_bitsel_vec: 3594 return have_vsx; 3595 case INDEX_op_rotrv_vec: 3596 return -1; 3597 default: 3598 return 0; 3599 } 3600} 3601 3602static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 3603 TCGReg dst, TCGReg src) 3604{ 3605 tcg_debug_assert(dst >= TCG_REG_V0); 3606 3607 /* Splat from integer reg allowed via constraints for v3.00. */ 3608 if (src < TCG_REG_V0) { 3609 tcg_debug_assert(have_isa_3_00); 3610 switch (vece) { 3611 case MO_64: 3612 tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); 3613 return true; 3614 case MO_32: 3615 tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); 3616 return true; 3617 default: 3618 /* Fail, so that we fall back on either dupm or mov+dup. */ 3619 return false; 3620 } 3621 } 3622 3623 /* 3624 * Recall we use (or emulate) VSX integer loads, so the integer is 3625 * right justified within the left (zero-index) double-word. 3626 */ 3627 switch (vece) { 3628 case MO_8: 3629 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); 3630 break; 3631 case MO_16: 3632 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); 3633 break; 3634 case MO_32: 3635 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); 3636 break; 3637 case MO_64: 3638 if (have_vsx) { 3639 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); 3640 break; 3641 } 3642 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); 3643 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); 3644 break; 3645 default: 3646 g_assert_not_reached(); 3647 } 3648 return true; 3649} 3650 3651static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 3652 TCGReg out, TCGReg base, intptr_t offset) 3653{ 3654 int elt; 3655 3656 tcg_debug_assert(out >= TCG_REG_V0); 3657 switch (vece) { 3658 case MO_8: 3659 if (have_isa_3_00) { 3660 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); 3661 } else { 3662 tcg_out_mem_long(s, 0, LVEBX, out, base, offset); 3663 } 3664 elt = extract32(offset, 0, 4); 3665#if !HOST_BIG_ENDIAN 3666 elt ^= 15; 3667#endif 3668 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); 3669 break; 3670 case MO_16: 3671 tcg_debug_assert((offset & 1) == 0); 3672 if (have_isa_3_00) { 3673 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); 3674 } else { 3675 tcg_out_mem_long(s, 0, LVEHX, out, base, offset); 3676 } 3677 elt = extract32(offset, 1, 3); 3678#if !HOST_BIG_ENDIAN 3679 elt ^= 7; 3680#endif 3681 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); 3682 break; 3683 case MO_32: 3684 if (have_isa_3_00) { 3685 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); 3686 break; 3687 } 3688 tcg_debug_assert((offset & 3) == 0); 3689 tcg_out_mem_long(s, 0, LVEWX, out, base, offset); 3690 elt = extract32(offset, 2, 2); 3691#if !HOST_BIG_ENDIAN 3692 elt ^= 3; 3693#endif 3694 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); 3695 break; 3696 case MO_64: 3697 if (have_vsx) { 3698 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); 3699 break; 3700 } 3701 tcg_debug_assert((offset & 7) == 0); 3702 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); 3703 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); 3704 elt = extract32(offset, 3, 1); 3705#if !HOST_BIG_ENDIAN 3706 elt = !elt; 3707#endif 3708 if (elt) { 3709 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); 3710 } else { 3711 tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); 3712 } 3713 break; 3714 default: 3715 g_assert_not_reached(); 3716 } 3717 return true; 3718} 3719 3720static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1) 3721{ 3722 tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1)); 3723} 3724 3725static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2) 3726{ 3727 tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2)); 3728} 3729 3730static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2) 3731{ 3732 tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2)); 3733} 3734 3735static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2) 3736{ 3737 tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2)); 3738} 3739 3740static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2) 3741{ 3742 tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2)); 3743} 3744 3745static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d, 3746 TCGReg c, TCGReg t, TCGReg f) 3747{ 3748 if (TCG_TARGET_HAS_bitsel_vec) { 3749 tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f)); 3750 } else { 3751 tcg_out_and_vec(s, TCG_VEC_TMP2, t, c); 3752 tcg_out_andc_vec(s, d, f, c); 3753 tcg_out_or_vec(s, d, d, TCG_VEC_TMP2); 3754 } 3755} 3756 3757static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0, 3758 TCGReg a1, TCGReg a2, TCGCond cond) 3759{ 3760 static const uint32_t 3761 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, 3762 ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, 3763 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, 3764 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }; 3765 uint32_t insn; 3766 3767 bool need_swap = false, need_inv = false; 3768 3769 tcg_debug_assert(vece <= MO_32 || have_isa_2_07); 3770 3771 switch (cond) { 3772 case TCG_COND_EQ: 3773 case TCG_COND_GT: 3774 case TCG_COND_GTU: 3775 break; 3776 case TCG_COND_NE: 3777 if (have_isa_3_00 && vece <= MO_32) { 3778 break; 3779 } 3780 /* fall through */ 3781 case TCG_COND_LE: 3782 case TCG_COND_LEU: 3783 need_inv = true; 3784 break; 3785 case TCG_COND_LT: 3786 case TCG_COND_LTU: 3787 need_swap = true; 3788 break; 3789 case TCG_COND_GE: 3790 case TCG_COND_GEU: 3791 need_swap = need_inv = true; 3792 break; 3793 default: 3794 g_assert_not_reached(); 3795 } 3796 3797 if (need_inv) { 3798 cond = tcg_invert_cond(cond); 3799 } 3800 if (need_swap) { 3801 TCGReg swap = a1; 3802 a1 = a2; 3803 a2 = swap; 3804 cond = tcg_swap_cond(cond); 3805 } 3806 3807 switch (cond) { 3808 case TCG_COND_EQ: 3809 insn = eq_op[vece]; 3810 break; 3811 case TCG_COND_NE: 3812 insn = ne_op[vece]; 3813 break; 3814 case TCG_COND_GT: 3815 insn = gts_op[vece]; 3816 break; 3817 case TCG_COND_GTU: 3818 insn = gtu_op[vece]; 3819 break; 3820 default: 3821 g_assert_not_reached(); 3822 } 3823 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 3824 3825 return need_inv; 3826} 3827 3828static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0, 3829 TCGReg a1, TCGReg a2, TCGCond cond) 3830{ 3831 if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) { 3832 tcg_out_not_vec(s, a0, a0); 3833 } 3834} 3835 3836static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0, 3837 TCGReg c1, TCGReg c2, TCGArg v3, int const_v3, 3838 TCGReg v4, TCGCond cond) 3839{ 3840 bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond); 3841 3842 if (!const_v3) { 3843 if (inv) { 3844 tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3); 3845 } else { 3846 tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4); 3847 } 3848 } else if (v3) { 3849 if (inv) { 3850 tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1); 3851 } else { 3852 tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1); 3853 } 3854 } else { 3855 if (inv) { 3856 tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1); 3857 } else { 3858 tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1); 3859 } 3860 } 3861} 3862 3863static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3864 unsigned vecl, unsigned vece, 3865 const TCGArg args[TCG_MAX_OP_ARGS], 3866 const int const_args[TCG_MAX_OP_ARGS]) 3867{ 3868 static const uint32_t 3869 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, 3870 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, 3871 mul_op[4] = { 0, 0, VMULUWM, VMULLD }, 3872 neg_op[4] = { 0, 0, VNEGW, VNEGD }, 3873 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, 3874 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, 3875 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, 3876 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, 3877 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 3878 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 3879 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 3880 smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, 3881 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, 3882 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, 3883 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, 3884 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, 3885 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, 3886 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, 3887 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, 3888 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, 3889 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; 3890 3891 TCGType type = vecl + TCG_TYPE_V64; 3892 TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; 3893 uint32_t insn; 3894 3895 switch (opc) { 3896 case INDEX_op_ld_vec: 3897 tcg_out_ld(s, type, a0, a1, a2); 3898 return; 3899 case INDEX_op_st_vec: 3900 tcg_out_st(s, type, a0, a1, a2); 3901 return; 3902 case INDEX_op_dupm_vec: 3903 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 3904 return; 3905 3906 case INDEX_op_add_vec: 3907 insn = add_op[vece]; 3908 break; 3909 case INDEX_op_sub_vec: 3910 insn = sub_op[vece]; 3911 break; 3912 case INDEX_op_neg_vec: 3913 insn = neg_op[vece]; 3914 a2 = a1; 3915 a1 = 0; 3916 break; 3917 case INDEX_op_mul_vec: 3918 insn = mul_op[vece]; 3919 break; 3920 case INDEX_op_ssadd_vec: 3921 insn = ssadd_op[vece]; 3922 break; 3923 case INDEX_op_sssub_vec: 3924 insn = sssub_op[vece]; 3925 break; 3926 case INDEX_op_usadd_vec: 3927 insn = usadd_op[vece]; 3928 break; 3929 case INDEX_op_ussub_vec: 3930 insn = ussub_op[vece]; 3931 break; 3932 case INDEX_op_smin_vec: 3933 insn = smin_op[vece]; 3934 break; 3935 case INDEX_op_umin_vec: 3936 insn = umin_op[vece]; 3937 break; 3938 case INDEX_op_smax_vec: 3939 insn = smax_op[vece]; 3940 break; 3941 case INDEX_op_umax_vec: 3942 insn = umax_op[vece]; 3943 break; 3944 case INDEX_op_shlv_vec: 3945 insn = shlv_op[vece]; 3946 break; 3947 case INDEX_op_shrv_vec: 3948 insn = shrv_op[vece]; 3949 break; 3950 case INDEX_op_sarv_vec: 3951 insn = sarv_op[vece]; 3952 break; 3953 case INDEX_op_and_vec: 3954 tcg_out_and_vec(s, a0, a1, a2); 3955 return; 3956 case INDEX_op_or_vec: 3957 tcg_out_or_vec(s, a0, a1, a2); 3958 return; 3959 case INDEX_op_xor_vec: 3960 insn = VXOR; 3961 break; 3962 case INDEX_op_andc_vec: 3963 tcg_out_andc_vec(s, a0, a1, a2); 3964 return; 3965 case INDEX_op_not_vec: 3966 tcg_out_not_vec(s, a0, a1); 3967 return; 3968 case INDEX_op_orc_vec: 3969 tcg_out_orc_vec(s, a0, a1, a2); 3970 return; 3971 case INDEX_op_nand_vec: 3972 insn = VNAND; 3973 break; 3974 case INDEX_op_nor_vec: 3975 insn = VNOR; 3976 break; 3977 case INDEX_op_eqv_vec: 3978 insn = VEQV; 3979 break; 3980 3981 case INDEX_op_cmp_vec: 3982 tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]); 3983 return; 3984 case INDEX_op_cmpsel_vec: 3985 tcg_out_cmpsel_vec(s, vece, a0, a1, a2, 3986 args[3], const_args[3], args[4], args[5]); 3987 return; 3988 case INDEX_op_bitsel_vec: 3989 tcg_out_bitsel_vec(s, a0, a1, a2, args[3]); 3990 return; 3991 3992 case INDEX_op_dup2_vec: 3993 assert(TCG_TARGET_REG_BITS == 32); 3994 /* With inputs a1 = xLxx, a2 = xHxx */ 3995 tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ 3996 tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ 3997 tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ 3998 return; 3999 4000 case INDEX_op_ppc_mrgh_vec: 4001 insn = mrgh_op[vece]; 4002 break; 4003 case INDEX_op_ppc_mrgl_vec: 4004 insn = mrgl_op[vece]; 4005 break; 4006 case INDEX_op_ppc_muleu_vec: 4007 insn = muleu_op[vece]; 4008 break; 4009 case INDEX_op_ppc_mulou_vec: 4010 insn = mulou_op[vece]; 4011 break; 4012 case INDEX_op_ppc_pkum_vec: 4013 insn = pkum_op[vece]; 4014 break; 4015 case INDEX_op_rotlv_vec: 4016 insn = rotl_op[vece]; 4017 break; 4018 case INDEX_op_ppc_msum_vec: 4019 tcg_debug_assert(vece == MO_16); 4020 tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); 4021 return; 4022 4023 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 4024 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 4025 default: 4026 g_assert_not_reached(); 4027 } 4028 4029 tcg_debug_assert(insn != 0); 4030 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 4031} 4032 4033static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, 4034 TCGv_vec v1, TCGArg imm, TCGOpcode opci) 4035{ 4036 TCGv_vec t1; 4037 4038 if (vece == MO_32) { 4039 /* 4040 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 4041 * So using negative numbers gets us the 4th bit easily. 4042 */ 4043 imm = sextract32(imm, 0, 5); 4044 } else { 4045 imm &= (8 << vece) - 1; 4046 } 4047 4048 /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */ 4049 t1 = tcg_constant_vec(type, MO_8, imm); 4050 vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), 4051 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 4052} 4053 4054static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, 4055 TCGv_vec v1, TCGv_vec v2) 4056{ 4057 TCGv_vec t1 = tcg_temp_new_vec(type); 4058 TCGv_vec t2 = tcg_temp_new_vec(type); 4059 TCGv_vec c0, c16; 4060 4061 switch (vece) { 4062 case MO_8: 4063 case MO_16: 4064 vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), 4065 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 4066 vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), 4067 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 4068 vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), 4069 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 4070 vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), 4071 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 4072 vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), 4073 tcgv_vec_arg(v0), tcgv_vec_arg(t1)); 4074 break; 4075 4076 case MO_32: 4077 tcg_debug_assert(!have_isa_2_07); 4078 /* 4079 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 4080 * So using -16 is a quick way to represent 16. 4081 */ 4082 c16 = tcg_constant_vec(type, MO_8, -16); 4083 c0 = tcg_constant_vec(type, MO_8, 0); 4084 4085 vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1), 4086 tcgv_vec_arg(v2), tcgv_vec_arg(c16)); 4087 vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), 4088 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 4089 vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1), 4090 tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0)); 4091 vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1), 4092 tcgv_vec_arg(t1), tcgv_vec_arg(c16)); 4093 tcg_gen_add_vec(MO_32, v0, t1, t2); 4094 break; 4095 4096 default: 4097 g_assert_not_reached(); 4098 } 4099 tcg_temp_free_vec(t1); 4100 tcg_temp_free_vec(t2); 4101} 4102 4103void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 4104 TCGArg a0, ...) 4105{ 4106 va_list va; 4107 TCGv_vec v0, v1, v2, t0; 4108 TCGArg a2; 4109 4110 va_start(va, a0); 4111 v0 = temp_tcgv_vec(arg_temp(a0)); 4112 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 4113 a2 = va_arg(va, TCGArg); 4114 4115 switch (opc) { 4116 case INDEX_op_shli_vec: 4117 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); 4118 break; 4119 case INDEX_op_shri_vec: 4120 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); 4121 break; 4122 case INDEX_op_sari_vec: 4123 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); 4124 break; 4125 case INDEX_op_rotli_vec: 4126 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec); 4127 break; 4128 case INDEX_op_mul_vec: 4129 v2 = temp_tcgv_vec(arg_temp(a2)); 4130 expand_vec_mul(type, vece, v0, v1, v2); 4131 break; 4132 case INDEX_op_rotlv_vec: 4133 v2 = temp_tcgv_vec(arg_temp(a2)); 4134 t0 = tcg_temp_new_vec(type); 4135 tcg_gen_neg_vec(vece, t0, v2); 4136 tcg_gen_rotlv_vec(vece, v0, v1, t0); 4137 tcg_temp_free_vec(t0); 4138 break; 4139 default: 4140 g_assert_not_reached(); 4141 } 4142 va_end(va); 4143} 4144 4145static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 4146{ 4147 switch (op) { 4148 case INDEX_op_goto_ptr: 4149 return C_O0_I1(r); 4150 4151 case INDEX_op_ld8u_i32: 4152 case INDEX_op_ld8s_i32: 4153 case INDEX_op_ld16u_i32: 4154 case INDEX_op_ld16s_i32: 4155 case INDEX_op_ld_i32: 4156 case INDEX_op_ctpop_i32: 4157 case INDEX_op_neg_i32: 4158 case INDEX_op_not_i32: 4159 case INDEX_op_ext8s_i32: 4160 case INDEX_op_ext16s_i32: 4161 case INDEX_op_bswap16_i32: 4162 case INDEX_op_bswap32_i32: 4163 case INDEX_op_extract_i32: 4164 case INDEX_op_ld8u_i64: 4165 case INDEX_op_ld8s_i64: 4166 case INDEX_op_ld16u_i64: 4167 case INDEX_op_ld16s_i64: 4168 case INDEX_op_ld32u_i64: 4169 case INDEX_op_ld32s_i64: 4170 case INDEX_op_ld_i64: 4171 case INDEX_op_ctpop_i64: 4172 case INDEX_op_neg_i64: 4173 case INDEX_op_not_i64: 4174 case INDEX_op_ext8s_i64: 4175 case INDEX_op_ext16s_i64: 4176 case INDEX_op_ext32s_i64: 4177 case INDEX_op_ext_i32_i64: 4178 case INDEX_op_extu_i32_i64: 4179 case INDEX_op_bswap16_i64: 4180 case INDEX_op_bswap32_i64: 4181 case INDEX_op_bswap64_i64: 4182 case INDEX_op_extract_i64: 4183 return C_O1_I1(r, r); 4184 4185 case INDEX_op_st8_i32: 4186 case INDEX_op_st16_i32: 4187 case INDEX_op_st_i32: 4188 case INDEX_op_st8_i64: 4189 case INDEX_op_st16_i64: 4190 case INDEX_op_st32_i64: 4191 case INDEX_op_st_i64: 4192 return C_O0_I2(r, r); 4193 4194 case INDEX_op_add_i32: 4195 case INDEX_op_and_i32: 4196 case INDEX_op_or_i32: 4197 case INDEX_op_xor_i32: 4198 case INDEX_op_andc_i32: 4199 case INDEX_op_orc_i32: 4200 case INDEX_op_eqv_i32: 4201 case INDEX_op_shl_i32: 4202 case INDEX_op_shr_i32: 4203 case INDEX_op_sar_i32: 4204 case INDEX_op_rotl_i32: 4205 case INDEX_op_rotr_i32: 4206 case INDEX_op_and_i64: 4207 case INDEX_op_andc_i64: 4208 case INDEX_op_shl_i64: 4209 case INDEX_op_shr_i64: 4210 case INDEX_op_sar_i64: 4211 case INDEX_op_rotl_i64: 4212 case INDEX_op_rotr_i64: 4213 return C_O1_I2(r, r, ri); 4214 4215 case INDEX_op_mul_i32: 4216 case INDEX_op_mul_i64: 4217 return C_O1_I2(r, r, rI); 4218 4219 case INDEX_op_div_i32: 4220 case INDEX_op_divu_i32: 4221 case INDEX_op_rem_i32: 4222 case INDEX_op_remu_i32: 4223 case INDEX_op_nand_i32: 4224 case INDEX_op_nor_i32: 4225 case INDEX_op_muluh_i32: 4226 case INDEX_op_mulsh_i32: 4227 case INDEX_op_orc_i64: 4228 case INDEX_op_eqv_i64: 4229 case INDEX_op_nand_i64: 4230 case INDEX_op_nor_i64: 4231 case INDEX_op_div_i64: 4232 case INDEX_op_divu_i64: 4233 case INDEX_op_rem_i64: 4234 case INDEX_op_remu_i64: 4235 case INDEX_op_mulsh_i64: 4236 case INDEX_op_muluh_i64: 4237 return C_O1_I2(r, r, r); 4238 4239 case INDEX_op_sub_i32: 4240 return C_O1_I2(r, rI, ri); 4241 case INDEX_op_add_i64: 4242 return C_O1_I2(r, r, rT); 4243 case INDEX_op_or_i64: 4244 case INDEX_op_xor_i64: 4245 return C_O1_I2(r, r, rU); 4246 case INDEX_op_sub_i64: 4247 return C_O1_I2(r, rI, rT); 4248 case INDEX_op_clz_i32: 4249 case INDEX_op_ctz_i32: 4250 case INDEX_op_clz_i64: 4251 case INDEX_op_ctz_i64: 4252 return C_O1_I2(r, r, rZW); 4253 4254 case INDEX_op_brcond_i32: 4255 case INDEX_op_brcond_i64: 4256 return C_O0_I2(r, rC); 4257 case INDEX_op_setcond_i32: 4258 case INDEX_op_setcond_i64: 4259 case INDEX_op_negsetcond_i32: 4260 case INDEX_op_negsetcond_i64: 4261 return C_O1_I2(r, r, rC); 4262 case INDEX_op_movcond_i32: 4263 case INDEX_op_movcond_i64: 4264 return C_O1_I4(r, r, rC, rZ, rZ); 4265 4266 case INDEX_op_deposit_i32: 4267 case INDEX_op_deposit_i64: 4268 return C_O1_I2(r, 0, rZ); 4269 case INDEX_op_brcond2_i32: 4270 return C_O0_I4(r, r, ri, ri); 4271 case INDEX_op_setcond2_i32: 4272 return C_O1_I4(r, r, r, ri, ri); 4273 case INDEX_op_add2_i64: 4274 case INDEX_op_add2_i32: 4275 return C_O2_I4(r, r, r, r, rI, rZM); 4276 case INDEX_op_sub2_i64: 4277 case INDEX_op_sub2_i32: 4278 return C_O2_I4(r, r, rI, rZM, r, r); 4279 4280 case INDEX_op_qemu_ld_a32_i32: 4281 return C_O1_I1(r, r); 4282 case INDEX_op_qemu_ld_a64_i32: 4283 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r); 4284 case INDEX_op_qemu_ld_a32_i64: 4285 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); 4286 case INDEX_op_qemu_ld_a64_i64: 4287 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r); 4288 4289 case INDEX_op_qemu_st_a32_i32: 4290 return C_O0_I2(r, r); 4291 case INDEX_op_qemu_st_a64_i32: 4292 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); 4293 case INDEX_op_qemu_st_a32_i64: 4294 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); 4295 case INDEX_op_qemu_st_a64_i64: 4296 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r); 4297 4298 case INDEX_op_qemu_ld_a32_i128: 4299 case INDEX_op_qemu_ld_a64_i128: 4300 return C_N1O1_I1(o, m, r); 4301 case INDEX_op_qemu_st_a32_i128: 4302 case INDEX_op_qemu_st_a64_i128: 4303 return C_O0_I3(o, m, r); 4304 4305 case INDEX_op_add_vec: 4306 case INDEX_op_sub_vec: 4307 case INDEX_op_mul_vec: 4308 case INDEX_op_and_vec: 4309 case INDEX_op_or_vec: 4310 case INDEX_op_xor_vec: 4311 case INDEX_op_andc_vec: 4312 case INDEX_op_orc_vec: 4313 case INDEX_op_nor_vec: 4314 case INDEX_op_eqv_vec: 4315 case INDEX_op_nand_vec: 4316 case INDEX_op_cmp_vec: 4317 case INDEX_op_ssadd_vec: 4318 case INDEX_op_sssub_vec: 4319 case INDEX_op_usadd_vec: 4320 case INDEX_op_ussub_vec: 4321 case INDEX_op_smax_vec: 4322 case INDEX_op_smin_vec: 4323 case INDEX_op_umax_vec: 4324 case INDEX_op_umin_vec: 4325 case INDEX_op_shlv_vec: 4326 case INDEX_op_shrv_vec: 4327 case INDEX_op_sarv_vec: 4328 case INDEX_op_rotlv_vec: 4329 case INDEX_op_rotrv_vec: 4330 case INDEX_op_ppc_mrgh_vec: 4331 case INDEX_op_ppc_mrgl_vec: 4332 case INDEX_op_ppc_muleu_vec: 4333 case INDEX_op_ppc_mulou_vec: 4334 case INDEX_op_ppc_pkum_vec: 4335 case INDEX_op_dup2_vec: 4336 return C_O1_I2(v, v, v); 4337 4338 case INDEX_op_not_vec: 4339 case INDEX_op_neg_vec: 4340 return C_O1_I1(v, v); 4341 4342 case INDEX_op_dup_vec: 4343 return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v); 4344 4345 case INDEX_op_ld_vec: 4346 case INDEX_op_dupm_vec: 4347 return C_O1_I1(v, r); 4348 4349 case INDEX_op_st_vec: 4350 return C_O0_I2(v, r); 4351 4352 case INDEX_op_bitsel_vec: 4353 case INDEX_op_ppc_msum_vec: 4354 return C_O1_I3(v, v, v, v); 4355 case INDEX_op_cmpsel_vec: 4356 return C_O1_I4(v, v, v, vZM, v); 4357 4358 default: 4359 g_assert_not_reached(); 4360 } 4361} 4362 4363static void tcg_target_init(TCGContext *s) 4364{ 4365 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; 4366 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; 4367 if (have_altivec) { 4368 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 4369 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 4370 } 4371 4372 tcg_target_call_clobber_regs = 0; 4373 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); 4374 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); 4375 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); 4376 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); 4377 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); 4378 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); 4379 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7); 4380 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); 4381 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); 4382 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); 4383 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); 4384 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12); 4385 4386 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0); 4387 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1); 4388 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2); 4389 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3); 4390 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4); 4391 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5); 4392 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6); 4393 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7); 4394 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 4395 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 4396 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 4397 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 4398 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 4399 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 4400 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 4401 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 4402 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16); 4403 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17); 4404 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18); 4405 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19); 4406 4407 s->reserved_regs = 0; 4408 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ 4409 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ 4410#if defined(_CALL_SYSV) 4411 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ 4412#endif 4413#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 4414 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ 4415#endif 4416 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 4417 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 4418 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); 4419 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); 4420 if (USE_REG_TB) { 4421 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */ 4422 } 4423} 4424 4425#ifdef __ELF__ 4426typedef struct { 4427 DebugFrameCIE cie; 4428 DebugFrameFDEHeader fde; 4429 uint8_t fde_def_cfa[4]; 4430 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; 4431} DebugFrame; 4432 4433/* We're expecting a 2 byte uleb128 encoded value. */ 4434QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 4435 4436#if TCG_TARGET_REG_BITS == 64 4437# define ELF_HOST_MACHINE EM_PPC64 4438#else 4439# define ELF_HOST_MACHINE EM_PPC 4440#endif 4441 4442static DebugFrame debug_frame = { 4443 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 4444 .cie.id = -1, 4445 .cie.version = 1, 4446 .cie.code_align = 1, 4447 .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ 4448 .cie.return_column = 65, 4449 4450 /* Total FDE size does not include the "len" member. */ 4451 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), 4452 4453 .fde_def_cfa = { 4454 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ 4455 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 4456 (FRAME_SIZE >> 7) 4457 }, 4458 .fde_reg_ofs = { 4459 /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ 4460 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, 4461 } 4462}; 4463 4464void tcg_register_jit(const void *buf, size_t buf_size) 4465{ 4466 uint8_t *p = &debug_frame.fde_reg_ofs[3]; 4467 int i; 4468 4469 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { 4470 p[0] = 0x80 + tcg_target_callee_save_regs[i]; 4471 p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; 4472 } 4473 4474 debug_frame.fde.func_start = (uintptr_t)buf; 4475 debug_frame.fde.func_len = buf_size; 4476 4477 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 4478} 4479#endif /* __ELF__ */ 4480#undef VMULEUB 4481#undef VMULEUH 4482#undef VMULEUW 4483#undef VMULOUB 4484#undef VMULOUH 4485#undef VMULOUW 4486#undef VMSUMUHM 4487