/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# ifdef _CALL_AIX
    /* ok */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* ok */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* ok */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB      TCG_REG_R31
#define USE_REG_TB      (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
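/*
 * Without ISA 3.0 pc-relative addressing, 64-bit hosts dedicate R31 as a
 * base register pointing into the current TB (see ppc_tbrel_diff below),
 * so that constants and in-TB addresses can be formed with small
 * TB-relative displacements rather than full 64-bit immediates.
 */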
/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_S32   0x400
#define TCG_CT_CONST_U32   0x800
#define TCG_CT_CONST_ZERO  0x1000
#define TCG_CT_CONST_MONE  0x2000
#define TCG_CT_CONST_WSZ   0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifndef R_PPC64_PCREL34
#define R_PPC64_PCREL34  132
#endif

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,  /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
/* For PPC, we use TB+4 instead of TB as the base.  */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
{
    return tcg_tbrel_diff(s, target) - 4;
}

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
        return true;
    }
    return false;
}
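/*
 * Notes on the fields patched above: a direct branch (I-form) carries a
 * 26-bit signed byte displacement whose low two bits are zero, so e.g. a
 * branch 0x1000 bytes forward ORs 0x1000 into the masked LI field.
 * Conditional branches (B-form) carry only a 16-bit displacement, hence
 * reloc_pc14.  Prefixed pc-relative insns split a 34-bit displacement as
 * 18 bits in the prefix word and 16 bits in the suffix word (reloc_pc34).
 */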
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc)  ((opc)<<26)
#define XO19(opc)  (OPCD(19)|((opc)<<1))
#define MD30(opc)  (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc)  (OPCD(31)|((opc)<<1))
#define XO58(opc)  (OPCD(58)|(opc))
#define XO62(opc)  (OPCD(62)|(opc))
#define VX4(opc)   (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)

#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)
#define LQ     OPCD( 56)

#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)
#define STQ    XO62(  2)

#define PLWA   OPCD( 41)
#define PLD    OPCD( 57)
#define PLXSD  OPCD( 42)
#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */

#define PSTD   OPCD( 61)
#define PSTXSD OPCD( 46)
#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD(  8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR    XO19( 16)
#define BCCTR   XO19(528)
#define CRAND   XO19(257)
#define CRANDC  XO19(129)
#define CRNAND  XO19(225)
#define CROR    XO19(449)
#define CRNOR   XO19( 33)
#define ADDPCIS XO19(  2)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31(  4)
#define TRAP   (TW | TO(31))

#define SETBC   XO31(384)  /* v3.10 */
#define SETBCR  XO31(416)  /* v3.10 */
#define SETNBC  XO31(448)  /* v3.10 */
#define SETNBCR XO31(480)  /* v3.10 */

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)     /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)     /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)      /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)      /* v3.00 */
#define LXVWSX     (XO31(364) | 1)     /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)     /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)     /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5)  /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)      /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)      /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)     /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)      /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)      /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)      /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)      /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)      /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)      /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)      /* v2.07 */
#define VCMPNEB    VX4(7)        /* v3.00 */
#define VCMPNEH    VX4(71)       /* v3.00 */
#define VCMPNEW    VX4(135)      /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)     /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)     /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)      /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)      /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)      /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)      /* v2.07 */
#define VMULUWM    VX4(137)      /* v2.07 */
#define VMULLD     VX4(457)      /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)     /* v2.07 */
#define VNAND      VX4(1412)     /* v2.07 */
#define VORC       VX4(1348)     /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)   /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf)  /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1)  /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r)   ((r)<<21)
#define RS(r)   ((r)<<21)
#define RA(r)   ((r)<<16)
#define RB(r)   ((r)<<11)
#define TO(t)   ((t)<<21)
#define SH(s)   ((s)<<11)
#define MB(b)   ((b)<<6)
#define ME(e)   ((e)<<1)
#define BO(o)   ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b)  (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)     ((n)<<23)
#define BI(n, c)  (((c)+((n)*4))<<16)
#define BT(n, c)  (((c)+((n)*4))<<21)
#define BA(n, c)  (((c)+((n)*4))<<16)
#define BB(n, c)  (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
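/*
 * The comparisons that feed these templates are emitted into CR field 7:
 * BI(7, bit) selects one of that field's four bits, and BO_COND_TRUE /
 * BO_COND_FALSE branch on the bit being set or clear.  E.g.
 * tcg_to_bc[TCG_COND_EQ] is "beq cr7, <target>" once the 14-bit
 * displacement has been patched in by reloc_pc14.
 */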
/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
{
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
}

static void tcg_out_prefix_align(TCGContext *s)
{
    if (tcg_out_need_prefix_align(s)) {
        tcg_out32(s, NOP);
    }
}
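/*
 * The two words of a prefixed instruction may not cross a 64-byte
 * instruction boundary: if the prefix word would land at offset 0x3c
 * within a block, pad with a NOP first so that prefix and suffix stay
 * in the same 64-byte block, and account for that extra word when
 * computing pc-relative displacements below.
 */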
static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}

/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s,
dst, src); 891} 892 893static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src) 894{ 895 tcg_out_ext32u(s, dst, src); 896} 897 898static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) 899{ 900 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 901 tcg_out_mov(s, TCG_TYPE_I32, rd, rn); 902} 903 904static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) 905{ 906 tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); 907} 908 909static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) 910{ 911 tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); 912} 913 914static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c) 915{ 916 /* Limit immediate shift count lest we create an illegal insn. */ 917 tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31)); 918} 919 920static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) 921{ 922 tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); 923} 924 925static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) 926{ 927 tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); 928} 929 930static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c) 931{ 932 tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2)); 933} 934 935static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm) 936{ 937 uint32_t d0, d1, d2; 938 939 tcg_debug_assert((imm & 0xffff) == 0); 940 tcg_debug_assert(imm == (int32_t)imm); 941 942 d2 = extract32(imm, 16, 1); 943 d1 = extract32(imm, 17, 5); 944 d0 = extract32(imm, 22, 10); 945 tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2); 946} 947 948static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags) 949{ 950 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 951 952 if (have_isa_3_10) { 953 tcg_out32(s, BRH | RA(dst) | RS(src)); 954 if (flags & TCG_BSWAP_OS) { 955 tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); 956 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 957 tcg_out_ext16u(s, dst, dst); 958 } 959 return; 960 } 961 962 /* 963 * In the following, 964 * dep(a, b, m) -> (a & ~m) | (b & m) 965 * 966 * Begin with: src = xxxxabcd 967 */ 968 /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ 969 tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); 970 /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ 971 tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); 972 973 if (flags & TCG_BSWAP_OS) { 974 tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); 975 } else { 976 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 977 } 978} 979 980static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) 981{ 982 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 983 984 if (have_isa_3_10) { 985 tcg_out32(s, BRW | RA(dst) | RS(src)); 986 if (flags & TCG_BSWAP_OS) { 987 tcg_out_ext32s(s, dst, dst); 988 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 989 tcg_out_ext32u(s, dst, dst); 990 } 991 return; 992 } 993 994 /* 995 * Stolen from gcc's builtin_bswap32. 
996 * In the following, 997 * dep(a, b, m) -> (a & ~m) | (b & m) 998 * 999 * Begin with: src = xxxxabcd 1000 */ 1001 /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ 1002 tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); 1003 /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ 1004 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); 1005 /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ 1006 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); 1007 1008 if (flags & TCG_BSWAP_OS) { 1009 tcg_out_ext32s(s, dst, tmp); 1010 } else { 1011 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 1012 } 1013} 1014 1015static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) 1016{ 1017 TCGReg t0 = dst == src ? TCG_REG_R0 : dst; 1018 TCGReg t1 = dst == src ? dst : TCG_REG_R0; 1019 1020 if (have_isa_3_10) { 1021 tcg_out32(s, BRD | RA(dst) | RS(src)); 1022 return; 1023 } 1024 1025 /* 1026 * In the following, 1027 * dep(a, b, m) -> (a & ~m) | (b & m) 1028 * 1029 * Begin with: src = abcdefgh 1030 */ 1031 /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ 1032 tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); 1033 /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ 1034 tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); 1035 /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ 1036 tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); 1037 1038 /* t0 = rol64(t0, 32) = hgfe0000 */ 1039 tcg_out_rld(s, RLDICL, t0, t0, 32, 0); 1040 /* t1 = rol64(src, 32) = efghabcd */ 1041 tcg_out_rld(s, RLDICL, t1, src, 32, 0); 1042 1043 /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ 1044 tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); 1045 /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ 1046 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); 1047 /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ 1048 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); 1049 1050 tcg_out_mov(s, TCG_TYPE_REG, dst, t0); 1051} 1052 1053/* Emit a move into ret of arg, if it can be done in one insn. */ 1054static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) 1055{ 1056 if (arg == (int16_t)arg) { 1057 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1058 return true; 1059 } 1060 if (arg == (int32_t)arg && (arg & 0xffff) == 0) { 1061 tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); 1062 return true; 1063 } 1064 return false; 1065} 1066 1067static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, 1068 tcg_target_long arg, bool in_prologue) 1069{ 1070 intptr_t tb_diff; 1071 tcg_target_long tmp; 1072 int shift; 1073 1074 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1075 1076 if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { 1077 arg = (int32_t)arg; 1078 } 1079 1080 /* Load 16-bit immediates with one insn. */ 1081 if (tcg_out_movi_one(s, ret, arg)) { 1082 return; 1083 } 1084 1085 /* Load addresses within the TB with one insn. */ 1086 tb_diff = ppc_tbrel_diff(s, (void *)arg); 1087 if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) { 1088 tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff)); 1089 return; 1090 } 1091 1092 /* 1093 * Load values up to 34 bits, and pc-relative addresses, 1094 * with one prefixed insn. 
1095 */ 1096 if (have_isa_3_10) { 1097 if (arg == sextract64(arg, 0, 34)) { 1098 /* pli ret,value = paddi ret,0,value,0 */ 1099 tcg_out_mls_d(s, ADDI, ret, 0, arg, 0); 1100 return; 1101 } 1102 1103 tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg); 1104 if (tmp == sextract64(tmp, 0, 34)) { 1105 /* pla ret,value = paddi ret,0,value,1 */ 1106 tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1); 1107 return; 1108 } 1109 } 1110 1111 /* Load 32-bit immediates with two insns. Note that we've already 1112 eliminated bare ADDIS, so we know both insns are required. */ 1113 if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) { 1114 tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); 1115 tcg_out32(s, ORI | SAI(ret, ret, arg)); 1116 return; 1117 } 1118 if (arg == (uint32_t)arg && !(arg & 0x8000)) { 1119 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1120 tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); 1121 return; 1122 } 1123 1124 /* Load masked 16-bit value. */ 1125 if (arg > 0 && (arg & 0x8000)) { 1126 tmp = arg | 0x7fff; 1127 if ((tmp & (tmp + 1)) == 0) { 1128 int mb = clz64(tmp + 1) + 1; 1129 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1130 tcg_out_rld(s, RLDICL, ret, ret, 0, mb); 1131 return; 1132 } 1133 } 1134 1135 /* Load common masks with 2 insns. */ 1136 shift = ctz64(arg); 1137 tmp = arg >> shift; 1138 if (tmp == (int16_t)tmp) { 1139 tcg_out32(s, ADDI | TAI(ret, 0, tmp)); 1140 tcg_out_shli64(s, ret, ret, shift); 1141 return; 1142 } 1143 shift = clz64(arg); 1144 if (tcg_out_movi_one(s, ret, arg << shift)) { 1145 tcg_out_shri64(s, ret, ret, shift); 1146 return; 1147 } 1148 1149 /* Load addresses within 2GB with 2 insns. */ 1150 if (have_isa_3_00) { 1151 intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4; 1152 int16_t lo = hi; 1153 1154 hi -= lo; 1155 if (hi == (int32_t)hi) { 1156 tcg_out_addpcis(s, TCG_REG_TMP2, hi); 1157 tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo)); 1158 return; 1159 } 1160 } 1161 1162 /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */ 1163 if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) { 1164 tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff); 1165 return; 1166 } 1167 1168 /* Use the constant pool, if possible. 
*/ 1169 if (!in_prologue && USE_REG_TB) { 1170 new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr, 1171 ppc_tbrel_diff(s, NULL)); 1172 tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0)); 1173 return; 1174 } 1175 if (have_isa_3_10) { 1176 tcg_out_8ls_d(s, PLD, ret, 0, 0, 1); 1177 new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0); 1178 return; 1179 } 1180 if (have_isa_3_00) { 1181 tcg_out_addpcis(s, TCG_REG_TMP2, 0); 1182 new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0); 1183 tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0)); 1184 return; 1185 } 1186 1187 tmp = arg >> 31 >> 1; 1188 tcg_out_movi(s, TCG_TYPE_I32, ret, tmp); 1189 if (tmp) { 1190 tcg_out_shli64(s, ret, ret, 32); 1191 } 1192 if (arg & 0xffff0000) { 1193 tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); 1194 } 1195 if (arg & 0xffff) { 1196 tcg_out32(s, ORI | SAI(ret, ret, arg)); 1197 } 1198} 1199 1200static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 1201 TCGReg ret, int64_t val) 1202{ 1203 uint32_t load_insn; 1204 int rel, low; 1205 intptr_t add; 1206 1207 switch (vece) { 1208 case MO_8: 1209 low = (int8_t)val; 1210 if (low >= -16 && low < 16) { 1211 tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); 1212 return; 1213 } 1214 if (have_isa_3_00) { 1215 tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); 1216 return; 1217 } 1218 break; 1219 1220 case MO_16: 1221 low = (int16_t)val; 1222 if (low >= -16 && low < 16) { 1223 tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); 1224 return; 1225 } 1226 break; 1227 1228 case MO_32: 1229 low = (int32_t)val; 1230 if (low >= -16 && low < 16) { 1231 tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); 1232 return; 1233 } 1234 break; 1235 } 1236 1237 /* 1238 * Otherwise we must load the value from the constant pool. 1239 */ 1240 if (USE_REG_TB) { 1241 rel = R_PPC_ADDR16; 1242 add = ppc_tbrel_diff(s, NULL); 1243 } else if (have_isa_3_10) { 1244 if (type == TCG_TYPE_V64) { 1245 tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1); 1246 new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0); 1247 } else { 1248 tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1); 1249 new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val); 1250 } 1251 return; 1252 } else if (have_isa_3_00) { 1253 tcg_out_addpcis(s, TCG_REG_TMP1, 0); 1254 rel = R_PPC_REL14; 1255 add = 0; 1256 } else { 1257 rel = R_PPC_ADDR32; 1258 add = 0; 1259 } 1260 1261 if (have_vsx) { 1262 load_insn = type == TCG_TYPE_V64 ? 
LXSDX : LXVDSX; 1263 load_insn |= VRT(ret) | RB(TCG_REG_TMP1); 1264 if (TCG_TARGET_REG_BITS == 64) { 1265 new_pool_label(s, val, rel, s->code_ptr, add); 1266 } else { 1267 new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val); 1268 } 1269 } else { 1270 load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); 1271 if (TCG_TARGET_REG_BITS == 64) { 1272 new_pool_l2(s, rel, s->code_ptr, add, val, val); 1273 } else { 1274 new_pool_l4(s, rel, s->code_ptr, add, 1275 val >> 32, val, val >> 32, val); 1276 } 1277 } 1278 1279 if (USE_REG_TB) { 1280 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0)); 1281 load_insn |= RA(TCG_REG_TB); 1282 } else if (have_isa_3_00) { 1283 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); 1284 } else { 1285 tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0)); 1286 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); 1287 } 1288 tcg_out32(s, load_insn); 1289} 1290 1291static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, 1292 tcg_target_long arg) 1293{ 1294 switch (type) { 1295 case TCG_TYPE_I32: 1296 case TCG_TYPE_I64: 1297 tcg_debug_assert(ret < TCG_REG_V0); 1298 tcg_out_movi_int(s, type, ret, arg, false); 1299 break; 1300 1301 default: 1302 g_assert_not_reached(); 1303 } 1304} 1305 1306static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1307{ 1308 return false; 1309} 1310 1311static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1312 tcg_target_long imm) 1313{ 1314 /* This function is only used for passing structs by reference. */ 1315 g_assert_not_reached(); 1316} 1317 1318static bool mask_operand(uint32_t c, int *mb, int *me) 1319{ 1320 uint32_t lsb, test; 1321 1322 /* Accept a bit pattern like: 1323 0....01....1 1324 1....10....0 1325 0..01..10..0 1326 Keep track of the transitions. */ 1327 if (c == 0 || c == -1) { 1328 return false; 1329 } 1330 test = c; 1331 lsb = test & -test; 1332 test += lsb; 1333 if (test & (test - 1)) { 1334 return false; 1335 } 1336 1337 *me = clz32(lsb); 1338 *mb = test ? clz32(test & -test) + 1 : 0; 1339 return true; 1340} 1341 1342static bool mask64_operand(uint64_t c, int *mb, int *me) 1343{ 1344 uint64_t lsb; 1345 1346 if (c == 0) { 1347 return false; 1348 } 1349 1350 lsb = c & -c; 1351 /* Accept 1..10..0. */ 1352 if (c == -lsb) { 1353 *mb = 0; 1354 *me = clz64(lsb); 1355 return true; 1356 } 1357 /* Accept 0..01..1. 
*/ 1358 if (lsb == 1 && (c & (c + 1)) == 0) { 1359 *mb = clz64(c + 1) + 1; 1360 *me = 63; 1361 return true; 1362 } 1363 return false; 1364} 1365 1366static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1367{ 1368 int mb, me; 1369 1370 if (mask_operand(c, &mb, &me)) { 1371 tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); 1372 } else if ((c & 0xffff) == c) { 1373 tcg_out32(s, ANDI | SAI(src, dst, c)); 1374 return; 1375 } else if ((c & 0xffff0000) == c) { 1376 tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); 1377 return; 1378 } else { 1379 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); 1380 tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); 1381 } 1382} 1383 1384static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) 1385{ 1386 int mb, me; 1387 1388 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1389 if (mask64_operand(c, &mb, &me)) { 1390 if (mb == 0) { 1391 tcg_out_rld(s, RLDICR, dst, src, 0, me); 1392 } else { 1393 tcg_out_rld(s, RLDICL, dst, src, 0, mb); 1394 } 1395 } else if ((c & 0xffff) == c) { 1396 tcg_out32(s, ANDI | SAI(src, dst, c)); 1397 return; 1398 } else if ((c & 0xffff0000) == c) { 1399 tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); 1400 return; 1401 } else { 1402 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); 1403 tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); 1404 } 1405} 1406 1407static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, 1408 int op_lo, int op_hi) 1409{ 1410 if (c >> 16) { 1411 tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); 1412 src = dst; 1413 } 1414 if (c & 0xffff) { 1415 tcg_out32(s, op_lo | SAI(src, dst, c)); 1416 src = dst; 1417 } 1418} 1419 1420static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1421{ 1422 tcg_out_zori32(s, dst, src, c, ORI, ORIS); 1423} 1424 1425static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1426{ 1427 tcg_out_zori32(s, dst, src, c, XORI, XORIS); 1428} 1429 1430static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target) 1431{ 1432 ptrdiff_t disp = tcg_pcrel_diff(s, target); 1433 if (in_range_b(disp)) { 1434 tcg_out32(s, B | (disp & 0x3fffffc) | mask); 1435 } else { 1436 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); 1437 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); 1438 tcg_out32(s, BCCTR | BO_ALWAYS | mask); 1439 } 1440} 1441 1442static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, 1443 TCGReg base, tcg_target_long offset) 1444{ 1445 tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; 1446 bool is_int_store = false; 1447 TCGReg rs = TCG_REG_TMP1; 1448 1449 switch (opi) { 1450 case LD: case LWA: 1451 align = 3; 1452 /* FALLTHRU */ 1453 default: 1454 if (rt > TCG_REG_R0 && rt < TCG_REG_V0) { 1455 rs = rt; 1456 break; 1457 } 1458 break; 1459 case LXSD: 1460 case STXSD: 1461 align = 3; 1462 break; 1463 case LXV: 1464 case STXV: 1465 align = 15; 1466 break; 1467 case STD: 1468 align = 3; 1469 /* FALLTHRU */ 1470 case STB: case STH: case STW: 1471 is_int_store = true; 1472 break; 1473 } 1474 1475 /* For unaligned or large offsets, use the prefixed form. */ 1476 if (have_isa_3_10 1477 && (offset != (int16_t)offset || (offset & align)) 1478 && offset == sextract64(offset, 0, 34)) { 1479 /* 1480 * Note that the MLS:D insns retain their un-prefixed opcode, 1481 * while the 8LS:D insns use a different opcode space. 
1482 */ 1483 switch (opi) { 1484 case LBZ: 1485 case LHZ: 1486 case LHA: 1487 case LWZ: 1488 case STB: 1489 case STH: 1490 case STW: 1491 case ADDI: 1492 tcg_out_mls_d(s, opi, rt, base, offset, 0); 1493 return; 1494 case LWA: 1495 tcg_out_8ls_d(s, PLWA, rt, base, offset, 0); 1496 return; 1497 case LD: 1498 tcg_out_8ls_d(s, PLD, rt, base, offset, 0); 1499 return; 1500 case STD: 1501 tcg_out_8ls_d(s, PSTD, rt, base, offset, 0); 1502 return; 1503 case LXSD: 1504 tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0); 1505 return; 1506 case STXSD: 1507 tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0); 1508 return; 1509 case LXV: 1510 tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0); 1511 return; 1512 case STXV: 1513 tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0); 1514 return; 1515 } 1516 } 1517 1518 /* For unaligned, or very large offsets, use the indexed form. */ 1519 if (offset & align || offset != (int32_t)offset || opi == 0) { 1520 if (rs == base) { 1521 rs = TCG_REG_R0; 1522 } 1523 tcg_debug_assert(!is_int_store || rs != rt); 1524 tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); 1525 tcg_out32(s, opx | TAB(rt & 31, base, rs)); 1526 return; 1527 } 1528 1529 l0 = (int16_t)offset; 1530 offset = (offset - l0) >> 16; 1531 l1 = (int16_t)offset; 1532 1533 if (l1 < 0 && orig >= 0) { 1534 extra = 0x4000; 1535 l1 = (int16_t)(offset - 0x4000); 1536 } 1537 if (l1) { 1538 tcg_out32(s, ADDIS | TAI(rs, base, l1)); 1539 base = rs; 1540 } 1541 if (extra) { 1542 tcg_out32(s, ADDIS | TAI(rs, base, extra)); 1543 base = rs; 1544 } 1545 if (opi != ADDI || base != rt || l0 != 0) { 1546 tcg_out32(s, opi | TAI(rt & 31, base, l0)); 1547 } 1548} 1549 1550static void tcg_out_vsldoi(TCGContext *s, TCGReg ret, 1551 TCGReg va, TCGReg vb, int shb) 1552{ 1553 tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6)); 1554} 1555 1556static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1557 TCGReg base, intptr_t offset) 1558{ 1559 int shift; 1560 1561 switch (type) { 1562 case TCG_TYPE_I32: 1563 if (ret < TCG_REG_V0) { 1564 tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); 1565 break; 1566 } 1567 if (have_isa_2_07 && have_vsx) { 1568 tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset); 1569 break; 1570 } 1571 tcg_debug_assert((offset & 3) == 0); 1572 tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); 1573 shift = (offset - 4) & 0xc; 1574 if (shift) { 1575 tcg_out_vsldoi(s, ret, ret, ret, shift); 1576 } 1577 break; 1578 case TCG_TYPE_I64: 1579 if (ret < TCG_REG_V0) { 1580 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1581 tcg_out_mem_long(s, LD, LDX, ret, base, offset); 1582 break; 1583 } 1584 /* fallthru */ 1585 case TCG_TYPE_V64: 1586 tcg_debug_assert(ret >= TCG_REG_V0); 1587 if (have_vsx) { 1588 tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX, 1589 ret, base, offset); 1590 break; 1591 } 1592 tcg_debug_assert((offset & 7) == 0); 1593 tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); 1594 if (offset & 8) { 1595 tcg_out_vsldoi(s, ret, ret, ret, 8); 1596 } 1597 break; 1598 case TCG_TYPE_V128: 1599 tcg_debug_assert(ret >= TCG_REG_V0); 1600 tcg_debug_assert((offset & 15) == 0); 1601 tcg_out_mem_long(s, have_isa_3_00 ? 
LXV : 0, 1602 LVX, ret, base, offset); 1603 break; 1604 default: 1605 g_assert_not_reached(); 1606 } 1607} 1608 1609static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 1610 TCGReg base, intptr_t offset) 1611{ 1612 int shift; 1613 1614 switch (type) { 1615 case TCG_TYPE_I32: 1616 if (arg < TCG_REG_V0) { 1617 tcg_out_mem_long(s, STW, STWX, arg, base, offset); 1618 break; 1619 } 1620 if (have_isa_2_07 && have_vsx) { 1621 tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset); 1622 break; 1623 } 1624 assert((offset & 3) == 0); 1625 tcg_debug_assert((offset & 3) == 0); 1626 shift = (offset - 4) & 0xc; 1627 if (shift) { 1628 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift); 1629 arg = TCG_VEC_TMP1; 1630 } 1631 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); 1632 break; 1633 case TCG_TYPE_I64: 1634 if (arg < TCG_REG_V0) { 1635 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1636 tcg_out_mem_long(s, STD, STDX, arg, base, offset); 1637 break; 1638 } 1639 /* fallthru */ 1640 case TCG_TYPE_V64: 1641 tcg_debug_assert(arg >= TCG_REG_V0); 1642 if (have_vsx) { 1643 tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0, 1644 STXSDX, arg, base, offset); 1645 break; 1646 } 1647 tcg_debug_assert((offset & 7) == 0); 1648 if (offset & 8) { 1649 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); 1650 arg = TCG_VEC_TMP1; 1651 } 1652 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); 1653 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4); 1654 break; 1655 case TCG_TYPE_V128: 1656 tcg_debug_assert(arg >= TCG_REG_V0); 1657 tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0, 1658 STVX, arg, base, offset); 1659 break; 1660 default: 1661 g_assert_not_reached(); 1662 } 1663} 1664 1665static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1666 TCGReg base, intptr_t ofs) 1667{ 1668 return false; 1669} 1670 1671static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, 1672 int const_arg2, int cr, TCGType type) 1673{ 1674 int imm; 1675 uint32_t op; 1676 1677 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1678 1679 /* Simplify the comparisons below wrt CMPI. 
*/ 1680 if (type == TCG_TYPE_I32) { 1681 arg2 = (int32_t)arg2; 1682 } 1683 1684 switch (cond) { 1685 case TCG_COND_EQ: 1686 case TCG_COND_NE: 1687 if (const_arg2) { 1688 if ((int16_t) arg2 == arg2) { 1689 op = CMPI; 1690 imm = 1; 1691 break; 1692 } else if ((uint16_t) arg2 == arg2) { 1693 op = CMPLI; 1694 imm = 1; 1695 break; 1696 } 1697 } 1698 op = CMPL; 1699 imm = 0; 1700 break; 1701 1702 case TCG_COND_LT: 1703 case TCG_COND_GE: 1704 case TCG_COND_LE: 1705 case TCG_COND_GT: 1706 if (const_arg2) { 1707 if ((int16_t) arg2 == arg2) { 1708 op = CMPI; 1709 imm = 1; 1710 break; 1711 } 1712 } 1713 op = CMP; 1714 imm = 0; 1715 break; 1716 1717 case TCG_COND_LTU: 1718 case TCG_COND_GEU: 1719 case TCG_COND_LEU: 1720 case TCG_COND_GTU: 1721 if (const_arg2) { 1722 if ((uint16_t) arg2 == arg2) { 1723 op = CMPLI; 1724 imm = 1; 1725 break; 1726 } 1727 } 1728 op = CMPL; 1729 imm = 0; 1730 break; 1731 1732 default: 1733 g_assert_not_reached(); 1734 } 1735 op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); 1736 1737 if (imm) { 1738 tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); 1739 } else { 1740 if (const_arg2) { 1741 tcg_out_movi(s, type, TCG_REG_R0, arg2); 1742 arg2 = TCG_REG_R0; 1743 } 1744 tcg_out32(s, op | RA(arg1) | RB(arg2)); 1745 } 1746} 1747 1748static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, 1749 TCGReg dst, TCGReg src, bool neg) 1750{ 1751 if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { 1752 /* 1753 * X != 0 implies X + -1 generates a carry. 1754 * RT = (~X + X) + CA 1755 * = -1 + CA 1756 * = CA ? 0 : -1 1757 */ 1758 tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); 1759 tcg_out32(s, SUBFE | TAB(dst, src, src)); 1760 return; 1761 } 1762 1763 if (type == TCG_TYPE_I32) { 1764 tcg_out32(s, CNTLZW | RS(src) | RA(dst)); 1765 tcg_out_shri32(s, dst, dst, 5); 1766 } else { 1767 tcg_out32(s, CNTLZD | RS(src) | RA(dst)); 1768 tcg_out_shri64(s, dst, dst, 6); 1769 } 1770 if (neg) { 1771 tcg_out32(s, NEG | RT(dst) | RA(dst)); 1772 } 1773} 1774 1775static void tcg_out_setcond_ne0(TCGContext *s, TCGType type, 1776 TCGReg dst, TCGReg src, bool neg) 1777{ 1778 if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { 1779 /* 1780 * X != 0 implies X + -1 generates a carry. Extra addition 1781 * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. 1782 */ 1783 tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); 1784 tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); 1785 return; 1786 } 1787 tcg_out_setcond_eq0(s, type, dst, src, false); 1788 if (neg) { 1789 tcg_out32(s, ADDI | TAI(dst, dst, -1)); 1790 } else { 1791 tcg_out_xori32(s, dst, dst, 1); 1792 } 1793} 1794 1795static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, 1796 bool const_arg2) 1797{ 1798 if (const_arg2) { 1799 if ((uint32_t)arg2 == arg2) { 1800 tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); 1801 } else { 1802 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); 1803 tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); 1804 } 1805 } else { 1806 tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); 1807 } 1808 return TCG_REG_R0; 1809} 1810 1811static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, 1812 TCGArg arg0, TCGArg arg1, TCGArg arg2, 1813 int const_arg2, bool neg) 1814{ 1815 int sh; 1816 bool inv; 1817 1818 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1819 1820 /* Ignore high bits of a potential constant arg2. */ 1821 if (type == TCG_TYPE_I32) { 1822 arg2 = (uint32_t)arg2; 1823 } 1824 1825 /* With SETBC/SETBCR, we can always implement with 2 insns. 
*/ 1826 if (have_isa_3_10) { 1827 tcg_insn_unit bi, opc; 1828 1829 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1830 1831 /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */ 1832 bi = tcg_to_bc[cond] & (0x1f << 16); 1833 if (tcg_to_bc[cond] & BO(8)) { 1834 opc = neg ? SETNBC : SETBC; 1835 } else { 1836 opc = neg ? SETNBCR : SETBCR; 1837 } 1838 tcg_out32(s, opc | RT(arg0) | bi); 1839 return; 1840 } 1841 1842 /* Handle common and trivial cases before handling anything else. */ 1843 if (arg2 == 0) { 1844 switch (cond) { 1845 case TCG_COND_EQ: 1846 tcg_out_setcond_eq0(s, type, arg0, arg1, neg); 1847 return; 1848 case TCG_COND_NE: 1849 tcg_out_setcond_ne0(s, type, arg0, arg1, neg); 1850 return; 1851 case TCG_COND_GE: 1852 tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); 1853 arg1 = arg0; 1854 /* FALLTHRU */ 1855 case TCG_COND_LT: 1856 /* Extract the sign bit. */ 1857 if (type == TCG_TYPE_I32) { 1858 if (neg) { 1859 tcg_out_sari32(s, arg0, arg1, 31); 1860 } else { 1861 tcg_out_shri32(s, arg0, arg1, 31); 1862 } 1863 } else { 1864 if (neg) { 1865 tcg_out_sari64(s, arg0, arg1, 63); 1866 } else { 1867 tcg_out_shri64(s, arg0, arg1, 63); 1868 } 1869 } 1870 return; 1871 default: 1872 break; 1873 } 1874 } 1875 1876 /* If we have ISEL, we can implement everything with 3 or 4 insns. 1877 All other cases below are also at least 3 insns, so speed up the 1878 code generator by not considering them and always using ISEL. */ 1879 if (have_isel) { 1880 int isel, tab; 1881 1882 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1883 1884 isel = tcg_to_isel[cond]; 1885 1886 tcg_out_movi(s, type, arg0, neg ? -1 : 1); 1887 if (isel & 1) { 1888 /* arg0 = (bc ? 0 : 1) */ 1889 tab = TAB(arg0, 0, arg0); 1890 isel &= ~1; 1891 } else { 1892 /* arg0 = (bc ? 1 : 0) */ 1893 tcg_out_movi(s, type, TCG_REG_R0, 0); 1894 tab = TAB(arg0, arg0, TCG_REG_R0); 1895 } 1896 tcg_out32(s, isel | tab); 1897 return; 1898 } 1899 1900 inv = false; 1901 switch (cond) { 1902 case TCG_COND_EQ: 1903 arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); 1904 tcg_out_setcond_eq0(s, type, arg0, arg1, neg); 1905 break; 1906 1907 case TCG_COND_NE: 1908 arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); 1909 tcg_out_setcond_ne0(s, type, arg0, arg1, neg); 1910 break; 1911 1912 case TCG_COND_LE: 1913 case TCG_COND_LEU: 1914 inv = true; 1915 /* fall through */ 1916 case TCG_COND_GT: 1917 case TCG_COND_GTU: 1918 sh = 30; /* CR7 CR_GT */ 1919 goto crtest; 1920 1921 case TCG_COND_GE: 1922 case TCG_COND_GEU: 1923 inv = true; 1924 /* fall through */ 1925 case TCG_COND_LT: 1926 case TCG_COND_LTU: 1927 sh = 29; /* CR7 CR_LT */ 1928 goto crtest; 1929 1930 crtest: 1931 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1932 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); 1933 tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); 1934 if (neg && inv) { 1935 tcg_out32(s, ADDI | TAI(arg0, arg0, -1)); 1936 } else if (neg) { 1937 tcg_out32(s, NEG | RT(arg0) | RA(arg0)); 1938 } else if (inv) { 1939 tcg_out_xori32(s, arg0, arg0, 1); 1940 } 1941 break; 1942 1943 default: 1944 g_assert_not_reached(); 1945 } 1946} 1947 1948static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) 1949{ 1950 if (l->has_value) { 1951 bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); 1952 } else { 1953 tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); 1954 } 1955 tcg_out32(s, bc); 1956} 1957 1958static void tcg_out_brcond(TCGContext *s, TCGCond cond, 1959 TCGArg arg1, TCGArg arg2, int const_arg2, 1960 TCGLabel *l, TCGType type) 1961{ 1962 
tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1963 tcg_out_bc(s, tcg_to_bc[cond], l); 1964} 1965 1966static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, 1967 TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, 1968 TCGArg v2, bool const_c2) 1969{ 1970 /* If for some reason both inputs are zero, don't produce bad code. */ 1971 if (v1 == 0 && v2 == 0) { 1972 tcg_out_movi(s, type, dest, 0); 1973 return; 1974 } 1975 1976 tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); 1977 1978 if (have_isel) { 1979 int isel = tcg_to_isel[cond]; 1980 1981 /* Swap the V operands if the operation indicates inversion. */ 1982 if (isel & 1) { 1983 int t = v1; 1984 v1 = v2; 1985 v2 = t; 1986 isel &= ~1; 1987 } 1988 /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ 1989 if (v2 == 0) { 1990 tcg_out_movi(s, type, TCG_REG_R0, 0); 1991 } 1992 tcg_out32(s, isel | TAB(dest, v1, v2)); 1993 } else { 1994 if (dest == v2) { 1995 cond = tcg_invert_cond(cond); 1996 v2 = v1; 1997 } else if (dest != v1) { 1998 if (v1 == 0) { 1999 tcg_out_movi(s, type, dest, 0); 2000 } else { 2001 tcg_out_mov(s, type, dest, v1); 2002 } 2003 } 2004 /* Branch forward over one insn */ 2005 tcg_out32(s, tcg_to_bc[cond] | 8); 2006 if (v2 == 0) { 2007 tcg_out_movi(s, type, dest, 0); 2008 } else { 2009 tcg_out_mov(s, type, dest, v2); 2010 } 2011 } 2012} 2013 2014static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, 2015 TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2) 2016{ 2017 if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) { 2018 tcg_out32(s, opc | RA(a0) | RS(a1)); 2019 } else { 2020 tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type); 2021 /* Note that the only other valid constant for a2 is 0. */ 2022 if (have_isel) { 2023 tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1)); 2024 tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0)); 2025 } else if (!const_a2 && a0 == a2) { 2026 tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8); 2027 tcg_out32(s, opc | RA(a0) | RS(a1)); 2028 } else { 2029 tcg_out32(s, opc | RA(a0) | RS(a1)); 2030 tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8); 2031 if (const_a2) { 2032 tcg_out_movi(s, type, a0, 0); 2033 } else { 2034 tcg_out_mov(s, type, a0, a2); 2035 } 2036 } 2037 } 2038} 2039 2040static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, 2041 const int *const_args) 2042{ 2043 static const struct { uint8_t bit1, bit2; } bits[] = { 2044 [TCG_COND_LT ] = { CR_LT, CR_LT }, 2045 [TCG_COND_LE ] = { CR_LT, CR_GT }, 2046 [TCG_COND_GT ] = { CR_GT, CR_GT }, 2047 [TCG_COND_GE ] = { CR_GT, CR_LT }, 2048 [TCG_COND_LTU] = { CR_LT, CR_LT }, 2049 [TCG_COND_LEU] = { CR_LT, CR_GT }, 2050 [TCG_COND_GTU] = { CR_GT, CR_GT }, 2051 [TCG_COND_GEU] = { CR_GT, CR_LT }, 2052 }; 2053 2054 TCGCond cond = args[4], cond2; 2055 TCGArg al, ah, bl, bh; 2056 int blconst, bhconst; 2057 int op, bit1, bit2; 2058 2059 al = args[0]; 2060 ah = args[1]; 2061 bl = args[2]; 2062 bh = args[3]; 2063 blconst = const_args[2]; 2064 bhconst = const_args[3]; 2065 2066 switch (cond) { 2067 case TCG_COND_EQ: 2068 op = CRAND; 2069 goto do_equality; 2070 case TCG_COND_NE: 2071 op = CRNAND; 2072 do_equality: 2073 tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); 2074 tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); 2075 tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2076 break; 2077 2078 case TCG_COND_LT: 2079 case TCG_COND_LE: 2080 case TCG_COND_GT: 2081 case TCG_COND_GE: 2082 case TCG_COND_LTU: 2083 case TCG_COND_LEU: 2084 case TCG_COND_GTU: 2085 case TCG_COND_GEU: 2086 bit1 = 
bits[cond].bit1; 2087 bit2 = bits[cond].bit2; 2088 op = (bit1 != bit2 ? CRANDC : CRAND); 2089 cond2 = tcg_unsigned_cond(cond); 2090 2091 tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); 2092 tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); 2093 tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); 2094 tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); 2095 break; 2096 2097 default: 2098 g_assert_not_reached(); 2099 } 2100} 2101 2102static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, 2103 const int *const_args) 2104{ 2105 tcg_out_cmp2(s, args + 1, const_args + 1); 2106 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); 2107 tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); 2108} 2109 2110static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, 2111 const int *const_args) 2112{ 2113 tcg_out_cmp2(s, args, const_args); 2114 tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); 2115} 2116 2117static void tcg_out_mb(TCGContext *s, TCGArg a0) 2118{ 2119 uint32_t insn; 2120 2121 if (a0 & TCG_MO_ST_LD) { 2122 insn = HWSYNC; 2123 } else { 2124 insn = LWSYNC; 2125 } 2126 2127 tcg_out32(s, insn); 2128} 2129 2130static void tcg_out_call_int(TCGContext *s, int lk, 2131 const tcg_insn_unit *target) 2132{ 2133#ifdef _CALL_AIX 2134 /* Look through the descriptor. If the branch is in range, and we 2135 don't have to spend too much effort on building the toc. */ 2136 const void *tgt = ((const void * const *)target)[0]; 2137 uintptr_t toc = ((const uintptr_t *)target)[1]; 2138 intptr_t diff = tcg_pcrel_diff(s, tgt); 2139 2140 if (in_range_b(diff) && toc == (uint32_t)toc) { 2141 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); 2142 tcg_out_b(s, lk, tgt); 2143 } else { 2144 /* Fold the low bits of the constant into the addresses below. */ 2145 intptr_t arg = (intptr_t)target; 2146 int ofs = (int16_t)arg; 2147 2148 if (ofs + 8 < 0x8000) { 2149 arg -= ofs; 2150 } else { 2151 ofs = 0; 2152 } 2153 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); 2154 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); 2155 tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); 2156 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); 2157 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2158 } 2159#elif defined(_CALL_ELF) && _CALL_ELF == 2 2160 intptr_t diff; 2161 2162 /* In the ELFv2 ABI, we have to set up r12 to contain the destination 2163 address, which the callee uses to compute its TOC address. */ 2164 /* FIXME: when the branch is in range, we could avoid r12 load if we 2165 knew that the destination uses the same TOC, and what its local 2166 entry point offset is. 
*/ 2167 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); 2168 2169 diff = tcg_pcrel_diff(s, target); 2170 if (in_range_b(diff)) { 2171 tcg_out_b(s, lk, target); 2172 } else { 2173 tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); 2174 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2175 } 2176#else 2177 tcg_out_b(s, lk, target); 2178#endif 2179} 2180 2181static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 2182 const TCGHelperInfo *info) 2183{ 2184 tcg_out_call_int(s, LK, target); 2185} 2186 2187static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = { 2188 [MO_UB] = LBZX, 2189 [MO_UW] = LHZX, 2190 [MO_UL] = LWZX, 2191 [MO_UQ] = LDX, 2192 [MO_SW] = LHAX, 2193 [MO_SL] = LWAX, 2194 [MO_BSWAP | MO_UB] = LBZX, 2195 [MO_BSWAP | MO_UW] = LHBRX, 2196 [MO_BSWAP | MO_UL] = LWBRX, 2197 [MO_BSWAP | MO_UQ] = LDBRX, 2198}; 2199 2200static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = { 2201 [MO_UB] = STBX, 2202 [MO_UW] = STHX, 2203 [MO_UL] = STWX, 2204 [MO_UQ] = STDX, 2205 [MO_BSWAP | MO_UB] = STBX, 2206 [MO_BSWAP | MO_UW] = STHBRX, 2207 [MO_BSWAP | MO_UL] = STWBRX, 2208 [MO_BSWAP | MO_UQ] = STDBRX, 2209}; 2210 2211static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) 2212{ 2213 if (arg < 0) { 2214 arg = TCG_REG_TMP1; 2215 } 2216 tcg_out32(s, MFSPR | RT(arg) | LR); 2217 return arg; 2218} 2219 2220/* 2221 * For the purposes of ppc32 sorting 4 input registers into 4 argument 2222 * registers, there is an outside chance we would require 3 temps. 2223 */ 2224static const TCGLdstHelperParam ldst_helper_param = { 2225 .ra_gen = ldst_ra_gen, 2226 .ntmp = 3, 2227 .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 } 2228}; 2229 2230static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2231{ 2232 MemOp opc = get_memop(lb->oi); 2233 2234 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2235 return false; 2236 } 2237 2238 tcg_out_ld_helper_args(s, lb, &ldst_helper_param); 2239 tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]); 2240 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); 2241 2242 tcg_out_b(s, 0, lb->raddr); 2243 return true; 2244} 2245 2246static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2247{ 2248 MemOp opc = get_memop(lb->oi); 2249 2250 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2251 return false; 2252 } 2253 2254 tcg_out_st_helper_args(s, lb, &ldst_helper_param); 2255 tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]); 2256 2257 tcg_out_b(s, 0, lb->raddr); 2258 return true; 2259} 2260 2261typedef struct { 2262 TCGReg base; 2263 TCGReg index; 2264 TCGAtomAlign aa; 2265} HostAddress; 2266 2267bool tcg_target_has_memory_bswap(MemOp memop) 2268{ 2269 TCGAtomAlign aa; 2270 2271 if ((memop & MO_SIZE) <= MO_64) { 2272 return true; 2273 } 2274 2275 /* 2276 * Reject 16-byte memop with 16-byte atomicity, 2277 * but do allow a pair of 64-bit operations. 2278 */ 2279 aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); 2280 return aa.atom <= MO_64; 2281} 2282 2283/* We expect to use a 16-bit negative offset from ENV. */ 2284#define MIN_TLB_MASK_TABLE_OFS -32768 2285 2286/* 2287 * For system-mode, perform the TLB load and compare. 2288 * For user-mode, perform any required alignment tests. 2289 * In both cases, return a TCGLabelQemuLdst structure if the slow path 2290 * is required and fill in @h with the host address for the fast path. 
2291 */ 2292static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 2293 TCGReg addrlo, TCGReg addrhi, 2294 MemOpIdx oi, bool is_ld) 2295{ 2296 TCGType addr_type = s->addr_type; 2297 TCGLabelQemuLdst *ldst = NULL; 2298 MemOp opc = get_memop(oi); 2299 MemOp a_bits, s_bits; 2300 2301 /* 2302 * Book II, Section 1.4, Single-Copy Atomicity, specifies: 2303 * 2304 * Before 3.0, "An access that is not atomic is performed as a set of 2305 * smaller disjoint atomic accesses. In general, the number and alignment 2306 * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN. 2307 * 2308 * As of 3.0, "the non-atomic access is performed as described in 2309 * the corresponding list", which matches MO_ATOM_SUBALIGN. 2310 */ 2311 s_bits = opc & MO_SIZE; 2312 h->aa = atom_and_align_for_opc(s, opc, 2313 have_isa_3_00 ? MO_ATOM_SUBALIGN 2314 : MO_ATOM_IFALIGN, 2315 s_bits == MO_128); 2316 a_bits = h->aa.align; 2317 2318 if (tcg_use_softmmu) { 2319 int mem_index = get_mmuidx(oi); 2320 int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) 2321 : offsetof(CPUTLBEntry, addr_write); 2322 int fast_off = tlb_mask_table_ofs(s, mem_index); 2323 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); 2324 int table_off = fast_off + offsetof(CPUTLBDescFast, table); 2325 2326 ldst = new_ldst_label(s); 2327 ldst->is_ld = is_ld; 2328 ldst->oi = oi; 2329 ldst->addrlo_reg = addrlo; 2330 ldst->addrhi_reg = addrhi; 2331 2332 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ 2333 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); 2334 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); 2335 2336 /* Extract the page index, shifted into place for tlb index. */ 2337 if (TCG_TARGET_REG_BITS == 32) { 2338 tcg_out_shri32(s, TCG_REG_R0, addrlo, 2339 s->page_bits - CPU_TLB_ENTRY_BITS); 2340 } else { 2341 tcg_out_shri64(s, TCG_REG_R0, addrlo, 2342 s->page_bits - CPU_TLB_ENTRY_BITS); 2343 } 2344 tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); 2345 2346 /* 2347 * Load the (low part) TLB comparator into TMP2. 2348 * For 64-bit host, always load the entire 64-bit slot for simplicity. 2349 * We will ignore the high bits with tcg_out_cmp(..., addr_type). 2350 */ 2351 if (TCG_TARGET_REG_BITS == 64) { 2352 if (cmp_off == 0) { 2353 tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, 2354 TCG_REG_TMP1, TCG_REG_TMP2)); 2355 } else { 2356 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, 2357 TCG_REG_TMP1, TCG_REG_TMP2)); 2358 tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, 2359 TCG_REG_TMP1, cmp_off); 2360 } 2361 } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) { 2362 tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, 2363 TCG_REG_TMP1, TCG_REG_TMP2)); 2364 } else { 2365 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); 2366 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2367 cmp_off + 4 * HOST_BIG_ENDIAN); 2368 } 2369 2370 /* 2371 * Load the TLB addend for use on the fast path. 2372 * Do this asap to minimize any load use delay. 2373 */ 2374 if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) { 2375 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2376 offsetof(CPUTLBEntry, addend)); 2377 } 2378 2379 /* Clear the non-page, non-alignment bits from the address in R0. */ 2380 if (TCG_TARGET_REG_BITS == 32) { 2381 /* 2382 * We don't support unaligned accesses on 32-bits. 2383 * Preserve the bottom bits and thus trigger a comparison 2384 * failure on unaligned accesses. 
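             * (The retained alignment bits make the masked address differ from
             * the page-aligned TLB comparator, forcing the slow path.)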
2385 */ 2386 if (a_bits < s_bits) { 2387 a_bits = s_bits; 2388 } 2389 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, 2390 (32 - a_bits) & 31, 31 - s->page_bits); 2391 } else { 2392 TCGReg t = addrlo; 2393 2394 /* 2395 * If the access is unaligned, we need to make sure we fail if we 2396 * cross a page boundary. The trick is to add the access size-1 2397 * to the address before masking the low bits. That will make the 2398 * address overflow to the next page if we cross a page boundary, 2399 * which will then force a mismatch of the TLB compare. 2400 */ 2401 if (a_bits < s_bits) { 2402 unsigned a_mask = (1 << a_bits) - 1; 2403 unsigned s_mask = (1 << s_bits) - 1; 2404 tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); 2405 t = TCG_REG_R0; 2406 } 2407 2408 /* Mask the address for the requested alignment. */ 2409 if (addr_type == TCG_TYPE_I32) { 2410 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, 2411 (32 - a_bits) & 31, 31 - s->page_bits); 2412 } else if (a_bits == 0) { 2413 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); 2414 } else { 2415 tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 2416 64 - s->page_bits, s->page_bits - a_bits); 2417 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); 2418 } 2419 } 2420 2421 if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) { 2422 /* Low part comparison into cr7. */ 2423 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2424 0, 7, TCG_TYPE_I32); 2425 2426 /* Load the high part TLB comparator into TMP2. */ 2427 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2428 cmp_off + 4 * !HOST_BIG_ENDIAN); 2429 2430 /* Load addend, deferred for this case. */ 2431 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2432 offsetof(CPUTLBEntry, addend)); 2433 2434 /* High part comparison into cr6. */ 2435 tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 2436 0, 6, TCG_TYPE_I32); 2437 2438 /* Combine comparisons into cr7. */ 2439 tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2440 } else { 2441 /* Full comparison into cr7. */ 2442 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2443 0, 7, addr_type); 2444 } 2445 2446 /* Load a pointer into the current opcode w/conditional branch-link. */ 2447 ldst->label_ptr[0] = s->code_ptr; 2448 tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); 2449 2450 h->base = TCG_REG_TMP1; 2451 } else { 2452 if (a_bits) { 2453 ldst = new_ldst_label(s); 2454 ldst->is_ld = is_ld; 2455 ldst->oi = oi; 2456 ldst->addrlo_reg = addrlo; 2457 ldst->addrhi_reg = addrhi; 2458 2459 /* We are expecting a_bits to max out at 7, much lower than ANDI. */ 2460 tcg_debug_assert(a_bits < 16); 2461 tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1)); 2462 2463 ldst->label_ptr[0] = s->code_ptr; 2464 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); 2465 } 2466 2467 h->base = guest_base ? TCG_GUEST_BASE_REG : 0; 2468 } 2469 2470 if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) { 2471 /* Zero-extend the guest address for use in the host address. 
*/ 2472 tcg_out_ext32u(s, TCG_REG_R0, addrlo); 2473 h->index = TCG_REG_R0; 2474 } else { 2475 h->index = addrlo; 2476 } 2477 2478 return ldst; 2479} 2480 2481static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, 2482 TCGReg addrlo, TCGReg addrhi, 2483 MemOpIdx oi, TCGType data_type) 2484{ 2485 MemOp opc = get_memop(oi); 2486 TCGLabelQemuLdst *ldst; 2487 HostAddress h; 2488 2489 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); 2490 2491 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2492 if (opc & MO_BSWAP) { 2493 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2494 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2495 tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0)); 2496 } else if (h.base != 0) { 2497 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2498 tcg_out32(s, LWZX | TAB(datahi, h.base, h.index)); 2499 tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0)); 2500 } else if (h.index == datahi) { 2501 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2502 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2503 } else { 2504 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2505 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2506 } 2507 } else { 2508 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; 2509 if (!have_isa_2_06 && insn == LDBRX) { 2510 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2511 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2512 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0)); 2513 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); 2514 } else if (insn) { 2515 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2516 } else { 2517 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; 2518 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2519 tcg_out_movext(s, TCG_TYPE_REG, datalo, 2520 TCG_TYPE_REG, opc & MO_SSIZE, datalo); 2521 } 2522 } 2523 2524 if (ldst) { 2525 ldst->type = data_type; 2526 ldst->datalo_reg = datalo; 2527 ldst->datahi_reg = datahi; 2528 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2529 } 2530} 2531 2532static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, 2533 TCGReg addrlo, TCGReg addrhi, 2534 MemOpIdx oi, TCGType data_type) 2535{ 2536 MemOp opc = get_memop(oi); 2537 TCGLabelQemuLdst *ldst; 2538 HostAddress h; 2539 2540 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); 2541 2542 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2543 if (opc & MO_BSWAP) { 2544 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2545 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2546 tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0)); 2547 } else if (h.base != 0) { 2548 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2549 tcg_out32(s, STWX | SAB(datahi, h.base, h.index)); 2550 tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0)); 2551 } else { 2552 tcg_out32(s, STW | TAI(datahi, h.index, 0)); 2553 tcg_out32(s, STW | TAI(datalo, h.index, 4)); 2554 } 2555 } else { 2556 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; 2557 if (!have_isa_2_06 && insn == STDBRX) { 2558 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2559 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4)); 2560 tcg_out_shri64(s, TCG_REG_R0, datalo, 32); 2561 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1)); 2562 } else { 2563 tcg_out32(s, insn | SAB(datalo, h.base, h.index)); 2564 } 2565 } 2566 2567 if (ldst) { 2568 ldst->type = data_type; 2569 ldst->datalo_reg = datalo; 2570 ldst->datahi_reg = datahi; 2571 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2572 } 2573} 2574 
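/*
 * Emit a 128-bit guest load or store.  LQ/STQ have no indexed form, so the
 * host address is first composed into a single register.  When 16-byte
 * atomicity is required, the operands are constrained to an even/odd
 * register pair and a single LQ/STQ is used; otherwise the access is split
 * into two 8-byte operations, using LDBRX/STDBRX when a byte swap is needed.
 */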
2575static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, 2576 TCGReg addr_reg, MemOpIdx oi, bool is_ld) 2577{ 2578 TCGLabelQemuLdst *ldst; 2579 HostAddress h; 2580 bool need_bswap; 2581 uint32_t insn; 2582 TCGReg index; 2583 2584 ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld); 2585 2586 /* Compose the final address, as LQ/STQ have no indexing. */ 2587 index = h.index; 2588 if (h.base != 0) { 2589 index = TCG_REG_TMP1; 2590 tcg_out32(s, ADD | TAB(index, h.base, h.index)); 2591 } 2592 need_bswap = get_memop(oi) & MO_BSWAP; 2593 2594 if (h.aa.atom == MO_128) { 2595 tcg_debug_assert(!need_bswap); 2596 tcg_debug_assert(datalo & 1); 2597 tcg_debug_assert(datahi == datalo - 1); 2598 insn = is_ld ? LQ : STQ; 2599 tcg_out32(s, insn | TAI(datahi, index, 0)); 2600 } else { 2601 TCGReg d1, d2; 2602 2603 if (HOST_BIG_ENDIAN ^ need_bswap) { 2604 d1 = datahi, d2 = datalo; 2605 } else { 2606 d1 = datalo, d2 = datahi; 2607 } 2608 2609 if (need_bswap) { 2610 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); 2611 insn = is_ld ? LDBRX : STDBRX; 2612 tcg_out32(s, insn | TAB(d1, 0, index)); 2613 tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); 2614 } else { 2615 insn = is_ld ? LD : STD; 2616 tcg_out32(s, insn | TAI(d1, index, 0)); 2617 tcg_out32(s, insn | TAI(d2, index, 8)); 2618 } 2619 } 2620 2621 if (ldst) { 2622 ldst->type = TCG_TYPE_I128; 2623 ldst->datalo_reg = datalo; 2624 ldst->datahi_reg = datahi; 2625 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2626 } 2627} 2628 2629static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2630{ 2631 int i; 2632 for (i = 0; i < count; ++i) { 2633 p[i] = NOP; 2634 } 2635} 2636 2637/* Parameters for function call generation, used in tcg.c. */ 2638#define TCG_TARGET_STACK_ALIGN 16 2639 2640#ifdef _CALL_AIX 2641# define LINK_AREA_SIZE (6 * SZR) 2642# define LR_OFFSET (1 * SZR) 2643# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) 2644#elif defined(_CALL_DARWIN) 2645# define LINK_AREA_SIZE (6 * SZR) 2646# define LR_OFFSET (2 * SZR) 2647#elif TCG_TARGET_REG_BITS == 64 2648# if defined(_CALL_ELF) && _CALL_ELF == 2 2649# define LINK_AREA_SIZE (4 * SZR) 2650# define LR_OFFSET (1 * SZR) 2651# endif 2652#else /* TCG_TARGET_REG_BITS == 32 */ 2653# if defined(_CALL_SYSV) 2654# define LINK_AREA_SIZE (2 * SZR) 2655# define LR_OFFSET (1 * SZR) 2656# endif 2657#endif 2658#ifndef LR_OFFSET 2659# error "Unhandled abi" 2660#endif 2661#ifndef TCG_TARGET_CALL_STACK_OFFSET 2662# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE 2663#endif 2664 2665#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2666#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) 2667 2668#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ 2669 + TCG_STATIC_CALL_ARGS_SIZE \ 2670 + CPU_TEMP_BUF_SIZE \ 2671 + REG_SAVE_SIZE \ 2672 + TCG_TARGET_STACK_ALIGN - 1) \ 2673 & -TCG_TARGET_STACK_ALIGN) 2674 2675#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) 2676 2677static void tcg_target_qemu_prologue(TCGContext *s) 2678{ 2679 int i; 2680 2681#ifdef _CALL_AIX 2682 const void **desc = (const void **)s->code_ptr; 2683 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ 2684 desc[1] = 0; /* environment pointer */ 2685 s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ 2686#endif 2687 2688 tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, 2689 CPU_TEMP_BUF_SIZE); 2690 2691 /* Prologue */ 2692 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); 2693 tcg_out32(s, (SZR == 8 ? 
STDU : STWU) 2694 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); 2695 2696 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2697 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2698 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2699 } 2700 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2701 2702 if (!tcg_use_softmmu && guest_base) { 2703 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); 2704 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2705 } 2706 2707 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2708 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); 2709 tcg_out32(s, BCCTR | BO_ALWAYS); 2710 2711 /* Epilogue */ 2712 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2713 2714 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2715 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2716 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2717 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2718 } 2719 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); 2720 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); 2721 tcg_out32(s, BCLR | BO_ALWAYS); 2722} 2723 2724static void tcg_out_tb_start(TCGContext *s) 2725{ 2726 /* Load TCG_REG_TB. */ 2727 if (USE_REG_TB) { 2728 if (have_isa_3_00) { 2729 /* lnia REG_TB */ 2730 tcg_out_addpcis(s, TCG_REG_TB, 0); 2731 } else { 2732 /* bcl 20,31,$+4 (preferred form for getting nia) */ 2733 tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK); 2734 tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR); 2735 } 2736 } 2737} 2738 2739static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) 2740{ 2741 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); 2742 tcg_out_b(s, 0, tcg_code_gen_epilogue); 2743} 2744 2745static void tcg_out_goto_tb(TCGContext *s, int which) 2746{ 2747 uintptr_t ptr = get_jmp_target_addr(s, which); 2748 int16_t lo; 2749 2750 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2751 set_jmp_insn_offset(s, which); 2752 tcg_out32(s, NOP); 2753 2754 /* When branch is out of range, fall through to indirect. 
*/ 2755 if (USE_REG_TB) { 2756 ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr); 2757 tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset); 2758 } else if (have_isa_3_10) { 2759 ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr); 2760 tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1); 2761 } else if (have_isa_3_00) { 2762 ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4; 2763 lo = offset; 2764 tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo); 2765 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2766 } else { 2767 lo = ptr; 2768 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo); 2769 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2770 } 2771 2772 tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); 2773 tcg_out32(s, BCCTR | BO_ALWAYS); 2774 set_jmp_reset_offset(s, which); 2775} 2776 2777void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2778 uintptr_t jmp_rx, uintptr_t jmp_rw) 2779{ 2780 uintptr_t addr = tb->jmp_target_addr[n]; 2781 intptr_t diff = addr - jmp_rx; 2782 tcg_insn_unit insn; 2783 2784 if (in_range_b(diff)) { 2785 insn = B | (diff & 0x3fffffc); 2786 } else { 2787 insn = NOP; 2788 } 2789 2790 qatomic_set((uint32_t *)jmp_rw, insn); 2791 flush_idcache_range(jmp_rx, jmp_rw, 4); 2792} 2793 2794static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2795 const TCGArg args[TCG_MAX_OP_ARGS], 2796 const int const_args[TCG_MAX_OP_ARGS]) 2797{ 2798 TCGArg a0, a1, a2; 2799 2800 switch (opc) { 2801 case INDEX_op_goto_ptr: 2802 tcg_out32(s, MTSPR | RS(args[0]) | CTR); 2803 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); 2804 tcg_out32(s, BCCTR | BO_ALWAYS); 2805 break; 2806 case INDEX_op_br: 2807 { 2808 TCGLabel *l = arg_label(args[0]); 2809 uint32_t insn = B; 2810 2811 if (l->has_value) { 2812 insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), 2813 l->u.value_ptr); 2814 } else { 2815 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); 2816 } 2817 tcg_out32(s, insn); 2818 } 2819 break; 2820 case INDEX_op_ld8u_i32: 2821 case INDEX_op_ld8u_i64: 2822 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2823 break; 2824 case INDEX_op_ld8s_i32: 2825 case INDEX_op_ld8s_i64: 2826 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2827 tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); 2828 break; 2829 case INDEX_op_ld16u_i32: 2830 case INDEX_op_ld16u_i64: 2831 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); 2832 break; 2833 case INDEX_op_ld16s_i32: 2834 case INDEX_op_ld16s_i64: 2835 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); 2836 break; 2837 case INDEX_op_ld_i32: 2838 case INDEX_op_ld32u_i64: 2839 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); 2840 break; 2841 case INDEX_op_ld32s_i64: 2842 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); 2843 break; 2844 case INDEX_op_ld_i64: 2845 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); 2846 break; 2847 case INDEX_op_st8_i32: 2848 case INDEX_op_st8_i64: 2849 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); 2850 break; 2851 case INDEX_op_st16_i32: 2852 case INDEX_op_st16_i64: 2853 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); 2854 break; 2855 case INDEX_op_st_i32: 2856 case INDEX_op_st32_i64: 2857 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); 2858 break; 2859 case INDEX_op_st_i64: 2860 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); 2861 break; 2862 2863 case INDEX_op_add_i32: 2864 a0 = args[0], a1 = args[1], a2 = args[2]; 2865 if (const_args[2]) { 2866 do_addi_32: 2867 
tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); 2868 } else { 2869 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2870 } 2871 break; 2872 case INDEX_op_sub_i32: 2873 a0 = args[0], a1 = args[1], a2 = args[2]; 2874 if (const_args[1]) { 2875 if (const_args[2]) { 2876 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); 2877 } else { 2878 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2879 } 2880 } else if (const_args[2]) { 2881 a2 = -a2; 2882 goto do_addi_32; 2883 } else { 2884 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2885 } 2886 break; 2887 2888 case INDEX_op_and_i32: 2889 a0 = args[0], a1 = args[1], a2 = args[2]; 2890 if (const_args[2]) { 2891 tcg_out_andi32(s, a0, a1, a2); 2892 } else { 2893 tcg_out32(s, AND | SAB(a1, a0, a2)); 2894 } 2895 break; 2896 case INDEX_op_and_i64: 2897 a0 = args[0], a1 = args[1], a2 = args[2]; 2898 if (const_args[2]) { 2899 tcg_out_andi64(s, a0, a1, a2); 2900 } else { 2901 tcg_out32(s, AND | SAB(a1, a0, a2)); 2902 } 2903 break; 2904 case INDEX_op_or_i64: 2905 case INDEX_op_or_i32: 2906 a0 = args[0], a1 = args[1], a2 = args[2]; 2907 if (const_args[2]) { 2908 tcg_out_ori32(s, a0, a1, a2); 2909 } else { 2910 tcg_out32(s, OR | SAB(a1, a0, a2)); 2911 } 2912 break; 2913 case INDEX_op_xor_i64: 2914 case INDEX_op_xor_i32: 2915 a0 = args[0], a1 = args[1], a2 = args[2]; 2916 if (const_args[2]) { 2917 tcg_out_xori32(s, a0, a1, a2); 2918 } else { 2919 tcg_out32(s, XOR | SAB(a1, a0, a2)); 2920 } 2921 break; 2922 case INDEX_op_andc_i32: 2923 a0 = args[0], a1 = args[1], a2 = args[2]; 2924 if (const_args[2]) { 2925 tcg_out_andi32(s, a0, a1, ~a2); 2926 } else { 2927 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2928 } 2929 break; 2930 case INDEX_op_andc_i64: 2931 a0 = args[0], a1 = args[1], a2 = args[2]; 2932 if (const_args[2]) { 2933 tcg_out_andi64(s, a0, a1, ~a2); 2934 } else { 2935 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2936 } 2937 break; 2938 case INDEX_op_orc_i32: 2939 if (const_args[2]) { 2940 tcg_out_ori32(s, args[0], args[1], ~args[2]); 2941 break; 2942 } 2943 /* FALLTHRU */ 2944 case INDEX_op_orc_i64: 2945 tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); 2946 break; 2947 case INDEX_op_eqv_i32: 2948 if (const_args[2]) { 2949 tcg_out_xori32(s, args[0], args[1], ~args[2]); 2950 break; 2951 } 2952 /* FALLTHRU */ 2953 case INDEX_op_eqv_i64: 2954 tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); 2955 break; 2956 case INDEX_op_nand_i32: 2957 case INDEX_op_nand_i64: 2958 tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); 2959 break; 2960 case INDEX_op_nor_i32: 2961 case INDEX_op_nor_i64: 2962 tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); 2963 break; 2964 2965 case INDEX_op_clz_i32: 2966 tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], 2967 args[2], const_args[2]); 2968 break; 2969 case INDEX_op_ctz_i32: 2970 tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], 2971 args[2], const_args[2]); 2972 break; 2973 case INDEX_op_ctpop_i32: 2974 tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); 2975 break; 2976 2977 case INDEX_op_clz_i64: 2978 tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], 2979 args[2], const_args[2]); 2980 break; 2981 case INDEX_op_ctz_i64: 2982 tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], 2983 args[2], const_args[2]); 2984 break; 2985 case INDEX_op_ctpop_i64: 2986 tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); 2987 break; 2988 2989 case INDEX_op_mul_i32: 2990 a0 = args[0], a1 = args[1], a2 = args[2]; 2991 if (const_args[2]) { 2992 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2993 } else { 2994 tcg_out32(s, MULLW | TAB(a0, a1, a2)); 2995 } 2996 break; 2997 
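    /* Divide and remainder; MODSW/MODUW below are ISA v3.0 instructions. */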
2998 case INDEX_op_div_i32: 2999 tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); 3000 break; 3001 3002 case INDEX_op_divu_i32: 3003 tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); 3004 break; 3005 3006 case INDEX_op_rem_i32: 3007 tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); 3008 break; 3009 3010 case INDEX_op_remu_i32: 3011 tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); 3012 break; 3013 3014 case INDEX_op_shl_i32: 3015 if (const_args[2]) { 3016 /* Limit immediate shift count lest we create an illegal insn. */ 3017 tcg_out_shli32(s, args[0], args[1], args[2] & 31); 3018 } else { 3019 tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); 3020 } 3021 break; 3022 case INDEX_op_shr_i32: 3023 if (const_args[2]) { 3024 /* Limit immediate shift count lest we create an illegal insn. */ 3025 tcg_out_shri32(s, args[0], args[1], args[2] & 31); 3026 } else { 3027 tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); 3028 } 3029 break; 3030 case INDEX_op_sar_i32: 3031 if (const_args[2]) { 3032 tcg_out_sari32(s, args[0], args[1], args[2]); 3033 } else { 3034 tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); 3035 } 3036 break; 3037 case INDEX_op_rotl_i32: 3038 if (const_args[2]) { 3039 tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); 3040 } else { 3041 tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) 3042 | MB(0) | ME(31)); 3043 } 3044 break; 3045 case INDEX_op_rotr_i32: 3046 if (const_args[2]) { 3047 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); 3048 } else { 3049 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); 3050 tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) 3051 | MB(0) | ME(31)); 3052 } 3053 break; 3054 3055 case INDEX_op_brcond_i32: 3056 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 3057 arg_label(args[3]), TCG_TYPE_I32); 3058 break; 3059 case INDEX_op_brcond_i64: 3060 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 3061 arg_label(args[3]), TCG_TYPE_I64); 3062 break; 3063 case INDEX_op_brcond2_i32: 3064 tcg_out_brcond2(s, args, const_args); 3065 break; 3066 3067 case INDEX_op_neg_i32: 3068 case INDEX_op_neg_i64: 3069 tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); 3070 break; 3071 3072 case INDEX_op_not_i32: 3073 case INDEX_op_not_i64: 3074 tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); 3075 break; 3076 3077 case INDEX_op_add_i64: 3078 a0 = args[0], a1 = args[1], a2 = args[2]; 3079 if (const_args[2]) { 3080 do_addi_64: 3081 tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); 3082 } else { 3083 tcg_out32(s, ADD | TAB(a0, a1, a2)); 3084 } 3085 break; 3086 case INDEX_op_sub_i64: 3087 a0 = args[0], a1 = args[1], a2 = args[2]; 3088 if (const_args[1]) { 3089 if (const_args[2]) { 3090 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); 3091 } else { 3092 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 3093 } 3094 } else if (const_args[2]) { 3095 a2 = -a2; 3096 goto do_addi_64; 3097 } else { 3098 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 3099 } 3100 break; 3101 3102 case INDEX_op_shl_i64: 3103 if (const_args[2]) { 3104 /* Limit immediate shift count lest we create an illegal insn. */ 3105 tcg_out_shli64(s, args[0], args[1], args[2] & 63); 3106 } else { 3107 tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); 3108 } 3109 break; 3110 case INDEX_op_shr_i64: 3111 if (const_args[2]) { 3112 /* Limit immediate shift count lest we create an illegal insn. 
*/ 3113 tcg_out_shri64(s, args[0], args[1], args[2] & 63); 3114 } else { 3115 tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); 3116 } 3117 break; 3118 case INDEX_op_sar_i64: 3119 if (const_args[2]) { 3120 tcg_out_sari64(s, args[0], args[1], args[2]); 3121 } else { 3122 tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); 3123 } 3124 break; 3125 case INDEX_op_rotl_i64: 3126 if (const_args[2]) { 3127 tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); 3128 } else { 3129 tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); 3130 } 3131 break; 3132 case INDEX_op_rotr_i64: 3133 if (const_args[2]) { 3134 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); 3135 } else { 3136 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); 3137 tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); 3138 } 3139 break; 3140 3141 case INDEX_op_mul_i64: 3142 a0 = args[0], a1 = args[1], a2 = args[2]; 3143 if (const_args[2]) { 3144 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 3145 } else { 3146 tcg_out32(s, MULLD | TAB(a0, a1, a2)); 3147 } 3148 break; 3149 case INDEX_op_div_i64: 3150 tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); 3151 break; 3152 case INDEX_op_divu_i64: 3153 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); 3154 break; 3155 case INDEX_op_rem_i64: 3156 tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); 3157 break; 3158 case INDEX_op_remu_i64: 3159 tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); 3160 break; 3161 3162 case INDEX_op_qemu_ld_a64_i32: 3163 if (TCG_TARGET_REG_BITS == 32) { 3164 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], 3165 args[3], TCG_TYPE_I32); 3166 break; 3167 } 3168 /* fall through */ 3169 case INDEX_op_qemu_ld_a32_i32: 3170 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 3171 break; 3172 case INDEX_op_qemu_ld_a32_i64: 3173 if (TCG_TARGET_REG_BITS == 64) { 3174 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 3175 args[2], TCG_TYPE_I64); 3176 } else { 3177 tcg_out_qemu_ld(s, args[0], args[1], args[2], -1, 3178 args[3], TCG_TYPE_I64); 3179 } 3180 break; 3181 case INDEX_op_qemu_ld_a64_i64: 3182 if (TCG_TARGET_REG_BITS == 64) { 3183 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 3184 args[2], TCG_TYPE_I64); 3185 } else { 3186 tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], 3187 args[4], TCG_TYPE_I64); 3188 } 3189 break; 3190 case INDEX_op_qemu_ld_a32_i128: 3191 case INDEX_op_qemu_ld_a64_i128: 3192 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3193 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); 3194 break; 3195 3196 case INDEX_op_qemu_st_a64_i32: 3197 if (TCG_TARGET_REG_BITS == 32) { 3198 tcg_out_qemu_st(s, args[0], -1, args[1], args[2], 3199 args[3], TCG_TYPE_I32); 3200 break; 3201 } 3202 /* fall through */ 3203 case INDEX_op_qemu_st_a32_i32: 3204 tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 3205 break; 3206 case INDEX_op_qemu_st_a32_i64: 3207 if (TCG_TARGET_REG_BITS == 64) { 3208 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 3209 args[2], TCG_TYPE_I64); 3210 } else { 3211 tcg_out_qemu_st(s, args[0], args[1], args[2], -1, 3212 args[3], TCG_TYPE_I64); 3213 } 3214 break; 3215 case INDEX_op_qemu_st_a64_i64: 3216 if (TCG_TARGET_REG_BITS == 64) { 3217 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 3218 args[2], TCG_TYPE_I64); 3219 } else { 3220 tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], 3221 args[4], TCG_TYPE_I64); 3222 } 3223 break; 3224 case INDEX_op_qemu_st_a32_i128: 3225 case INDEX_op_qemu_st_a64_i128: 3226 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3227 
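        /* As for the 128-bit load above, but emitting the store variant. */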
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); 3228 break; 3229 3230 case INDEX_op_setcond_i32: 3231 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3232 const_args[2], false); 3233 break; 3234 case INDEX_op_setcond_i64: 3235 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3236 const_args[2], false); 3237 break; 3238 case INDEX_op_negsetcond_i32: 3239 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3240 const_args[2], true); 3241 break; 3242 case INDEX_op_negsetcond_i64: 3243 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3244 const_args[2], true); 3245 break; 3246 case INDEX_op_setcond2_i32: 3247 tcg_out_setcond2(s, args, const_args); 3248 break; 3249 3250 case INDEX_op_bswap16_i32: 3251 case INDEX_op_bswap16_i64: 3252 tcg_out_bswap16(s, args[0], args[1], args[2]); 3253 break; 3254 case INDEX_op_bswap32_i32: 3255 tcg_out_bswap32(s, args[0], args[1], 0); 3256 break; 3257 case INDEX_op_bswap32_i64: 3258 tcg_out_bswap32(s, args[0], args[1], args[2]); 3259 break; 3260 case INDEX_op_bswap64_i64: 3261 tcg_out_bswap64(s, args[0], args[1]); 3262 break; 3263 3264 case INDEX_op_deposit_i32: 3265 if (const_args[2]) { 3266 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; 3267 tcg_out_andi32(s, args[0], args[0], ~mask); 3268 } else { 3269 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], 3270 32 - args[3] - args[4], 31 - args[3]); 3271 } 3272 break; 3273 case INDEX_op_deposit_i64: 3274 if (const_args[2]) { 3275 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; 3276 tcg_out_andi64(s, args[0], args[0], ~mask); 3277 } else { 3278 tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], 3279 64 - args[3] - args[4]); 3280 } 3281 break; 3282 3283 case INDEX_op_extract_i32: 3284 tcg_out_rlw(s, RLWINM, args[0], args[1], 3285 32 - args[2], 32 - args[3], 31); 3286 break; 3287 case INDEX_op_extract_i64: 3288 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); 3289 break; 3290 3291 case INDEX_op_movcond_i32: 3292 tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], 3293 args[3], args[4], const_args[2]); 3294 break; 3295 case INDEX_op_movcond_i64: 3296 tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], 3297 args[3], args[4], const_args[2]); 3298 break; 3299 3300#if TCG_TARGET_REG_BITS == 64 3301 case INDEX_op_add2_i64: 3302#else 3303 case INDEX_op_add2_i32: 3304#endif 3305 /* Note that the CA bit is defined based on the word size of the 3306 environment. So in 64-bit mode it's always carry-out of bit 63. 3307 The fallback code using deposit works just as well for 32-bit. */ 3308 a0 = args[0], a1 = args[1]; 3309 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { 3310 a0 = TCG_REG_R0; 3311 } 3312 if (const_args[4]) { 3313 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); 3314 } else { 3315 tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); 3316 } 3317 if (const_args[5]) { 3318 tcg_out32(s, (args[5] ? 
ADDME : ADDZE) | RT(a1) | RA(args[3])); 3319 } else { 3320 tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); 3321 } 3322 if (a0 != args[0]) { 3323 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3324 } 3325 break; 3326 3327#if TCG_TARGET_REG_BITS == 64 3328 case INDEX_op_sub2_i64: 3329#else 3330 case INDEX_op_sub2_i32: 3331#endif 3332 a0 = args[0], a1 = args[1]; 3333 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { 3334 a0 = TCG_REG_R0; 3335 } 3336 if (const_args[2]) { 3337 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); 3338 } else { 3339 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); 3340 } 3341 if (const_args[3]) { 3342 tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); 3343 } else { 3344 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); 3345 } 3346 if (a0 != args[0]) { 3347 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3348 } 3349 break; 3350 3351 case INDEX_op_muluh_i32: 3352 tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); 3353 break; 3354 case INDEX_op_mulsh_i32: 3355 tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); 3356 break; 3357 case INDEX_op_muluh_i64: 3358 tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); 3359 break; 3360 case INDEX_op_mulsh_i64: 3361 tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); 3362 break; 3363 3364 case INDEX_op_mb: 3365 tcg_out_mb(s, args[0]); 3366 break; 3367 3368 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 3369 case INDEX_op_mov_i64: 3370 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 3371 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 3372 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 3373 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 3374 case INDEX_op_ext8s_i64: 3375 case INDEX_op_ext8u_i32: 3376 case INDEX_op_ext8u_i64: 3377 case INDEX_op_ext16s_i32: 3378 case INDEX_op_ext16s_i64: 3379 case INDEX_op_ext16u_i32: 3380 case INDEX_op_ext16u_i64: 3381 case INDEX_op_ext32s_i64: 3382 case INDEX_op_ext32u_i64: 3383 case INDEX_op_ext_i32_i64: 3384 case INDEX_op_extu_i32_i64: 3385 case INDEX_op_extrl_i64_i32: 3386 default: 3387 g_assert_not_reached(); 3388 } 3389} 3390 3391int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3392{ 3393 switch (opc) { 3394 case INDEX_op_and_vec: 3395 case INDEX_op_or_vec: 3396 case INDEX_op_xor_vec: 3397 case INDEX_op_andc_vec: 3398 case INDEX_op_not_vec: 3399 case INDEX_op_nor_vec: 3400 case INDEX_op_eqv_vec: 3401 case INDEX_op_nand_vec: 3402 return 1; 3403 case INDEX_op_orc_vec: 3404 return have_isa_2_07; 3405 case INDEX_op_add_vec: 3406 case INDEX_op_sub_vec: 3407 case INDEX_op_smax_vec: 3408 case INDEX_op_smin_vec: 3409 case INDEX_op_umax_vec: 3410 case INDEX_op_umin_vec: 3411 case INDEX_op_shlv_vec: 3412 case INDEX_op_shrv_vec: 3413 case INDEX_op_sarv_vec: 3414 case INDEX_op_rotlv_vec: 3415 return vece <= MO_32 || have_isa_2_07; 3416 case INDEX_op_ssadd_vec: 3417 case INDEX_op_sssub_vec: 3418 case INDEX_op_usadd_vec: 3419 case INDEX_op_ussub_vec: 3420 return vece <= MO_32; 3421 case INDEX_op_cmp_vec: 3422 case INDEX_op_shli_vec: 3423 case INDEX_op_shri_vec: 3424 case INDEX_op_sari_vec: 3425 case INDEX_op_rotli_vec: 3426 return vece <= MO_32 || have_isa_2_07 ? -1 : 0; 3427 case INDEX_op_neg_vec: 3428 return vece >= MO_32 && have_isa_3_00; 3429 case INDEX_op_mul_vec: 3430 switch (vece) { 3431 case MO_8: 3432 case MO_16: 3433 return -1; 3434 case MO_32: 3435 return have_isa_2_07 ? 
1 : -1; 3436 case MO_64: 3437 return have_isa_3_10; 3438 } 3439 return 0; 3440 case INDEX_op_bitsel_vec: 3441 return have_vsx; 3442 case INDEX_op_rotrv_vec: 3443 return -1; 3444 default: 3445 return 0; 3446 } 3447} 3448 3449static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 3450 TCGReg dst, TCGReg src) 3451{ 3452 tcg_debug_assert(dst >= TCG_REG_V0); 3453 3454 /* Splat from integer reg allowed via constraints for v3.00. */ 3455 if (src < TCG_REG_V0) { 3456 tcg_debug_assert(have_isa_3_00); 3457 switch (vece) { 3458 case MO_64: 3459 tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); 3460 return true; 3461 case MO_32: 3462 tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); 3463 return true; 3464 default: 3465 /* Fail, so that we fall back on either dupm or mov+dup. */ 3466 return false; 3467 } 3468 } 3469 3470 /* 3471 * Recall we use (or emulate) VSX integer loads, so the integer is 3472 * right justified within the left (zero-index) double-word. 3473 */ 3474 switch (vece) { 3475 case MO_8: 3476 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); 3477 break; 3478 case MO_16: 3479 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); 3480 break; 3481 case MO_32: 3482 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); 3483 break; 3484 case MO_64: 3485 if (have_vsx) { 3486 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); 3487 break; 3488 } 3489 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); 3490 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); 3491 break; 3492 default: 3493 g_assert_not_reached(); 3494 } 3495 return true; 3496} 3497 3498static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 3499 TCGReg out, TCGReg base, intptr_t offset) 3500{ 3501 int elt; 3502 3503 tcg_debug_assert(out >= TCG_REG_V0); 3504 switch (vece) { 3505 case MO_8: 3506 if (have_isa_3_00) { 3507 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); 3508 } else { 3509 tcg_out_mem_long(s, 0, LVEBX, out, base, offset); 3510 } 3511 elt = extract32(offset, 0, 4); 3512#if !HOST_BIG_ENDIAN 3513 elt ^= 15; 3514#endif 3515 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); 3516 break; 3517 case MO_16: 3518 tcg_debug_assert((offset & 1) == 0); 3519 if (have_isa_3_00) { 3520 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); 3521 } else { 3522 tcg_out_mem_long(s, 0, LVEHX, out, base, offset); 3523 } 3524 elt = extract32(offset, 1, 3); 3525#if !HOST_BIG_ENDIAN 3526 elt ^= 7; 3527#endif 3528 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); 3529 break; 3530 case MO_32: 3531 if (have_isa_3_00) { 3532 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); 3533 break; 3534 } 3535 tcg_debug_assert((offset & 3) == 0); 3536 tcg_out_mem_long(s, 0, LVEWX, out, base, offset); 3537 elt = extract32(offset, 2, 2); 3538#if !HOST_BIG_ENDIAN 3539 elt ^= 3; 3540#endif 3541 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); 3542 break; 3543 case MO_64: 3544 if (have_vsx) { 3545 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); 3546 break; 3547 } 3548 tcg_debug_assert((offset & 7) == 0); 3549 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); 3550 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); 3551 elt = extract32(offset, 3, 1); 3552#if !HOST_BIG_ENDIAN 3553 elt = !elt; 3554#endif 3555 if (elt) { 3556 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); 3557 } else { 3558 tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); 3559 } 3560 break; 3561 default: 3562 g_assert_not_reached(); 3563 } 3564 return true; 3565} 3566 3567static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3568 
unsigned vecl, unsigned vece, 3569 const TCGArg args[TCG_MAX_OP_ARGS], 3570 const int const_args[TCG_MAX_OP_ARGS]) 3571{ 3572 static const uint32_t 3573 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, 3574 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, 3575 mul_op[4] = { 0, 0, VMULUWM, VMULLD }, 3576 neg_op[4] = { 0, 0, VNEGW, VNEGD }, 3577 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, 3578 ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, 3579 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, 3580 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, 3581 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, 3582 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, 3583 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, 3584 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, 3585 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 3586 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 3587 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 3588 smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, 3589 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, 3590 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, 3591 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, 3592 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, 3593 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, 3594 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, 3595 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, 3596 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, 3597 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; 3598 3599 TCGType type = vecl + TCG_TYPE_V64; 3600 TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; 3601 uint32_t insn; 3602 3603 switch (opc) { 3604 case INDEX_op_ld_vec: 3605 tcg_out_ld(s, type, a0, a1, a2); 3606 return; 3607 case INDEX_op_st_vec: 3608 tcg_out_st(s, type, a0, a1, a2); 3609 return; 3610 case INDEX_op_dupm_vec: 3611 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 3612 return; 3613 3614 case INDEX_op_add_vec: 3615 insn = add_op[vece]; 3616 break; 3617 case INDEX_op_sub_vec: 3618 insn = sub_op[vece]; 3619 break; 3620 case INDEX_op_neg_vec: 3621 insn = neg_op[vece]; 3622 a2 = a1; 3623 a1 = 0; 3624 break; 3625 case INDEX_op_mul_vec: 3626 insn = mul_op[vece]; 3627 break; 3628 case INDEX_op_ssadd_vec: 3629 insn = ssadd_op[vece]; 3630 break; 3631 case INDEX_op_sssub_vec: 3632 insn = sssub_op[vece]; 3633 break; 3634 case INDEX_op_usadd_vec: 3635 insn = usadd_op[vece]; 3636 break; 3637 case INDEX_op_ussub_vec: 3638 insn = ussub_op[vece]; 3639 break; 3640 case INDEX_op_smin_vec: 3641 insn = smin_op[vece]; 3642 break; 3643 case INDEX_op_umin_vec: 3644 insn = umin_op[vece]; 3645 break; 3646 case INDEX_op_smax_vec: 3647 insn = smax_op[vece]; 3648 break; 3649 case INDEX_op_umax_vec: 3650 insn = umax_op[vece]; 3651 break; 3652 case INDEX_op_shlv_vec: 3653 insn = shlv_op[vece]; 3654 break; 3655 case INDEX_op_shrv_vec: 3656 insn = shrv_op[vece]; 3657 break; 3658 case INDEX_op_sarv_vec: 3659 insn = sarv_op[vece]; 3660 break; 3661 case INDEX_op_and_vec: 3662 insn = VAND; 3663 break; 3664 case INDEX_op_or_vec: 3665 insn = VOR; 3666 break; 3667 case INDEX_op_xor_vec: 3668 insn = VXOR; 3669 break; 3670 case INDEX_op_andc_vec: 3671 insn = VANDC; 3672 break; 3673 case INDEX_op_not_vec: 3674 insn = VNOR; 3675 a2 = a1; 3676 break; 3677 case INDEX_op_orc_vec: 3678 insn = VORC; 3679 break; 3680 case INDEX_op_nand_vec: 3681 insn = VNAND; 3682 break; 3683 case INDEX_op_nor_vec: 3684 insn = VNOR; 3685 break; 3686 case INDEX_op_eqv_vec: 3687 insn = VEQV; 3688 break; 3689 3690 case INDEX_op_cmp_vec: 3691 switch (args[3]) { 3692 case TCG_COND_EQ: 3693 insn = eq_op[vece]; 3694 break; 3695 case 
TCG_COND_NE: 3696 insn = ne_op[vece]; 3697 break; 3698 case TCG_COND_GT: 3699 insn = gts_op[vece]; 3700 break; 3701 case TCG_COND_GTU: 3702 insn = gtu_op[vece]; 3703 break; 3704 default: 3705 g_assert_not_reached(); 3706 } 3707 break; 3708 3709 case INDEX_op_bitsel_vec: 3710 tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); 3711 return; 3712 3713 case INDEX_op_dup2_vec: 3714 assert(TCG_TARGET_REG_BITS == 32); 3715 /* With inputs a1 = xLxx, a2 = xHxx */ 3716 tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ 3717 tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ 3718 tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ 3719 return; 3720 3721 case INDEX_op_ppc_mrgh_vec: 3722 insn = mrgh_op[vece]; 3723 break; 3724 case INDEX_op_ppc_mrgl_vec: 3725 insn = mrgl_op[vece]; 3726 break; 3727 case INDEX_op_ppc_muleu_vec: 3728 insn = muleu_op[vece]; 3729 break; 3730 case INDEX_op_ppc_mulou_vec: 3731 insn = mulou_op[vece]; 3732 break; 3733 case INDEX_op_ppc_pkum_vec: 3734 insn = pkum_op[vece]; 3735 break; 3736 case INDEX_op_rotlv_vec: 3737 insn = rotl_op[vece]; 3738 break; 3739 case INDEX_op_ppc_msum_vec: 3740 tcg_debug_assert(vece == MO_16); 3741 tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); 3742 return; 3743 3744 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3745 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 3746 default: 3747 g_assert_not_reached(); 3748 } 3749 3750 tcg_debug_assert(insn != 0); 3751 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 3752} 3753 3754static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, 3755 TCGv_vec v1, TCGArg imm, TCGOpcode opci) 3756{ 3757 TCGv_vec t1; 3758 3759 if (vece == MO_32) { 3760 /* 3761 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3762 * So using negative numbers gets us the 4th bit easily. 3763 */ 3764 imm = sextract32(imm, 0, 5); 3765 } else { 3766 imm &= (8 << vece) - 1; 3767 } 3768 3769 /* Splat w/bytes for xxspltib when 2.07 allows MO_64. 
*/ 3770 t1 = tcg_constant_vec(type, MO_8, imm); 3771 vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), 3772 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3773} 3774 3775static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, 3776 TCGv_vec v1, TCGv_vec v2, TCGCond cond) 3777{ 3778 bool need_swap = false, need_inv = false; 3779 3780 tcg_debug_assert(vece <= MO_32 || have_isa_2_07); 3781 3782 switch (cond) { 3783 case TCG_COND_EQ: 3784 case TCG_COND_GT: 3785 case TCG_COND_GTU: 3786 break; 3787 case TCG_COND_NE: 3788 if (have_isa_3_00 && vece <= MO_32) { 3789 break; 3790 } 3791 /* fall through */ 3792 case TCG_COND_LE: 3793 case TCG_COND_LEU: 3794 need_inv = true; 3795 break; 3796 case TCG_COND_LT: 3797 case TCG_COND_LTU: 3798 need_swap = true; 3799 break; 3800 case TCG_COND_GE: 3801 case TCG_COND_GEU: 3802 need_swap = need_inv = true; 3803 break; 3804 default: 3805 g_assert_not_reached(); 3806 } 3807 3808 if (need_inv) { 3809 cond = tcg_invert_cond(cond); 3810 } 3811 if (need_swap) { 3812 TCGv_vec t1; 3813 t1 = v1, v1 = v2, v2 = t1; 3814 cond = tcg_swap_cond(cond); 3815 } 3816 3817 vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), 3818 tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); 3819 3820 if (need_inv) { 3821 tcg_gen_not_vec(vece, v0, v0); 3822 } 3823} 3824 3825static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, 3826 TCGv_vec v1, TCGv_vec v2) 3827{ 3828 TCGv_vec t1 = tcg_temp_new_vec(type); 3829 TCGv_vec t2 = tcg_temp_new_vec(type); 3830 TCGv_vec c0, c16; 3831 3832 switch (vece) { 3833 case MO_8: 3834 case MO_16: 3835 vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), 3836 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3837 vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), 3838 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3839 vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), 3840 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3841 vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), 3842 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3843 vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), 3844 tcgv_vec_arg(v0), tcgv_vec_arg(t1)); 3845 break; 3846 3847 case MO_32: 3848 tcg_debug_assert(!have_isa_2_07); 3849 /* 3850 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3851 * So using -16 is a quick way to represent 16. 3852 */ 3853 c16 = tcg_constant_vec(type, MO_8, -16); 3854 c0 = tcg_constant_vec(type, MO_8, 0); 3855 3856 vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1), 3857 tcgv_vec_arg(v2), tcgv_vec_arg(c16)); 3858 vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), 3859 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3860 vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1), 3861 tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0)); 3862 vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1), 3863 tcgv_vec_arg(t1), tcgv_vec_arg(c16)); 3864 tcg_gen_add_vec(MO_32, v0, t1, t2); 3865 break; 3866 3867 default: 3868 g_assert_not_reached(); 3869 } 3870 tcg_temp_free_vec(t1); 3871 tcg_temp_free_vec(t2); 3872} 3873 3874void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 3875 TCGArg a0, ...) 
3876{ 3877 va_list va; 3878 TCGv_vec v0, v1, v2, t0; 3879 TCGArg a2; 3880 3881 va_start(va, a0); 3882 v0 = temp_tcgv_vec(arg_temp(a0)); 3883 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 3884 a2 = va_arg(va, TCGArg); 3885 3886 switch (opc) { 3887 case INDEX_op_shli_vec: 3888 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); 3889 break; 3890 case INDEX_op_shri_vec: 3891 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); 3892 break; 3893 case INDEX_op_sari_vec: 3894 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); 3895 break; 3896 case INDEX_op_rotli_vec: 3897 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec); 3898 break; 3899 case INDEX_op_cmp_vec: 3900 v2 = temp_tcgv_vec(arg_temp(a2)); 3901 expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); 3902 break; 3903 case INDEX_op_mul_vec: 3904 v2 = temp_tcgv_vec(arg_temp(a2)); 3905 expand_vec_mul(type, vece, v0, v1, v2); 3906 break; 3907 case INDEX_op_rotlv_vec: 3908 v2 = temp_tcgv_vec(arg_temp(a2)); 3909 t0 = tcg_temp_new_vec(type); 3910 tcg_gen_neg_vec(vece, t0, v2); 3911 tcg_gen_rotlv_vec(vece, v0, v1, t0); 3912 tcg_temp_free_vec(t0); 3913 break; 3914 default: 3915 g_assert_not_reached(); 3916 } 3917 va_end(va); 3918} 3919 3920static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 3921{ 3922 switch (op) { 3923 case INDEX_op_goto_ptr: 3924 return C_O0_I1(r); 3925 3926 case INDEX_op_ld8u_i32: 3927 case INDEX_op_ld8s_i32: 3928 case INDEX_op_ld16u_i32: 3929 case INDEX_op_ld16s_i32: 3930 case INDEX_op_ld_i32: 3931 case INDEX_op_ctpop_i32: 3932 case INDEX_op_neg_i32: 3933 case INDEX_op_not_i32: 3934 case INDEX_op_ext8s_i32: 3935 case INDEX_op_ext16s_i32: 3936 case INDEX_op_bswap16_i32: 3937 case INDEX_op_bswap32_i32: 3938 case INDEX_op_extract_i32: 3939 case INDEX_op_ld8u_i64: 3940 case INDEX_op_ld8s_i64: 3941 case INDEX_op_ld16u_i64: 3942 case INDEX_op_ld16s_i64: 3943 case INDEX_op_ld32u_i64: 3944 case INDEX_op_ld32s_i64: 3945 case INDEX_op_ld_i64: 3946 case INDEX_op_ctpop_i64: 3947 case INDEX_op_neg_i64: 3948 case INDEX_op_not_i64: 3949 case INDEX_op_ext8s_i64: 3950 case INDEX_op_ext16s_i64: 3951 case INDEX_op_ext32s_i64: 3952 case INDEX_op_ext_i32_i64: 3953 case INDEX_op_extu_i32_i64: 3954 case INDEX_op_bswap16_i64: 3955 case INDEX_op_bswap32_i64: 3956 case INDEX_op_bswap64_i64: 3957 case INDEX_op_extract_i64: 3958 return C_O1_I1(r, r); 3959 3960 case INDEX_op_st8_i32: 3961 case INDEX_op_st16_i32: 3962 case INDEX_op_st_i32: 3963 case INDEX_op_st8_i64: 3964 case INDEX_op_st16_i64: 3965 case INDEX_op_st32_i64: 3966 case INDEX_op_st_i64: 3967 return C_O0_I2(r, r); 3968 3969 case INDEX_op_add_i32: 3970 case INDEX_op_and_i32: 3971 case INDEX_op_or_i32: 3972 case INDEX_op_xor_i32: 3973 case INDEX_op_andc_i32: 3974 case INDEX_op_orc_i32: 3975 case INDEX_op_eqv_i32: 3976 case INDEX_op_shl_i32: 3977 case INDEX_op_shr_i32: 3978 case INDEX_op_sar_i32: 3979 case INDEX_op_rotl_i32: 3980 case INDEX_op_rotr_i32: 3981 case INDEX_op_setcond_i32: 3982 case INDEX_op_negsetcond_i32: 3983 case INDEX_op_and_i64: 3984 case INDEX_op_andc_i64: 3985 case INDEX_op_shl_i64: 3986 case INDEX_op_shr_i64: 3987 case INDEX_op_sar_i64: 3988 case INDEX_op_rotl_i64: 3989 case INDEX_op_rotr_i64: 3990 case INDEX_op_setcond_i64: 3991 case INDEX_op_negsetcond_i64: 3992 return C_O1_I2(r, r, ri); 3993 3994 case INDEX_op_mul_i32: 3995 case INDEX_op_mul_i64: 3996 return C_O1_I2(r, r, rI); 3997 3998 case INDEX_op_div_i32: 3999 case INDEX_op_divu_i32: 4000 case INDEX_op_rem_i32: 4001 case INDEX_op_remu_i32: 4002 case INDEX_op_nand_i32: 4003 
    case INDEX_op_nor_i32:
    case INDEX_op_muluh_i32:
    case INDEX_op_mulsh_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i64:
    case INDEX_op_nand_i64:
    case INDEX_op_nor_i64:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
    case INDEX_op_mulsh_i64:
    case INDEX_op_muluh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_sub_i32:
        return C_O1_I2(r, rI, ri);
    case INDEX_op_add_i64:
        return C_O1_I2(r, r, rT);
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
        return C_O1_I2(r, r, rU);
    case INDEX_op_sub_i64:
        return C_O1_I2(r, rI, rT);
    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rZW);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, ri);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, ri, rZ, rZ);
    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);
    case INDEX_op_brcond2_i32:
        return C_O0_I4(r, r, ri, ri);
    case INDEX_op_setcond2_i32:
        return C_O1_I4(r, r, r, ri, ri);
    case INDEX_op_add2_i64:
    case INDEX_op_add2_i32:
        return C_O2_I4(r, r, r, r, rI, rZM);
    case INDEX_op_sub2_i64:
    case INDEX_op_sub2_i32:
        return C_O2_I4(r, r, rI, rZM, r, r);

    case INDEX_op_qemu_ld_a32_i32:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_a64_i32:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
    case INDEX_op_qemu_ld_a32_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
    case INDEX_op_qemu_ld_a64_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);

    case INDEX_op_qemu_st_a32_i32:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st_a64_i32:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
    case INDEX_op_qemu_st_a32_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
    case INDEX_op_qemu_st_a64_i64:
        return TCG_TARGET_REG_BITS == 64 ?
               C_O0_I2(r, r) : C_O0_I4(r, r, r, r);

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        return C_O2_I1(o, m, r);
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return C_O0_I3(o, m, r);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    case INDEX_op_dup_vec:
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);

    default:
        g_assert_not_reached();
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
    if (have_altivec) {
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs,
                       TCG_REG_V9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);  /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);  /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2);  /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
    }
}

#ifdef __ELF__
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif

static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),    /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    uint8_t *p = &debug_frame.fde_reg_ofs[3];
    int i;

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
        p[0] = 0x80 + tcg_target_callee_save_regs[i];
        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
    }

    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif /* __ELF__ */

#undef VMULEUB
#undef VMULEUH
#undef VMULEUW
#undef VMULOUB
#undef VMULOUH
#undef VMULOUW
#undef VMSUMUHM