1/* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25#include "elf.h" 26#include "../tcg-pool.c.inc" 27#include "../tcg-ldst.c.inc" 28 29/* 30 * Standardize on the _CALL_FOO symbols used by GCC: 31 * Apple XCode does not define _CALL_DARWIN. 32 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX. 33 */ 34#if TCG_TARGET_REG_BITS == 64 35# ifdef _CALL_AIX 36 /* ok */ 37# elif defined(_CALL_ELF) && _CALL_ELF == 1 38# define _CALL_AIX 39# elif defined(_CALL_ELF) && _CALL_ELF == 2 40 /* ok */ 41# else 42# error "Unknown ABI" 43# endif 44#else 45# if defined(_CALL_SYSV) || defined(_CALL_DARWIN) 46 /* ok */ 47# elif defined(__APPLE__) 48# define _CALL_DARWIN 49# elif defined(__ELF__) 50# define _CALL_SYSV 51# else 52# error "Unknown ABI" 53# endif 54#endif 55 56#if TCG_TARGET_REG_BITS == 64 57# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND 58# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL 59#else 60# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL 61# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF 62#endif 63#ifdef _CALL_SYSV 64# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN 65# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF 66#else 67# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL 68# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL 69#endif 70 71/* For some memory operations, we need a scratch that isn't R0. For the AIX 72 calling convention, we can re-use the TOC register since we'll be reloading 73 it at every call. Otherwise R12 will do nicely as neither a call-saved 74 register nor a parameter register. */ 75#ifdef _CALL_AIX 76# define TCG_REG_TMP1 TCG_REG_R2 77#else 78# define TCG_REG_TMP1 TCG_REG_R12 79#endif 80#define TCG_REG_TMP2 TCG_REG_R11 81 82#define TCG_VEC_TMP1 TCG_REG_V0 83#define TCG_VEC_TMP2 TCG_REG_V1 84 85#define TCG_REG_TB TCG_REG_R31 86#define USE_REG_TB (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00) 87 88/* Shorthand for size of a pointer. Avoid promotion to unsigned. */ 89#define SZP ((int)sizeof(void *)) 90 91/* Shorthand for size of a register. 
*/ 92#define SZR (TCG_TARGET_REG_BITS / 8) 93 94#define TCG_CT_CONST_S16 0x100 95#define TCG_CT_CONST_S32 0x400 96#define TCG_CT_CONST_U32 0x800 97#define TCG_CT_CONST_ZERO 0x1000 98#define TCG_CT_CONST_MONE 0x2000 99#define TCG_CT_CONST_WSZ 0x4000 100 101#define ALL_GENERAL_REGS 0xffffffffu 102#define ALL_VECTOR_REGS 0xffffffff00000000ull 103 104#ifndef R_PPC64_PCREL34 105#define R_PPC64_PCREL34 132 106#endif 107 108#define have_isel (cpuinfo & CPUINFO_ISEL) 109 110#define TCG_GUEST_BASE_REG TCG_REG_R30 111 112#ifdef CONFIG_DEBUG_TCG 113static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = { 114 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", 115 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 116 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", 117 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", 118 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 119 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 120 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 121 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", 122}; 123#endif 124 125static const int tcg_target_reg_alloc_order[] = { 126 TCG_REG_R14, /* call saved registers */ 127 TCG_REG_R15, 128 TCG_REG_R16, 129 TCG_REG_R17, 130 TCG_REG_R18, 131 TCG_REG_R19, 132 TCG_REG_R20, 133 TCG_REG_R21, 134 TCG_REG_R22, 135 TCG_REG_R23, 136 TCG_REG_R24, 137 TCG_REG_R25, 138 TCG_REG_R26, 139 TCG_REG_R27, 140 TCG_REG_R28, 141 TCG_REG_R29, 142 TCG_REG_R30, 143 TCG_REG_R31, 144 TCG_REG_R12, /* call clobbered, non-arguments */ 145 TCG_REG_R11, 146 TCG_REG_R2, 147 TCG_REG_R13, 148 TCG_REG_R10, /* call clobbered, arguments */ 149 TCG_REG_R9, 150 TCG_REG_R8, 151 TCG_REG_R7, 152 TCG_REG_R6, 153 TCG_REG_R5, 154 TCG_REG_R4, 155 TCG_REG_R3, 156 157 /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */ 158 TCG_REG_V2, /* call clobbered, vectors */ 159 TCG_REG_V3, 160 TCG_REG_V4, 161 TCG_REG_V5, 162 TCG_REG_V6, 163 TCG_REG_V7, 164 TCG_REG_V8, 165 TCG_REG_V9, 166 TCG_REG_V10, 167 TCG_REG_V11, 168 TCG_REG_V12, 169 TCG_REG_V13, 170 TCG_REG_V14, 171 TCG_REG_V15, 172 TCG_REG_V16, 173 TCG_REG_V17, 174 TCG_REG_V18, 175 TCG_REG_V19, 176}; 177 178static const int tcg_target_call_iarg_regs[] = { 179 TCG_REG_R3, 180 TCG_REG_R4, 181 TCG_REG_R5, 182 TCG_REG_R6, 183 TCG_REG_R7, 184 TCG_REG_R8, 185 TCG_REG_R9, 186 TCG_REG_R10 187}; 188 189static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) 190{ 191 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); 192 tcg_debug_assert(slot >= 0 && slot <= 1); 193 return TCG_REG_R3 + slot; 194} 195 196static const int tcg_target_callee_save_regs[] = { 197#ifdef _CALL_DARWIN 198 TCG_REG_R11, 199#endif 200 TCG_REG_R14, 201 TCG_REG_R15, 202 TCG_REG_R16, 203 TCG_REG_R17, 204 TCG_REG_R18, 205 TCG_REG_R19, 206 TCG_REG_R20, 207 TCG_REG_R21, 208 TCG_REG_R22, 209 TCG_REG_R23, 210 TCG_REG_R24, 211 TCG_REG_R25, 212 TCG_REG_R26, 213 TCG_REG_R27, /* currently used for the global env */ 214 TCG_REG_R28, 215 TCG_REG_R29, 216 TCG_REG_R30, 217 TCG_REG_R31 218}; 219 220/* For PPC, we use TB+4 instead of TB as the base. 
*/ 221static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target) 222{ 223 return tcg_tbrel_diff(s, target) - 4; 224} 225 226static inline bool in_range_b(tcg_target_long target) 227{ 228 return target == sextract64(target, 0, 26); 229} 230 231static uint32_t reloc_pc24_val(const tcg_insn_unit *pc, 232 const tcg_insn_unit *target) 233{ 234 ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); 235 tcg_debug_assert(in_range_b(disp)); 236 return disp & 0x3fffffc; 237} 238 239static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 240{ 241 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 242 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 243 244 if (in_range_b(disp)) { 245 *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc); 246 return true; 247 } 248 return false; 249} 250 251static uint16_t reloc_pc14_val(const tcg_insn_unit *pc, 252 const tcg_insn_unit *target) 253{ 254 ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); 255 tcg_debug_assert(disp == (int16_t) disp); 256 return disp & 0xfffc; 257} 258 259static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 260{ 261 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 262 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 263 264 if (disp == (int16_t) disp) { 265 *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc); 266 return true; 267 } 268 return false; 269} 270 271static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 272{ 273 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 274 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 275 276 if (disp == sextract64(disp, 0, 34)) { 277 src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff); 278 src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff); 279 return true; 280 } 281 return false; 282} 283 284/* test if a constant matches the constraint */ 285static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) 286{ 287 if (ct & TCG_CT_CONST) { 288 return 1; 289 } 290 291 /* The only 32-bit constraint we use aside from 292 TCG_CT_CONST is TCG_CT_CONST_S16. */ 293 if (type == TCG_TYPE_I32) { 294 val = (int32_t)val; 295 } 296 297 if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { 298 return 1; 299 } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { 300 return 1; 301 } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { 302 return 1; 303 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 304 return 1; 305 } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { 306 return 1; 307 } else if ((ct & TCG_CT_CONST_WSZ) 308 && val == (type == TCG_TYPE_I32 ? 
32 : 64)) { 309 return 1; 310 } 311 return 0; 312} 313 314#define OPCD(opc) ((opc)<<26) 315#define XO19(opc) (OPCD(19)|((opc)<<1)) 316#define MD30(opc) (OPCD(30)|((opc)<<2)) 317#define MDS30(opc) (OPCD(30)|((opc)<<1)) 318#define XO31(opc) (OPCD(31)|((opc)<<1)) 319#define XO58(opc) (OPCD(58)|(opc)) 320#define XO62(opc) (OPCD(62)|(opc)) 321#define VX4(opc) (OPCD(4)|(opc)) 322 323#define B OPCD( 18) 324#define BC OPCD( 16) 325 326#define LBZ OPCD( 34) 327#define LHZ OPCD( 40) 328#define LHA OPCD( 42) 329#define LWZ OPCD( 32) 330#define LWZUX XO31( 55) 331#define LD XO58( 0) 332#define LDX XO31( 21) 333#define LDU XO58( 1) 334#define LDUX XO31( 53) 335#define LWA XO58( 2) 336#define LWAX XO31(341) 337#define LQ OPCD( 56) 338 339#define STB OPCD( 38) 340#define STH OPCD( 44) 341#define STW OPCD( 36) 342#define STD XO62( 0) 343#define STDU XO62( 1) 344#define STDX XO31(149) 345#define STQ XO62( 2) 346 347#define PLWA OPCD( 41) 348#define PLD OPCD( 57) 349#define PLXSD OPCD( 42) 350#define PLXV OPCD(25 * 2 + 1) /* force tx=1 */ 351 352#define PSTD OPCD( 61) 353#define PSTXSD OPCD( 46) 354#define PSTXV OPCD(27 * 2 + 1) /* force sx=1 */ 355 356#define ADDIC OPCD( 12) 357#define ADDI OPCD( 14) 358#define ADDIS OPCD( 15) 359#define ORI OPCD( 24) 360#define ORIS OPCD( 25) 361#define XORI OPCD( 26) 362#define XORIS OPCD( 27) 363#define ANDI OPCD( 28) 364#define ANDIS OPCD( 29) 365#define MULLI OPCD( 7) 366#define CMPLI OPCD( 10) 367#define CMPI OPCD( 11) 368#define SUBFIC OPCD( 8) 369 370#define LWZU OPCD( 33) 371#define STWU OPCD( 37) 372 373#define RLWIMI OPCD( 20) 374#define RLWINM OPCD( 21) 375#define RLWNM OPCD( 23) 376 377#define RLDICL MD30( 0) 378#define RLDICR MD30( 1) 379#define RLDIMI MD30( 3) 380#define RLDCL MDS30( 8) 381 382#define BCLR XO19( 16) 383#define BCCTR XO19(528) 384#define CRAND XO19(257) 385#define CRANDC XO19(129) 386#define CRNAND XO19(225) 387#define CROR XO19(449) 388#define CRNOR XO19( 33) 389#define ADDPCIS XO19( 2) 390 391#define EXTSB XO31(954) 392#define EXTSH XO31(922) 393#define EXTSW XO31(986) 394#define ADD XO31(266) 395#define ADDE XO31(138) 396#define ADDME XO31(234) 397#define ADDZE XO31(202) 398#define ADDC XO31( 10) 399#define AND XO31( 28) 400#define SUBF XO31( 40) 401#define SUBFC XO31( 8) 402#define SUBFE XO31(136) 403#define SUBFME XO31(232) 404#define SUBFZE XO31(200) 405#define OR XO31(444) 406#define XOR XO31(316) 407#define MULLW XO31(235) 408#define MULHW XO31( 75) 409#define MULHWU XO31( 11) 410#define DIVW XO31(491) 411#define DIVWU XO31(459) 412#define MODSW XO31(779) 413#define MODUW XO31(267) 414#define CMP XO31( 0) 415#define CMPL XO31( 32) 416#define LHBRX XO31(790) 417#define LWBRX XO31(534) 418#define LDBRX XO31(532) 419#define STHBRX XO31(918) 420#define STWBRX XO31(662) 421#define STDBRX XO31(660) 422#define MFSPR XO31(339) 423#define MTSPR XO31(467) 424#define SRAWI XO31(824) 425#define NEG XO31(104) 426#define MFCR XO31( 19) 427#define MFOCRF (MFCR | (1u << 20)) 428#define NOR XO31(124) 429#define CNTLZW XO31( 26) 430#define CNTLZD XO31( 58) 431#define CNTTZW XO31(538) 432#define CNTTZD XO31(570) 433#define CNTPOPW XO31(378) 434#define CNTPOPD XO31(506) 435#define ANDC XO31( 60) 436#define ORC XO31(412) 437#define EQV XO31(284) 438#define NAND XO31(476) 439#define ISEL XO31( 15) 440 441#define MULLD XO31(233) 442#define MULHD XO31( 73) 443#define MULHDU XO31( 9) 444#define DIVD XO31(489) 445#define DIVDU XO31(457) 446#define MODSD XO31(777) 447#define MODUD XO31(265) 448 449#define LBZX XO31( 87) 450#define LHZX XO31(279) 451#define LHAX 
XO31(343) 452#define LWZX XO31( 23) 453#define STBX XO31(215) 454#define STHX XO31(407) 455#define STWX XO31(151) 456 457#define EIEIO XO31(854) 458#define HWSYNC XO31(598) 459#define LWSYNC (HWSYNC | (1u << 21)) 460 461#define SPR(a, b) ((((a)<<5)|(b))<<11) 462#define LR SPR(8, 0) 463#define CTR SPR(9, 0) 464 465#define SLW XO31( 24) 466#define SRW XO31(536) 467#define SRAW XO31(792) 468 469#define SLD XO31( 27) 470#define SRD XO31(539) 471#define SRAD XO31(794) 472#define SRADI XO31(413<<1) 473 474#define BRH XO31(219) 475#define BRW XO31(155) 476#define BRD XO31(187) 477 478#define TW XO31( 4) 479#define TRAP (TW | TO(31)) 480 481#define SETBC XO31(384) /* v3.10 */ 482#define SETBCR XO31(416) /* v3.10 */ 483#define SETNBC XO31(448) /* v3.10 */ 484#define SETNBCR XO31(480) /* v3.10 */ 485 486#define NOP ORI /* ori 0,0,0 */ 487 488#define LVX XO31(103) 489#define LVEBX XO31(7) 490#define LVEHX XO31(39) 491#define LVEWX XO31(71) 492#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ 493#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ 494#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ 495#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */ 496#define LXSD (OPCD(57) | 2) /* v3.00 */ 497#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */ 498 499#define STVX XO31(231) 500#define STVEWX XO31(199) 501#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ 502#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ 503#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */ 504#define STXSD (OPCD(61) | 2) /* v3.00 */ 505 506#define VADDSBS VX4(768) 507#define VADDUBS VX4(512) 508#define VADDUBM VX4(0) 509#define VADDSHS VX4(832) 510#define VADDUHS VX4(576) 511#define VADDUHM VX4(64) 512#define VADDSWS VX4(896) 513#define VADDUWS VX4(640) 514#define VADDUWM VX4(128) 515#define VADDUDM VX4(192) /* v2.07 */ 516 517#define VSUBSBS VX4(1792) 518#define VSUBUBS VX4(1536) 519#define VSUBUBM VX4(1024) 520#define VSUBSHS VX4(1856) 521#define VSUBUHS VX4(1600) 522#define VSUBUHM VX4(1088) 523#define VSUBSWS VX4(1920) 524#define VSUBUWS VX4(1664) 525#define VSUBUWM VX4(1152) 526#define VSUBUDM VX4(1216) /* v2.07 */ 527 528#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */ 529#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */ 530 531#define VMAXSB VX4(258) 532#define VMAXSH VX4(322) 533#define VMAXSW VX4(386) 534#define VMAXSD VX4(450) /* v2.07 */ 535#define VMAXUB VX4(2) 536#define VMAXUH VX4(66) 537#define VMAXUW VX4(130) 538#define VMAXUD VX4(194) /* v2.07 */ 539#define VMINSB VX4(770) 540#define VMINSH VX4(834) 541#define VMINSW VX4(898) 542#define VMINSD VX4(962) /* v2.07 */ 543#define VMINUB VX4(514) 544#define VMINUH VX4(578) 545#define VMINUW VX4(642) 546#define VMINUD VX4(706) /* v2.07 */ 547 548#define VCMPEQUB VX4(6) 549#define VCMPEQUH VX4(70) 550#define VCMPEQUW VX4(134) 551#define VCMPEQUD VX4(199) /* v2.07 */ 552#define VCMPGTSB VX4(774) 553#define VCMPGTSH VX4(838) 554#define VCMPGTSW VX4(902) 555#define VCMPGTSD VX4(967) /* v2.07 */ 556#define VCMPGTUB VX4(518) 557#define VCMPGTUH VX4(582) 558#define VCMPGTUW VX4(646) 559#define VCMPGTUD VX4(711) /* v2.07 */ 560#define VCMPNEB VX4(7) /* v3.00 */ 561#define VCMPNEH VX4(71) /* v3.00 */ 562#define VCMPNEW VX4(135) /* v3.00 */ 563 564#define VSLB VX4(260) 565#define VSLH VX4(324) 566#define VSLW VX4(388) 567#define VSLD VX4(1476) /* v2.07 */ 568#define VSRB VX4(516) 569#define VSRH VX4(580) 570#define VSRW VX4(644) 571#define VSRD VX4(1732) /* v2.07 */ 572#define VSRAB VX4(772) 573#define VSRAH VX4(836) 574#define VSRAW VX4(900) 
575#define VSRAD VX4(964) /* v2.07 */ 576#define VRLB VX4(4) 577#define VRLH VX4(68) 578#define VRLW VX4(132) 579#define VRLD VX4(196) /* v2.07 */ 580 581#define VMULEUB VX4(520) 582#define VMULEUH VX4(584) 583#define VMULEUW VX4(648) /* v2.07 */ 584#define VMULOUB VX4(8) 585#define VMULOUH VX4(72) 586#define VMULOUW VX4(136) /* v2.07 */ 587#define VMULUWM VX4(137) /* v2.07 */ 588#define VMULLD VX4(457) /* v3.10 */ 589#define VMSUMUHM VX4(38) 590 591#define VMRGHB VX4(12) 592#define VMRGHH VX4(76) 593#define VMRGHW VX4(140) 594#define VMRGLB VX4(268) 595#define VMRGLH VX4(332) 596#define VMRGLW VX4(396) 597 598#define VPKUHUM VX4(14) 599#define VPKUWUM VX4(78) 600 601#define VAND VX4(1028) 602#define VANDC VX4(1092) 603#define VNOR VX4(1284) 604#define VOR VX4(1156) 605#define VXOR VX4(1220) 606#define VEQV VX4(1668) /* v2.07 */ 607#define VNAND VX4(1412) /* v2.07 */ 608#define VORC VX4(1348) /* v2.07 */ 609 610#define VSPLTB VX4(524) 611#define VSPLTH VX4(588) 612#define VSPLTW VX4(652) 613#define VSPLTISB VX4(780) 614#define VSPLTISH VX4(844) 615#define VSPLTISW VX4(908) 616 617#define VSLDOI VX4(44) 618 619#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ 620#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ 621#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */ 622 623#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ 624#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ 625#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ 626#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ 627#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */ 628#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */ 629 630#define RT(r) ((r)<<21) 631#define RS(r) ((r)<<21) 632#define RA(r) ((r)<<16) 633#define RB(r) ((r)<<11) 634#define TO(t) ((t)<<21) 635#define SH(s) ((s)<<11) 636#define MB(b) ((b)<<6) 637#define ME(e) ((e)<<1) 638#define BO(o) ((o)<<21) 639#define MB64(b) ((b)<<5) 640#define FXM(b) (1 << (19 - (b))) 641 642#define VRT(r) (((r) & 31) << 21) 643#define VRA(r) (((r) & 31) << 16) 644#define VRB(r) (((r) & 31) << 11) 645#define VRC(r) (((r) & 31) << 6) 646 647#define LK 1 648 649#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) 650#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) 651#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) 652#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) 653 654#define BF(n) ((n)<<23) 655#define BI(n, c) (((c)+((n)*4))<<16) 656#define BT(n, c) (((c)+((n)*4))<<21) 657#define BA(n, c) (((c)+((n)*4))<<16) 658#define BB(n, c) (((c)+((n)*4))<<11) 659#define BC_(n, c) (((c)+((n)*4))<<6) 660 661#define BO_COND_TRUE BO(12) 662#define BO_COND_FALSE BO( 4) 663#define BO_ALWAYS BO(20) 664 665enum { 666 CR_LT, 667 CR_GT, 668 CR_EQ, 669 CR_SO 670}; 671 672static const uint32_t tcg_to_bc[] = { 673 [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, 674 [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, 675 [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, 676 [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, 677 [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, 678 [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, 679 [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, 680 [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, 681 [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, 682 [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, 683}; 684 685/* The low bit here is set if the RA and RB fields must be inverted. 
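   For example, TCG_COND_NE tests the same CR_EQ bit as TCG_COND_EQ; its entry
   has the low bit set, so tcg_out_movcond swaps the two value operands (and
   clears the bit) before emitting the isel.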
 */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
{
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
}

static void tcg_out_prefix_align(TCGContext *s)
{
    if (tcg_out_need_prefix_align(s)) {
        tcg_out32(s, NOP);
    }
}

static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ?
4 : 0); 764} 765 766/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */ 767static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt, 768 unsigned ra, tcg_target_long imm, bool r) 769{ 770 tcg_insn_unit p, i; 771 772 p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff); 773 i = opc | TAI(rt, ra, imm); 774 775 tcg_out_prefix_align(s); 776 tcg_out32(s, p); 777 tcg_out32(s, i); 778} 779 780/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */ 781static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt, 782 unsigned ra, tcg_target_long imm, bool r) 783{ 784 tcg_insn_unit p, i; 785 786 p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff); 787 i = opc | TAI(rt, ra, imm); 788 789 tcg_out_prefix_align(s); 790 tcg_out32(s, p); 791 tcg_out32(s, i); 792} 793 794static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, 795 TCGReg base, tcg_target_long offset); 796 797static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 798{ 799 if (ret == arg) { 800 return true; 801 } 802 switch (type) { 803 case TCG_TYPE_I64: 804 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 805 /* fallthru */ 806 case TCG_TYPE_I32: 807 if (ret < TCG_REG_V0) { 808 if (arg < TCG_REG_V0) { 809 tcg_out32(s, OR | SAB(arg, ret, arg)); 810 break; 811 } else if (have_isa_2_07) { 812 tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD) 813 | VRT(arg) | RA(ret)); 814 break; 815 } else { 816 /* Altivec does not support vector->integer moves. */ 817 return false; 818 } 819 } else if (arg < TCG_REG_V0) { 820 if (have_isa_2_07) { 821 tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD) 822 | VRT(ret) | RA(arg)); 823 break; 824 } else { 825 /* Altivec does not support integer->vector moves. */ 826 return false; 827 } 828 } 829 /* fallthru */ 830 case TCG_TYPE_V64: 831 case TCG_TYPE_V128: 832 tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0); 833 tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg)); 834 break; 835 default: 836 g_assert_not_reached(); 837 } 838 return true; 839} 840 841static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, 842 int sh, int mb) 843{ 844 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 845 sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); 846 mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); 847 tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); 848} 849 850static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, 851 int sh, int mb, int me) 852{ 853 tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); 854} 855 856static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) 857{ 858 tcg_out32(s, EXTSB | RA(dst) | RS(src)); 859} 860 861static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src) 862{ 863 tcg_out32(s, ANDI | SAI(src, dst, 0xff)); 864} 865 866static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) 867{ 868 tcg_out32(s, EXTSH | RA(dst) | RS(src)); 869} 870 871static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src) 872{ 873 tcg_out32(s, ANDI | SAI(src, dst, 0xffff)); 874} 875 876static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src) 877{ 878 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 879 tcg_out32(s, EXTSW | RA(dst) | RS(src)); 880} 881 882static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) 883{ 884 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 885 tcg_out_rld(s, RLDICL, dst, src, 0, 32); 886} 887 888static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src) 889{ 890 tcg_out_ext32s(s, 
dst, src); 891} 892 893static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src) 894{ 895 tcg_out_ext32u(s, dst, src); 896} 897 898static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) 899{ 900 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 901 tcg_out_mov(s, TCG_TYPE_I32, rd, rn); 902} 903 904static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) 905{ 906 tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); 907} 908 909static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) 910{ 911 tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); 912} 913 914static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c) 915{ 916 /* Limit immediate shift count lest we create an illegal insn. */ 917 tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31)); 918} 919 920static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) 921{ 922 tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); 923} 924 925static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) 926{ 927 tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); 928} 929 930static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c) 931{ 932 tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2)); 933} 934 935static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm) 936{ 937 uint32_t d0, d1, d2; 938 939 tcg_debug_assert((imm & 0xffff) == 0); 940 tcg_debug_assert(imm == (int32_t)imm); 941 942 d2 = extract32(imm, 16, 1); 943 d1 = extract32(imm, 17, 5); 944 d0 = extract32(imm, 22, 10); 945 tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2); 946} 947 948static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags) 949{ 950 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 951 952 if (have_isa_3_10) { 953 tcg_out32(s, BRH | RA(dst) | RS(src)); 954 if (flags & TCG_BSWAP_OS) { 955 tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); 956 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 957 tcg_out_ext16u(s, dst, dst); 958 } 959 return; 960 } 961 962 /* 963 * In the following, 964 * dep(a, b, m) -> (a & ~m) | (b & m) 965 * 966 * Begin with: src = xxxxabcd 967 */ 968 /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ 969 tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); 970 /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ 971 tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); 972 973 if (flags & TCG_BSWAP_OS) { 974 tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); 975 } else { 976 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 977 } 978} 979 980static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) 981{ 982 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 983 984 if (have_isa_3_10) { 985 tcg_out32(s, BRW | RA(dst) | RS(src)); 986 if (flags & TCG_BSWAP_OS) { 987 tcg_out_ext32s(s, dst, dst); 988 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 989 tcg_out_ext32u(s, dst, dst); 990 } 991 return; 992 } 993 994 /* 995 * Stolen from gcc's builtin_bswap32. 
996 * In the following, 997 * dep(a, b, m) -> (a & ~m) | (b & m) 998 * 999 * Begin with: src = xxxxabcd 1000 */ 1001 /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ 1002 tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); 1003 /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ 1004 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); 1005 /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ 1006 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); 1007 1008 if (flags & TCG_BSWAP_OS) { 1009 tcg_out_ext32s(s, dst, tmp); 1010 } else { 1011 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 1012 } 1013} 1014 1015static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) 1016{ 1017 TCGReg t0 = dst == src ? TCG_REG_R0 : dst; 1018 TCGReg t1 = dst == src ? dst : TCG_REG_R0; 1019 1020 if (have_isa_3_10) { 1021 tcg_out32(s, BRD | RA(dst) | RS(src)); 1022 return; 1023 } 1024 1025 /* 1026 * In the following, 1027 * dep(a, b, m) -> (a & ~m) | (b & m) 1028 * 1029 * Begin with: src = abcdefgh 1030 */ 1031 /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ 1032 tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); 1033 /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ 1034 tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); 1035 /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ 1036 tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); 1037 1038 /* t0 = rol64(t0, 32) = hgfe0000 */ 1039 tcg_out_rld(s, RLDICL, t0, t0, 32, 0); 1040 /* t1 = rol64(src, 32) = efghabcd */ 1041 tcg_out_rld(s, RLDICL, t1, src, 32, 0); 1042 1043 /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ 1044 tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); 1045 /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ 1046 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); 1047 /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ 1048 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); 1049 1050 tcg_out_mov(s, TCG_TYPE_REG, dst, t0); 1051} 1052 1053/* Emit a move into ret of arg, if it can be done in one insn. */ 1054static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) 1055{ 1056 if (arg == (int16_t)arg) { 1057 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1058 return true; 1059 } 1060 if (arg == (int32_t)arg && (arg & 0xffff) == 0) { 1061 tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); 1062 return true; 1063 } 1064 return false; 1065} 1066 1067static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, 1068 tcg_target_long arg, bool in_prologue) 1069{ 1070 intptr_t tb_diff; 1071 tcg_target_long tmp; 1072 int shift; 1073 1074 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1075 1076 if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { 1077 arg = (int32_t)arg; 1078 } 1079 1080 /* Load 16-bit immediates with one insn. */ 1081 if (tcg_out_movi_one(s, ret, arg)) { 1082 return; 1083 } 1084 1085 /* Load addresses within the TB with one insn. */ 1086 tb_diff = ppc_tbrel_diff(s, (void *)arg); 1087 if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) { 1088 tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff)); 1089 return; 1090 } 1091 1092 /* 1093 * Load values up to 34 bits, and pc-relative addresses, 1094 * with one prefixed insn. 
1095 */ 1096 if (have_isa_3_10) { 1097 if (arg == sextract64(arg, 0, 34)) { 1098 /* pli ret,value = paddi ret,0,value,0 */ 1099 tcg_out_mls_d(s, ADDI, ret, 0, arg, 0); 1100 return; 1101 } 1102 1103 tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg); 1104 if (tmp == sextract64(tmp, 0, 34)) { 1105 /* pla ret,value = paddi ret,0,value,1 */ 1106 tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1); 1107 return; 1108 } 1109 } 1110 1111 /* Load 32-bit immediates with two insns. Note that we've already 1112 eliminated bare ADDIS, so we know both insns are required. */ 1113 if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) { 1114 tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); 1115 tcg_out32(s, ORI | SAI(ret, ret, arg)); 1116 return; 1117 } 1118 if (arg == (uint32_t)arg && !(arg & 0x8000)) { 1119 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1120 tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); 1121 return; 1122 } 1123 1124 /* Load masked 16-bit value. */ 1125 if (arg > 0 && (arg & 0x8000)) { 1126 tmp = arg | 0x7fff; 1127 if ((tmp & (tmp + 1)) == 0) { 1128 int mb = clz64(tmp + 1) + 1; 1129 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1130 tcg_out_rld(s, RLDICL, ret, ret, 0, mb); 1131 return; 1132 } 1133 } 1134 1135 /* Load common masks with 2 insns. */ 1136 shift = ctz64(arg); 1137 tmp = arg >> shift; 1138 if (tmp == (int16_t)tmp) { 1139 tcg_out32(s, ADDI | TAI(ret, 0, tmp)); 1140 tcg_out_shli64(s, ret, ret, shift); 1141 return; 1142 } 1143 shift = clz64(arg); 1144 if (tcg_out_movi_one(s, ret, arg << shift)) { 1145 tcg_out_shri64(s, ret, ret, shift); 1146 return; 1147 } 1148 1149 /* Load addresses within 2GB with 2 insns. */ 1150 if (have_isa_3_00) { 1151 intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4; 1152 int16_t lo = hi; 1153 1154 hi -= lo; 1155 if (hi == (int32_t)hi) { 1156 tcg_out_addpcis(s, TCG_REG_TMP2, hi); 1157 tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo)); 1158 return; 1159 } 1160 } 1161 1162 /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */ 1163 if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) { 1164 tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff); 1165 return; 1166 } 1167 1168 /* Use the constant pool, if possible. 
*/ 1169 if (!in_prologue && USE_REG_TB) { 1170 new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr, 1171 ppc_tbrel_diff(s, NULL)); 1172 tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0)); 1173 return; 1174 } 1175 if (have_isa_3_10) { 1176 tcg_out_8ls_d(s, PLD, ret, 0, 0, 1); 1177 new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0); 1178 return; 1179 } 1180 if (have_isa_3_00) { 1181 tcg_out_addpcis(s, TCG_REG_TMP2, 0); 1182 new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0); 1183 tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0)); 1184 return; 1185 } 1186 1187 tmp = arg >> 31 >> 1; 1188 tcg_out_movi(s, TCG_TYPE_I32, ret, tmp); 1189 if (tmp) { 1190 tcg_out_shli64(s, ret, ret, 32); 1191 } 1192 if (arg & 0xffff0000) { 1193 tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); 1194 } 1195 if (arg & 0xffff) { 1196 tcg_out32(s, ORI | SAI(ret, ret, arg)); 1197 } 1198} 1199 1200static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 1201 TCGReg ret, int64_t val) 1202{ 1203 uint32_t load_insn; 1204 int rel, low; 1205 intptr_t add; 1206 1207 switch (vece) { 1208 case MO_8: 1209 low = (int8_t)val; 1210 if (low >= -16 && low < 16) { 1211 tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); 1212 return; 1213 } 1214 if (have_isa_3_00) { 1215 tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); 1216 return; 1217 } 1218 break; 1219 1220 case MO_16: 1221 low = (int16_t)val; 1222 if (low >= -16 && low < 16) { 1223 tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); 1224 return; 1225 } 1226 break; 1227 1228 case MO_32: 1229 low = (int32_t)val; 1230 if (low >= -16 && low < 16) { 1231 tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); 1232 return; 1233 } 1234 break; 1235 } 1236 1237 /* 1238 * Otherwise we must load the value from the constant pool. 1239 */ 1240 if (USE_REG_TB) { 1241 rel = R_PPC_ADDR16; 1242 add = ppc_tbrel_diff(s, NULL); 1243 } else if (have_isa_3_10) { 1244 if (type == TCG_TYPE_V64) { 1245 tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1); 1246 new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0); 1247 } else { 1248 tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1); 1249 new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val); 1250 } 1251 return; 1252 } else if (have_isa_3_00) { 1253 tcg_out_addpcis(s, TCG_REG_TMP1, 0); 1254 rel = R_PPC_REL14; 1255 add = 0; 1256 } else { 1257 rel = R_PPC_ADDR32; 1258 add = 0; 1259 } 1260 1261 if (have_vsx) { 1262 load_insn = type == TCG_TYPE_V64 ? 
LXSDX : LXVDSX; 1263 load_insn |= VRT(ret) | RB(TCG_REG_TMP1); 1264 if (TCG_TARGET_REG_BITS == 64) { 1265 new_pool_label(s, val, rel, s->code_ptr, add); 1266 } else { 1267 new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val); 1268 } 1269 } else { 1270 load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); 1271 if (TCG_TARGET_REG_BITS == 64) { 1272 new_pool_l2(s, rel, s->code_ptr, add, val, val); 1273 } else { 1274 new_pool_l4(s, rel, s->code_ptr, add, 1275 val >> 32, val, val >> 32, val); 1276 } 1277 } 1278 1279 if (USE_REG_TB) { 1280 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0)); 1281 load_insn |= RA(TCG_REG_TB); 1282 } else if (have_isa_3_00) { 1283 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); 1284 } else { 1285 tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0)); 1286 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); 1287 } 1288 tcg_out32(s, load_insn); 1289} 1290 1291static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, 1292 tcg_target_long arg) 1293{ 1294 switch (type) { 1295 case TCG_TYPE_I32: 1296 case TCG_TYPE_I64: 1297 tcg_debug_assert(ret < TCG_REG_V0); 1298 tcg_out_movi_int(s, type, ret, arg, false); 1299 break; 1300 1301 default: 1302 g_assert_not_reached(); 1303 } 1304} 1305 1306static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1307{ 1308 return false; 1309} 1310 1311static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1312 tcg_target_long imm) 1313{ 1314 /* This function is only used for passing structs by reference. */ 1315 g_assert_not_reached(); 1316} 1317 1318static bool mask_operand(uint32_t c, int *mb, int *me) 1319{ 1320 uint32_t lsb, test; 1321 1322 /* Accept a bit pattern like: 1323 0....01....1 1324 1....10....0 1325 0..01..10..0 1326 Keep track of the transitions. */ 1327 if (c == 0 || c == -1) { 1328 return false; 1329 } 1330 test = c; 1331 lsb = test & -test; 1332 test += lsb; 1333 if (test & (test - 1)) { 1334 return false; 1335 } 1336 1337 *me = clz32(lsb); 1338 *mb = test ? clz32(test & -test) + 1 : 0; 1339 return true; 1340} 1341 1342static bool mask64_operand(uint64_t c, int *mb, int *me) 1343{ 1344 uint64_t lsb; 1345 1346 if (c == 0) { 1347 return false; 1348 } 1349 1350 lsb = c & -c; 1351 /* Accept 1..10..0. */ 1352 if (c == -lsb) { 1353 *mb = 0; 1354 *me = clz64(lsb); 1355 return true; 1356 } 1357 /* Accept 0..01..1. 
*/ 1358 if (lsb == 1 && (c & (c + 1)) == 0) { 1359 *mb = clz64(c + 1) + 1; 1360 *me = 63; 1361 return true; 1362 } 1363 return false; 1364} 1365 1366static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1367{ 1368 int mb, me; 1369 1370 if (mask_operand(c, &mb, &me)) { 1371 tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); 1372 } else if ((c & 0xffff) == c) { 1373 tcg_out32(s, ANDI | SAI(src, dst, c)); 1374 return; 1375 } else if ((c & 0xffff0000) == c) { 1376 tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); 1377 return; 1378 } else { 1379 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); 1380 tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); 1381 } 1382} 1383 1384static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) 1385{ 1386 int mb, me; 1387 1388 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1389 if (mask64_operand(c, &mb, &me)) { 1390 if (mb == 0) { 1391 tcg_out_rld(s, RLDICR, dst, src, 0, me); 1392 } else { 1393 tcg_out_rld(s, RLDICL, dst, src, 0, mb); 1394 } 1395 } else if ((c & 0xffff) == c) { 1396 tcg_out32(s, ANDI | SAI(src, dst, c)); 1397 return; 1398 } else if ((c & 0xffff0000) == c) { 1399 tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); 1400 return; 1401 } else { 1402 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); 1403 tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); 1404 } 1405} 1406 1407static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, 1408 int op_lo, int op_hi) 1409{ 1410 if (c >> 16) { 1411 tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); 1412 src = dst; 1413 } 1414 if (c & 0xffff) { 1415 tcg_out32(s, op_lo | SAI(src, dst, c)); 1416 src = dst; 1417 } 1418} 1419 1420static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1421{ 1422 tcg_out_zori32(s, dst, src, c, ORI, ORIS); 1423} 1424 1425static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1426{ 1427 tcg_out_zori32(s, dst, src, c, XORI, XORIS); 1428} 1429 1430static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target) 1431{ 1432 ptrdiff_t disp = tcg_pcrel_diff(s, target); 1433 if (in_range_b(disp)) { 1434 tcg_out32(s, B | (disp & 0x3fffffc) | mask); 1435 } else { 1436 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); 1437 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); 1438 tcg_out32(s, BCCTR | BO_ALWAYS | mask); 1439 } 1440} 1441 1442static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, 1443 TCGReg base, tcg_target_long offset) 1444{ 1445 tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; 1446 bool is_int_store = false; 1447 TCGReg rs = TCG_REG_TMP1; 1448 1449 switch (opi) { 1450 case LD: case LWA: 1451 align = 3; 1452 /* FALLTHRU */ 1453 default: 1454 if (rt > TCG_REG_R0 && rt < TCG_REG_V0) { 1455 rs = rt; 1456 break; 1457 } 1458 break; 1459 case LXSD: 1460 case STXSD: 1461 align = 3; 1462 break; 1463 case LXV: 1464 case STXV: 1465 align = 15; 1466 break; 1467 case STD: 1468 align = 3; 1469 /* FALLTHRU */ 1470 case STB: case STH: case STW: 1471 is_int_store = true; 1472 break; 1473 } 1474 1475 /* For unaligned or large offsets, use the prefixed form. */ 1476 if (have_isa_3_10 1477 && (offset != (int16_t)offset || (offset & align)) 1478 && offset == sextract64(offset, 0, 34)) { 1479 /* 1480 * Note that the MLS:D insns retain their un-prefixed opcode, 1481 * while the 8LS:D insns use a different opcode space. 
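 * Hence ADDI and the byte/half/word loads and stores go through
 * tcg_out_mls_d with their original opcode, while LWA, LD, STD and the
 * vector forms are replaced by PLWA, PLD, PSTD etc. via tcg_out_8ls_d.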
1482 */ 1483 switch (opi) { 1484 case LBZ: 1485 case LHZ: 1486 case LHA: 1487 case LWZ: 1488 case STB: 1489 case STH: 1490 case STW: 1491 case ADDI: 1492 tcg_out_mls_d(s, opi, rt, base, offset, 0); 1493 return; 1494 case LWA: 1495 tcg_out_8ls_d(s, PLWA, rt, base, offset, 0); 1496 return; 1497 case LD: 1498 tcg_out_8ls_d(s, PLD, rt, base, offset, 0); 1499 return; 1500 case STD: 1501 tcg_out_8ls_d(s, PSTD, rt, base, offset, 0); 1502 return; 1503 case LXSD: 1504 tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0); 1505 return; 1506 case STXSD: 1507 tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0); 1508 return; 1509 case LXV: 1510 tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0); 1511 return; 1512 case STXV: 1513 tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0); 1514 return; 1515 } 1516 } 1517 1518 /* For unaligned, or very large offsets, use the indexed form. */ 1519 if (offset & align || offset != (int32_t)offset || opi == 0) { 1520 if (rs == base) { 1521 rs = TCG_REG_R0; 1522 } 1523 tcg_debug_assert(!is_int_store || rs != rt); 1524 tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); 1525 tcg_out32(s, opx | TAB(rt & 31, base, rs)); 1526 return; 1527 } 1528 1529 l0 = (int16_t)offset; 1530 offset = (offset - l0) >> 16; 1531 l1 = (int16_t)offset; 1532 1533 if (l1 < 0 && orig >= 0) { 1534 extra = 0x4000; 1535 l1 = (int16_t)(offset - 0x4000); 1536 } 1537 if (l1) { 1538 tcg_out32(s, ADDIS | TAI(rs, base, l1)); 1539 base = rs; 1540 } 1541 if (extra) { 1542 tcg_out32(s, ADDIS | TAI(rs, base, extra)); 1543 base = rs; 1544 } 1545 if (opi != ADDI || base != rt || l0 != 0) { 1546 tcg_out32(s, opi | TAI(rt & 31, base, l0)); 1547 } 1548} 1549 1550static void tcg_out_vsldoi(TCGContext *s, TCGReg ret, 1551 TCGReg va, TCGReg vb, int shb) 1552{ 1553 tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6)); 1554} 1555 1556static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1557 TCGReg base, intptr_t offset) 1558{ 1559 int shift; 1560 1561 switch (type) { 1562 case TCG_TYPE_I32: 1563 if (ret < TCG_REG_V0) { 1564 tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); 1565 break; 1566 } 1567 if (have_isa_2_07 && have_vsx) { 1568 tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset); 1569 break; 1570 } 1571 tcg_debug_assert((offset & 3) == 0); 1572 tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); 1573 shift = (offset - 4) & 0xc; 1574 if (shift) { 1575 tcg_out_vsldoi(s, ret, ret, ret, shift); 1576 } 1577 break; 1578 case TCG_TYPE_I64: 1579 if (ret < TCG_REG_V0) { 1580 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1581 tcg_out_mem_long(s, LD, LDX, ret, base, offset); 1582 break; 1583 } 1584 /* fallthru */ 1585 case TCG_TYPE_V64: 1586 tcg_debug_assert(ret >= TCG_REG_V0); 1587 if (have_vsx) { 1588 tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX, 1589 ret, base, offset); 1590 break; 1591 } 1592 tcg_debug_assert((offset & 7) == 0); 1593 tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); 1594 if (offset & 8) { 1595 tcg_out_vsldoi(s, ret, ret, ret, 8); 1596 } 1597 break; 1598 case TCG_TYPE_V128: 1599 tcg_debug_assert(ret >= TCG_REG_V0); 1600 tcg_debug_assert((offset & 15) == 0); 1601 tcg_out_mem_long(s, have_isa_3_00 ? 
LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.
*/ 1680 if (type == TCG_TYPE_I32) { 1681 arg2 = (int32_t)arg2; 1682 } 1683 1684 switch (cond) { 1685 case TCG_COND_EQ: 1686 case TCG_COND_NE: 1687 if (const_arg2) { 1688 if ((int16_t) arg2 == arg2) { 1689 op = CMPI; 1690 imm = 1; 1691 break; 1692 } else if ((uint16_t) arg2 == arg2) { 1693 op = CMPLI; 1694 imm = 1; 1695 break; 1696 } 1697 } 1698 op = CMPL; 1699 imm = 0; 1700 break; 1701 1702 case TCG_COND_LT: 1703 case TCG_COND_GE: 1704 case TCG_COND_LE: 1705 case TCG_COND_GT: 1706 if (const_arg2) { 1707 if ((int16_t) arg2 == arg2) { 1708 op = CMPI; 1709 imm = 1; 1710 break; 1711 } 1712 } 1713 op = CMP; 1714 imm = 0; 1715 break; 1716 1717 case TCG_COND_LTU: 1718 case TCG_COND_GEU: 1719 case TCG_COND_LEU: 1720 case TCG_COND_GTU: 1721 if (const_arg2) { 1722 if ((uint16_t) arg2 == arg2) { 1723 op = CMPLI; 1724 imm = 1; 1725 break; 1726 } 1727 } 1728 op = CMPL; 1729 imm = 0; 1730 break; 1731 1732 default: 1733 g_assert_not_reached(); 1734 } 1735 op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); 1736 1737 if (imm) { 1738 tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); 1739 } else { 1740 if (const_arg2) { 1741 tcg_out_movi(s, type, TCG_REG_R0, arg2); 1742 arg2 = TCG_REG_R0; 1743 } 1744 tcg_out32(s, op | RA(arg1) | RB(arg2)); 1745 } 1746} 1747 1748static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, 1749 TCGReg dst, TCGReg src, bool neg) 1750{ 1751 if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { 1752 /* 1753 * X != 0 implies X + -1 generates a carry. 1754 * RT = (~X + X) + CA 1755 * = -1 + CA 1756 * = CA ? 0 : -1 1757 */ 1758 tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); 1759 tcg_out32(s, SUBFE | TAB(dst, src, src)); 1760 return; 1761 } 1762 1763 if (type == TCG_TYPE_I32) { 1764 tcg_out32(s, CNTLZW | RS(src) | RA(dst)); 1765 tcg_out_shri32(s, dst, dst, 5); 1766 } else { 1767 tcg_out32(s, CNTLZD | RS(src) | RA(dst)); 1768 tcg_out_shri64(s, dst, dst, 6); 1769 } 1770 if (neg) { 1771 tcg_out32(s, NEG | RT(dst) | RA(dst)); 1772 } 1773} 1774 1775static void tcg_out_setcond_ne0(TCGContext *s, TCGType type, 1776 TCGReg dst, TCGReg src, bool neg) 1777{ 1778 if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { 1779 /* 1780 * X != 0 implies X + -1 generates a carry. Extra addition 1781 * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. 1782 */ 1783 tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); 1784 tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); 1785 return; 1786 } 1787 tcg_out_setcond_eq0(s, type, dst, src, false); 1788 if (neg) { 1789 tcg_out32(s, ADDI | TAI(dst, dst, -1)); 1790 } else { 1791 tcg_out_xori32(s, dst, dst, 1); 1792 } 1793} 1794 1795static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, 1796 bool const_arg2) 1797{ 1798 if (const_arg2) { 1799 if ((uint32_t)arg2 == arg2) { 1800 tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); 1801 } else { 1802 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); 1803 tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); 1804 } 1805 } else { 1806 tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); 1807 } 1808 return TCG_REG_R0; 1809} 1810 1811static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, 1812 TCGArg arg0, TCGArg arg1, TCGArg arg2, 1813 int const_arg2, bool neg) 1814{ 1815 int sh; 1816 bool inv; 1817 1818 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1819 1820 /* Ignore high bits of a potential constant arg2. */ 1821 if (type == TCG_TYPE_I32) { 1822 arg2 = (uint32_t)arg2; 1823 } 1824 1825 /* With SETBC/SETBCR, we can always implement with 2 insns. 
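   A compare into CR7 followed by SETBC/SETBCR (or SETNBC/SETNBCR when a
   negated result is wanted) materializes the selected CR bit directly.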
*/ 1826 if (have_isa_3_10) { 1827 tcg_insn_unit bi, opc; 1828 1829 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1830 1831 /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */ 1832 bi = tcg_to_bc[cond] & (0x1f << 16); 1833 if (tcg_to_bc[cond] & BO(8)) { 1834 opc = neg ? SETNBC : SETBC; 1835 } else { 1836 opc = neg ? SETNBCR : SETBCR; 1837 } 1838 tcg_out32(s, opc | RT(arg0) | bi); 1839 return; 1840 } 1841 1842 /* Handle common and trivial cases before handling anything else. */ 1843 if (arg2 == 0) { 1844 switch (cond) { 1845 case TCG_COND_EQ: 1846 tcg_out_setcond_eq0(s, type, arg0, arg1, neg); 1847 return; 1848 case TCG_COND_NE: 1849 tcg_out_setcond_ne0(s, type, arg0, arg1, neg); 1850 return; 1851 case TCG_COND_GE: 1852 tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); 1853 arg1 = arg0; 1854 /* FALLTHRU */ 1855 case TCG_COND_LT: 1856 /* Extract the sign bit. */ 1857 if (type == TCG_TYPE_I32) { 1858 if (neg) { 1859 tcg_out_sari32(s, arg0, arg1, 31); 1860 } else { 1861 tcg_out_shri32(s, arg0, arg1, 31); 1862 } 1863 } else { 1864 if (neg) { 1865 tcg_out_sari64(s, arg0, arg1, 63); 1866 } else { 1867 tcg_out_shri64(s, arg0, arg1, 63); 1868 } 1869 } 1870 return; 1871 default: 1872 break; 1873 } 1874 } 1875 1876 /* If we have ISEL, we can implement everything with 3 or 4 insns. 1877 All other cases below are also at least 3 insns, so speed up the 1878 code generator by not considering them and always using ISEL. */ 1879 if (have_isel) { 1880 int isel, tab; 1881 1882 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1883 1884 isel = tcg_to_isel[cond]; 1885 1886 tcg_out_movi(s, type, arg0, neg ? -1 : 1); 1887 if (isel & 1) { 1888 /* arg0 = (bc ? 0 : 1) */ 1889 tab = TAB(arg0, 0, arg0); 1890 isel &= ~1; 1891 } else { 1892 /* arg0 = (bc ? 1 : 0) */ 1893 tcg_out_movi(s, type, TCG_REG_R0, 0); 1894 tab = TAB(arg0, arg0, TCG_REG_R0); 1895 } 1896 tcg_out32(s, isel | tab); 1897 return; 1898 } 1899 1900 inv = false; 1901 switch (cond) { 1902 case TCG_COND_EQ: 1903 arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); 1904 tcg_out_setcond_eq0(s, type, arg0, arg1, neg); 1905 break; 1906 1907 case TCG_COND_NE: 1908 arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); 1909 tcg_out_setcond_ne0(s, type, arg0, arg1, neg); 1910 break; 1911 1912 case TCG_COND_LE: 1913 case TCG_COND_LEU: 1914 inv = true; 1915 /* fall through */ 1916 case TCG_COND_GT: 1917 case TCG_COND_GTU: 1918 sh = 30; /* CR7 CR_GT */ 1919 goto crtest; 1920 1921 case TCG_COND_GE: 1922 case TCG_COND_GEU: 1923 inv = true; 1924 /* fall through */ 1925 case TCG_COND_LT: 1926 case TCG_COND_LTU: 1927 sh = 29; /* CR7 CR_LT */ 1928 goto crtest; 1929 1930 crtest: 1931 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1932 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); 1933 tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); 1934 if (neg && inv) { 1935 tcg_out32(s, ADDI | TAI(arg0, arg0, -1)); 1936 } else if (neg) { 1937 tcg_out32(s, NEG | RT(arg0) | RA(arg0)); 1938 } else if (inv) { 1939 tcg_out_xori32(s, arg0, arg0, 1); 1940 } 1941 break; 1942 1943 default: 1944 g_assert_not_reached(); 1945 } 1946} 1947 1948static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) 1949{ 1950 if (l->has_value) { 1951 bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); 1952 } else { 1953 tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); 1954 } 1955 tcg_out32(s, bc); 1956} 1957 1958static void tcg_out_brcond(TCGContext *s, TCGCond cond, 1959 TCGArg arg1, TCGArg arg2, int const_arg2, 1960 TCGLabel *l, TCGType type) 1961{ 1962 
tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1963 tcg_out_bc(s, tcg_to_bc[cond], l); 1964} 1965 1966static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, 1967 TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, 1968 TCGArg v2, bool const_c2) 1969{ 1970 /* If for some reason both inputs are zero, don't produce bad code. */ 1971 if (v1 == 0 && v2 == 0) { 1972 tcg_out_movi(s, type, dest, 0); 1973 return; 1974 } 1975 1976 tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); 1977 1978 if (have_isel) { 1979 int isel = tcg_to_isel[cond]; 1980 1981 /* Swap the V operands if the operation indicates inversion. */ 1982 if (isel & 1) { 1983 int t = v1; 1984 v1 = v2; 1985 v2 = t; 1986 isel &= ~1; 1987 } 1988 /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ 1989 if (v2 == 0) { 1990 tcg_out_movi(s, type, TCG_REG_R0, 0); 1991 } 1992 tcg_out32(s, isel | TAB(dest, v1, v2)); 1993 } else { 1994 if (dest == v2) { 1995 cond = tcg_invert_cond(cond); 1996 v2 = v1; 1997 } else if (dest != v1) { 1998 if (v1 == 0) { 1999 tcg_out_movi(s, type, dest, 0); 2000 } else { 2001 tcg_out_mov(s, type, dest, v1); 2002 } 2003 } 2004 /* Branch forward over one insn */ 2005 tcg_out32(s, tcg_to_bc[cond] | 8); 2006 if (v2 == 0) { 2007 tcg_out_movi(s, type, dest, 0); 2008 } else { 2009 tcg_out_mov(s, type, dest, v2); 2010 } 2011 } 2012} 2013 2014static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, 2015 TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2) 2016{ 2017 if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) { 2018 tcg_out32(s, opc | RA(a0) | RS(a1)); 2019 } else { 2020 tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type); 2021 /* Note that the only other valid constant for a2 is 0. */ 2022 if (have_isel) { 2023 tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1)); 2024 tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0)); 2025 } else if (!const_a2 && a0 == a2) { 2026 tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8); 2027 tcg_out32(s, opc | RA(a0) | RS(a1)); 2028 } else { 2029 tcg_out32(s, opc | RA(a0) | RS(a1)); 2030 tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8); 2031 if (const_a2) { 2032 tcg_out_movi(s, type, a0, 0); 2033 } else { 2034 tcg_out_mov(s, type, a0, a2); 2035 } 2036 } 2037 } 2038} 2039 2040static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, 2041 const int *const_args) 2042{ 2043 static const struct { uint8_t bit1, bit2; } bits[] = { 2044 [TCG_COND_LT ] = { CR_LT, CR_LT }, 2045 [TCG_COND_LE ] = { CR_LT, CR_GT }, 2046 [TCG_COND_GT ] = { CR_GT, CR_GT }, 2047 [TCG_COND_GE ] = { CR_GT, CR_LT }, 2048 [TCG_COND_LTU] = { CR_LT, CR_LT }, 2049 [TCG_COND_LEU] = { CR_LT, CR_GT }, 2050 [TCG_COND_GTU] = { CR_GT, CR_GT }, 2051 [TCG_COND_GEU] = { CR_GT, CR_LT }, 2052 }; 2053 2054 TCGCond cond = args[4], cond2; 2055 TCGArg al, ah, bl, bh; 2056 int blconst, bhconst; 2057 int op, bit1, bit2; 2058 2059 al = args[0]; 2060 ah = args[1]; 2061 bl = args[2]; 2062 bh = args[3]; 2063 blconst = const_args[2]; 2064 bhconst = const_args[3]; 2065 2066 switch (cond) { 2067 case TCG_COND_EQ: 2068 op = CRAND; 2069 goto do_equality; 2070 case TCG_COND_NE: 2071 op = CRNAND; 2072 do_equality: 2073 tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); 2074 tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); 2075 tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2076 break; 2077 2078 case TCG_COND_LT: 2079 case TCG_COND_LE: 2080 case TCG_COND_GT: 2081 case TCG_COND_GE: 2082 case TCG_COND_LTU: 2083 case TCG_COND_LEU: 2084 case TCG_COND_GTU: 2085 case TCG_COND_GEU: 2086 bit1 = 
bits[cond].bit1; 2087 bit2 = bits[cond].bit2; 2088 op = (bit1 != bit2 ? CRANDC : CRAND); 2089 cond2 = tcg_unsigned_cond(cond); 2090 2091 tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); 2092 tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); 2093 tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); 2094 tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); 2095 break; 2096 2097 default: 2098 g_assert_not_reached(); 2099 } 2100} 2101 2102static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, 2103 const int *const_args) 2104{ 2105 tcg_out_cmp2(s, args + 1, const_args + 1); 2106 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); 2107 tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); 2108} 2109 2110static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, 2111 const int *const_args) 2112{ 2113 tcg_out_cmp2(s, args, const_args); 2114 tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); 2115} 2116 2117static void tcg_out_mb(TCGContext *s, TCGArg a0) 2118{ 2119 uint32_t insn; 2120 2121 if (a0 & TCG_MO_ST_LD) { 2122 insn = HWSYNC; 2123 } else { 2124 insn = LWSYNC; 2125 } 2126 2127 tcg_out32(s, insn); 2128} 2129 2130static void tcg_out_call_int(TCGContext *s, int lk, 2131 const tcg_insn_unit *target) 2132{ 2133#ifdef _CALL_AIX 2134 /* Look through the descriptor. If the branch is in range, and we 2135 don't have to spend too much effort on building the toc. */ 2136 const void *tgt = ((const void * const *)target)[0]; 2137 uintptr_t toc = ((const uintptr_t *)target)[1]; 2138 intptr_t diff = tcg_pcrel_diff(s, tgt); 2139 2140 if (in_range_b(diff) && toc == (uint32_t)toc) { 2141 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); 2142 tcg_out_b(s, lk, tgt); 2143 } else { 2144 /* Fold the low bits of the constant into the addresses below. */ 2145 intptr_t arg = (intptr_t)target; 2146 int ofs = (int16_t)arg; 2147 2148 if (ofs + 8 < 0x8000) { 2149 arg -= ofs; 2150 } else { 2151 ofs = 0; 2152 } 2153 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); 2154 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); 2155 tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); 2156 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); 2157 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2158 } 2159#elif defined(_CALL_ELF) && _CALL_ELF == 2 2160 intptr_t diff; 2161 2162 /* In the ELFv2 ABI, we have to set up r12 to contain the destination 2163 address, which the callee uses to compute its TOC address. */ 2164 /* FIXME: when the branch is in range, we could avoid r12 load if we 2165 knew that the destination uses the same TOC, and what its local 2166 entry point offset is. 
*/ 2167 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); 2168 2169 diff = tcg_pcrel_diff(s, target); 2170 if (in_range_b(diff)) { 2171 tcg_out_b(s, lk, target); 2172 } else { 2173 tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); 2174 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2175 } 2176#else 2177 tcg_out_b(s, lk, target); 2178#endif 2179} 2180 2181static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 2182 const TCGHelperInfo *info) 2183{ 2184 tcg_out_call_int(s, LK, target); 2185} 2186 2187static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = { 2188 [MO_UB] = LBZX, 2189 [MO_UW] = LHZX, 2190 [MO_UL] = LWZX, 2191 [MO_UQ] = LDX, 2192 [MO_SW] = LHAX, 2193 [MO_SL] = LWAX, 2194 [MO_BSWAP | MO_UB] = LBZX, 2195 [MO_BSWAP | MO_UW] = LHBRX, 2196 [MO_BSWAP | MO_UL] = LWBRX, 2197 [MO_BSWAP | MO_UQ] = LDBRX, 2198}; 2199 2200static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = { 2201 [MO_UB] = STBX, 2202 [MO_UW] = STHX, 2203 [MO_UL] = STWX, 2204 [MO_UQ] = STDX, 2205 [MO_BSWAP | MO_UB] = STBX, 2206 [MO_BSWAP | MO_UW] = STHBRX, 2207 [MO_BSWAP | MO_UL] = STWBRX, 2208 [MO_BSWAP | MO_UQ] = STDBRX, 2209}; 2210 2211static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) 2212{ 2213 if (arg < 0) { 2214 arg = TCG_REG_TMP1; 2215 } 2216 tcg_out32(s, MFSPR | RT(arg) | LR); 2217 return arg; 2218} 2219 2220/* 2221 * For the purposes of ppc32 sorting 4 input registers into 4 argument 2222 * registers, there is an outside chance we would require 3 temps. 2223 */ 2224static const TCGLdstHelperParam ldst_helper_param = { 2225 .ra_gen = ldst_ra_gen, 2226 .ntmp = 3, 2227 .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 } 2228}; 2229 2230static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2231{ 2232 MemOp opc = get_memop(lb->oi); 2233 2234 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2235 return false; 2236 } 2237 2238 tcg_out_ld_helper_args(s, lb, &ldst_helper_param); 2239 tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]); 2240 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); 2241 2242 tcg_out_b(s, 0, lb->raddr); 2243 return true; 2244} 2245 2246static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2247{ 2248 MemOp opc = get_memop(lb->oi); 2249 2250 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2251 return false; 2252 } 2253 2254 tcg_out_st_helper_args(s, lb, &ldst_helper_param); 2255 tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]); 2256 2257 tcg_out_b(s, 0, lb->raddr); 2258 return true; 2259} 2260 2261typedef struct { 2262 TCGReg base; 2263 TCGReg index; 2264 TCGAtomAlign aa; 2265} HostAddress; 2266 2267bool tcg_target_has_memory_bswap(MemOp memop) 2268{ 2269 TCGAtomAlign aa; 2270 2271 if ((memop & MO_SIZE) <= MO_64) { 2272 return true; 2273 } 2274 2275 /* 2276 * Reject 16-byte memop with 16-byte atomicity, 2277 * but do allow a pair of 64-bit operations. 2278 */ 2279 aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); 2280 return aa.atom <= MO_64; 2281} 2282 2283/* We expect to use a 16-bit negative offset from ENV. */ 2284#define MIN_TLB_MASK_TABLE_OFS -32768 2285 2286/* 2287 * For system-mode, perform the TLB load and compare. 2288 * For user-mode, perform any required alignment tests. 2289 * In both cases, return a TCGLabelQemuLdst structure if the slow path 2290 * is required and fill in @h with the host address for the fast path. 
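 *
 * As a rough sketch, the 64-bit softmmu fast path emitted here is:
 *
 *   ld        tmp1, mask_off(env)      # CPUTLBDescFast.mask
 *   ld        tmp2, table_off(env)     # CPUTLBDescFast.table
 *   srdi      r0, addr, page_bits - CPU_TLB_ENTRY_BITS
 *   and       tmp1, tmp1, r0           # index the TLB
 *   ld(ux)    tmp2, cmp_off(tmp1)      # TLB comparator
 *   ld        tmp1, addend_off(tmp1)   # TLB addend
 *   rldic[lr] r0, addr, ...            # page number + alignment bits
 *   cmpd      cr7, r0, tmp2
 *   bnel      cr7, <slow path>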
2291 */ 2292static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 2293 TCGReg addrlo, TCGReg addrhi, 2294 MemOpIdx oi, bool is_ld) 2295{ 2296 TCGType addr_type = s->addr_type; 2297 TCGLabelQemuLdst *ldst = NULL; 2298 MemOp opc = get_memop(oi); 2299 MemOp a_bits, s_bits; 2300 2301 /* 2302 * Book II, Section 1.4, Single-Copy Atomicity, specifies: 2303 * 2304 * Before 3.0, "An access that is not atomic is performed as a set of 2305 * smaller disjoint atomic accesses. In general, the number and alignment 2306 * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN. 2307 * 2308 * As of 3.0, "the non-atomic access is performed as described in 2309 * the corresponding list", which matches MO_ATOM_SUBALIGN. 2310 */ 2311 s_bits = opc & MO_SIZE; 2312 h->aa = atom_and_align_for_opc(s, opc, 2313 have_isa_3_00 ? MO_ATOM_SUBALIGN 2314 : MO_ATOM_IFALIGN, 2315 s_bits == MO_128); 2316 a_bits = h->aa.align; 2317 2318 if (tcg_use_softmmu) { 2319 int mem_index = get_mmuidx(oi); 2320 int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) 2321 : offsetof(CPUTLBEntry, addr_write); 2322 int fast_off = tlb_mask_table_ofs(s, mem_index); 2323 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); 2324 int table_off = fast_off + offsetof(CPUTLBDescFast, table); 2325 2326 ldst = new_ldst_label(s); 2327 ldst->is_ld = is_ld; 2328 ldst->oi = oi; 2329 ldst->addrlo_reg = addrlo; 2330 ldst->addrhi_reg = addrhi; 2331 2332 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ 2333 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); 2334 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); 2335 2336 /* Extract the page index, shifted into place for tlb index. */ 2337 if (TCG_TARGET_REG_BITS == 32) { 2338 tcg_out_shri32(s, TCG_REG_R0, addrlo, 2339 s->page_bits - CPU_TLB_ENTRY_BITS); 2340 } else { 2341 tcg_out_shri64(s, TCG_REG_R0, addrlo, 2342 s->page_bits - CPU_TLB_ENTRY_BITS); 2343 } 2344 tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); 2345 2346 /* 2347 * Load the (low part) TLB comparator into TMP2. 2348 * For 64-bit host, always load the entire 64-bit slot for simplicity. 2349 * We will ignore the high bits with tcg_out_cmp(..., addr_type). 2350 */ 2351 if (TCG_TARGET_REG_BITS == 64) { 2352 if (cmp_off == 0) { 2353 tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, 2354 TCG_REG_TMP1, TCG_REG_TMP2)); 2355 } else { 2356 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, 2357 TCG_REG_TMP1, TCG_REG_TMP2)); 2358 tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, 2359 TCG_REG_TMP1, cmp_off); 2360 } 2361 } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) { 2362 tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, 2363 TCG_REG_TMP1, TCG_REG_TMP2)); 2364 } else { 2365 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); 2366 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2367 cmp_off + 4 * HOST_BIG_ENDIAN); 2368 } 2369 2370 /* 2371 * Load the TLB addend for use on the fast path. 2372 * Do this asap to minimize any load use delay. 2373 */ 2374 if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) { 2375 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2376 offsetof(CPUTLBEntry, addend)); 2377 } 2378 2379 /* Clear the non-page, non-alignment bits from the address in R0. */ 2380 if (TCG_TARGET_REG_BITS == 32) { 2381 /* 2382 * We don't support unaligned accesses on 32-bits. 2383 * Preserve the bottom bits and thus trigger a comparison 2384 * failure on unaligned accesses. 
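 * For example, a 4-byte access whose address is not a multiple of 4
 * keeps its low bits in R0, so the comparison against the TLB
 * comparator (which has those bits clear whenever the fast path may
 * be used) fails and the slow-path helper is taken instead.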
2385 */ 2386 if (a_bits < s_bits) { 2387 a_bits = s_bits; 2388 } 2389 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, 2390 (32 - a_bits) & 31, 31 - s->page_bits); 2391 } else { 2392 TCGReg t = addrlo; 2393 2394 /* 2395 * If the access is unaligned, we need to make sure we fail if we 2396 * cross a page boundary. The trick is to add the access size-1 2397 * to the address before masking the low bits. That will make the 2398 * address overflow to the next page if we cross a page boundary, 2399 * which will then force a mismatch of the TLB compare. 2400 */ 2401 if (a_bits < s_bits) { 2402 unsigned a_mask = (1 << a_bits) - 1; 2403 unsigned s_mask = (1 << s_bits) - 1; 2404 tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); 2405 t = TCG_REG_R0; 2406 } 2407 2408 /* Mask the address for the requested alignment. */ 2409 if (addr_type == TCG_TYPE_I32) { 2410 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, 2411 (32 - a_bits) & 31, 31 - s->page_bits); 2412 } else if (a_bits == 0) { 2413 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); 2414 } else { 2415 tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 2416 64 - s->page_bits, s->page_bits - a_bits); 2417 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); 2418 } 2419 } 2420 2421 if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) { 2422 /* Low part comparison into cr7. */ 2423 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2424 0, 7, TCG_TYPE_I32); 2425 2426 /* Load the high part TLB comparator into TMP2. */ 2427 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2428 cmp_off + 4 * !HOST_BIG_ENDIAN); 2429 2430 /* Load addend, deferred for this case. */ 2431 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2432 offsetof(CPUTLBEntry, addend)); 2433 2434 /* High part comparison into cr6. */ 2435 tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 2436 0, 6, TCG_TYPE_I32); 2437 2438 /* Combine comparisons into cr7. */ 2439 tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2440 } else { 2441 /* Full comparison into cr7. */ 2442 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2443 0, 7, addr_type); 2444 } 2445 2446 /* Load a pointer into the current opcode w/conditional branch-link. */ 2447 ldst->label_ptr[0] = s->code_ptr; 2448 tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); 2449 2450 h->base = TCG_REG_TMP1; 2451 } else { 2452 if (a_bits) { 2453 ldst = new_ldst_label(s); 2454 ldst->is_ld = is_ld; 2455 ldst->oi = oi; 2456 ldst->addrlo_reg = addrlo; 2457 ldst->addrhi_reg = addrhi; 2458 2459 /* We are expecting a_bits to max out at 7, much lower than ANDI. */ 2460 tcg_debug_assert(a_bits < 16); 2461 tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1)); 2462 2463 ldst->label_ptr[0] = s->code_ptr; 2464 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); 2465 } 2466 2467 h->base = guest_base ? TCG_GUEST_BASE_REG : 0; 2468 } 2469 2470 if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) { 2471 /* Zero-extend the guest address for use in the host address. 
*/ 2472 tcg_out_ext32u(s, TCG_REG_R0, addrlo); 2473 h->index = TCG_REG_R0; 2474 } else { 2475 h->index = addrlo; 2476 } 2477 2478 return ldst; 2479} 2480 2481static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, 2482 TCGReg addrlo, TCGReg addrhi, 2483 MemOpIdx oi, TCGType data_type) 2484{ 2485 MemOp opc = get_memop(oi); 2486 TCGLabelQemuLdst *ldst; 2487 HostAddress h; 2488 2489 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); 2490 2491 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2492 if (opc & MO_BSWAP) { 2493 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2494 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2495 tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0)); 2496 } else if (h.base != 0) { 2497 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2498 tcg_out32(s, LWZX | TAB(datahi, h.base, h.index)); 2499 tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0)); 2500 } else if (h.index == datahi) { 2501 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2502 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2503 } else { 2504 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2505 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2506 } 2507 } else { 2508 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; 2509 if (!have_isa_2_06 && insn == LDBRX) { 2510 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2511 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2512 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0)); 2513 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); 2514 } else if (insn) { 2515 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2516 } else { 2517 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; 2518 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2519 tcg_out_movext(s, TCG_TYPE_REG, datalo, 2520 TCG_TYPE_REG, opc & MO_SSIZE, datalo); 2521 } 2522 } 2523 2524 if (ldst) { 2525 ldst->type = data_type; 2526 ldst->datalo_reg = datalo; 2527 ldst->datahi_reg = datahi; 2528 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2529 } 2530} 2531 2532static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, 2533 TCGReg addrlo, TCGReg addrhi, 2534 MemOpIdx oi, TCGType data_type) 2535{ 2536 MemOp opc = get_memop(oi); 2537 TCGLabelQemuLdst *ldst; 2538 HostAddress h; 2539 2540 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); 2541 2542 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2543 if (opc & MO_BSWAP) { 2544 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2545 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2546 tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0)); 2547 } else if (h.base != 0) { 2548 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2549 tcg_out32(s, STWX | SAB(datahi, h.base, h.index)); 2550 tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0)); 2551 } else { 2552 tcg_out32(s, STW | TAI(datahi, h.index, 0)); 2553 tcg_out32(s, STW | TAI(datalo, h.index, 4)); 2554 } 2555 } else { 2556 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; 2557 if (!have_isa_2_06 && insn == STDBRX) { 2558 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2559 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4)); 2560 tcg_out_shri64(s, TCG_REG_R0, datalo, 32); 2561 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1)); 2562 } else { 2563 tcg_out32(s, insn | SAB(datalo, h.base, h.index)); 2564 } 2565 } 2566 2567 if (ldst) { 2568 ldst->type = data_type; 2569 ldst->datalo_reg = datalo; 2570 ldst->datahi_reg = datahi; 2571 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2572 } 2573} 2574 
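/*
 * 128-bit loads and stores.  As a sketch of the cases handled below:
 * when 16-byte single-copy atomicity is required (h.aa.atom == MO_128),
 * lq/stq is used, which needs an even/odd register pair and a
 * non-indexed address; otherwise the access is split into a pair of
 * 64-bit operations, using ldbrx/stdbrx with an offset of 8 in r0 when
 * a byte swap is needed.
 */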
2575static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, 2576 TCGReg addr_reg, MemOpIdx oi, bool is_ld) 2577{ 2578 TCGLabelQemuLdst *ldst; 2579 HostAddress h; 2580 bool need_bswap; 2581 uint32_t insn; 2582 TCGReg index; 2583 2584 ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld); 2585 2586 /* Compose the final address, as LQ/STQ have no indexing. */ 2587 index = h.index; 2588 if (h.base != 0) { 2589 index = TCG_REG_TMP1; 2590 tcg_out32(s, ADD | TAB(index, h.base, h.index)); 2591 } 2592 need_bswap = get_memop(oi) & MO_BSWAP; 2593 2594 if (h.aa.atom == MO_128) { 2595 tcg_debug_assert(!need_bswap); 2596 tcg_debug_assert(datalo & 1); 2597 tcg_debug_assert(datahi == datalo - 1); 2598 tcg_debug_assert(!is_ld || datahi != index); 2599 insn = is_ld ? LQ : STQ; 2600 tcg_out32(s, insn | TAI(datahi, index, 0)); 2601 } else { 2602 TCGReg d1, d2; 2603 2604 if (HOST_BIG_ENDIAN ^ need_bswap) { 2605 d1 = datahi, d2 = datalo; 2606 } else { 2607 d1 = datalo, d2 = datahi; 2608 } 2609 2610 if (need_bswap) { 2611 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); 2612 insn = is_ld ? LDBRX : STDBRX; 2613 tcg_out32(s, insn | TAB(d1, 0, index)); 2614 tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); 2615 } else { 2616 insn = is_ld ? LD : STD; 2617 tcg_out32(s, insn | TAI(d1, index, 0)); 2618 tcg_out32(s, insn | TAI(d2, index, 8)); 2619 } 2620 } 2621 2622 if (ldst) { 2623 ldst->type = TCG_TYPE_I128; 2624 ldst->datalo_reg = datalo; 2625 ldst->datahi_reg = datahi; 2626 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2627 } 2628} 2629 2630static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2631{ 2632 int i; 2633 for (i = 0; i < count; ++i) { 2634 p[i] = NOP; 2635 } 2636} 2637 2638/* Parameters for function call generation, used in tcg.c. */ 2639#define TCG_TARGET_STACK_ALIGN 16 2640 2641#ifdef _CALL_AIX 2642# define LINK_AREA_SIZE (6 * SZR) 2643# define LR_OFFSET (1 * SZR) 2644# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) 2645#elif defined(_CALL_DARWIN) 2646# define LINK_AREA_SIZE (6 * SZR) 2647# define LR_OFFSET (2 * SZR) 2648#elif TCG_TARGET_REG_BITS == 64 2649# if defined(_CALL_ELF) && _CALL_ELF == 2 2650# define LINK_AREA_SIZE (4 * SZR) 2651# define LR_OFFSET (1 * SZR) 2652# endif 2653#else /* TCG_TARGET_REG_BITS == 32 */ 2654# if defined(_CALL_SYSV) 2655# define LINK_AREA_SIZE (2 * SZR) 2656# define LR_OFFSET (1 * SZR) 2657# endif 2658#endif 2659#ifndef LR_OFFSET 2660# error "Unhandled abi" 2661#endif 2662#ifndef TCG_TARGET_CALL_STACK_OFFSET 2663# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE 2664#endif 2665 2666#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2667#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) 2668 2669#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ 2670 + TCG_STATIC_CALL_ARGS_SIZE \ 2671 + CPU_TEMP_BUF_SIZE \ 2672 + REG_SAVE_SIZE \ 2673 + TCG_TARGET_STACK_ALIGN - 1) \ 2674 & -TCG_TARGET_STACK_ALIGN) 2675 2676#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) 2677 2678static void tcg_target_qemu_prologue(TCGContext *s) 2679{ 2680 int i; 2681 2682#ifdef _CALL_AIX 2683 const void **desc = (const void **)s->code_ptr; 2684 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ 2685 desc[1] = 0; /* environment pointer */ 2686 s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ 2687#endif 2688 2689 tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, 2690 CPU_TEMP_BUF_SIZE); 2691 2692 /* Prologue */ 2693 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); 2694 tcg_out32(s, (SZR 
== 8 ? STDU : STWU) 2695 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); 2696 2697 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2698 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2699 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2700 } 2701 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2702 2703 if (!tcg_use_softmmu && guest_base) { 2704 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); 2705 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2706 } 2707 2708 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2709 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); 2710 tcg_out32(s, BCCTR | BO_ALWAYS); 2711 2712 /* Epilogue */ 2713 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2714 2715 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2716 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2717 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2718 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2719 } 2720 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); 2721 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); 2722 tcg_out32(s, BCLR | BO_ALWAYS); 2723} 2724 2725static void tcg_out_tb_start(TCGContext *s) 2726{ 2727 /* Load TCG_REG_TB. */ 2728 if (USE_REG_TB) { 2729 if (have_isa_3_00) { 2730 /* lnia REG_TB */ 2731 tcg_out_addpcis(s, TCG_REG_TB, 0); 2732 } else { 2733 /* bcl 20,31,$+4 (preferred form for getting nia) */ 2734 tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK); 2735 tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR); 2736 } 2737 } 2738} 2739 2740static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) 2741{ 2742 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); 2743 tcg_out_b(s, 0, tcg_code_gen_epilogue); 2744} 2745 2746static void tcg_out_goto_tb(TCGContext *s, int which) 2747{ 2748 uintptr_t ptr = get_jmp_target_addr(s, which); 2749 int16_t lo; 2750 2751 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2752 set_jmp_insn_offset(s, which); 2753 tcg_out32(s, NOP); 2754 2755 /* When branch is out of range, fall through to indirect. 
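       tb_target_set_jmp_target() rewrites the nop emitted above into a
       direct branch when the destination is in range; otherwise it leaves
       a nop and execution falls into the indirect mtctr/bcctr sequence
       below, which loads the destination address from the per-TB jump
       target slot.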
*/ 2756 if (USE_REG_TB) { 2757 ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr); 2758 tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset); 2759 } else if (have_isa_3_10) { 2760 ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr); 2761 tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1); 2762 } else if (have_isa_3_00) { 2763 ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4; 2764 lo = offset; 2765 tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo); 2766 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2767 } else { 2768 lo = ptr; 2769 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo); 2770 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2771 } 2772 2773 tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); 2774 tcg_out32(s, BCCTR | BO_ALWAYS); 2775 set_jmp_reset_offset(s, which); 2776} 2777 2778void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2779 uintptr_t jmp_rx, uintptr_t jmp_rw) 2780{ 2781 uintptr_t addr = tb->jmp_target_addr[n]; 2782 intptr_t diff = addr - jmp_rx; 2783 tcg_insn_unit insn; 2784 2785 if (in_range_b(diff)) { 2786 insn = B | (diff & 0x3fffffc); 2787 } else { 2788 insn = NOP; 2789 } 2790 2791 qatomic_set((uint32_t *)jmp_rw, insn); 2792 flush_idcache_range(jmp_rx, jmp_rw, 4); 2793} 2794 2795static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2796 const TCGArg args[TCG_MAX_OP_ARGS], 2797 const int const_args[TCG_MAX_OP_ARGS]) 2798{ 2799 TCGArg a0, a1, a2; 2800 2801 switch (opc) { 2802 case INDEX_op_goto_ptr: 2803 tcg_out32(s, MTSPR | RS(args[0]) | CTR); 2804 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); 2805 tcg_out32(s, BCCTR | BO_ALWAYS); 2806 break; 2807 case INDEX_op_br: 2808 { 2809 TCGLabel *l = arg_label(args[0]); 2810 uint32_t insn = B; 2811 2812 if (l->has_value) { 2813 insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), 2814 l->u.value_ptr); 2815 } else { 2816 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); 2817 } 2818 tcg_out32(s, insn); 2819 } 2820 break; 2821 case INDEX_op_ld8u_i32: 2822 case INDEX_op_ld8u_i64: 2823 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2824 break; 2825 case INDEX_op_ld8s_i32: 2826 case INDEX_op_ld8s_i64: 2827 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2828 tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); 2829 break; 2830 case INDEX_op_ld16u_i32: 2831 case INDEX_op_ld16u_i64: 2832 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); 2833 break; 2834 case INDEX_op_ld16s_i32: 2835 case INDEX_op_ld16s_i64: 2836 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); 2837 break; 2838 case INDEX_op_ld_i32: 2839 case INDEX_op_ld32u_i64: 2840 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); 2841 break; 2842 case INDEX_op_ld32s_i64: 2843 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); 2844 break; 2845 case INDEX_op_ld_i64: 2846 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); 2847 break; 2848 case INDEX_op_st8_i32: 2849 case INDEX_op_st8_i64: 2850 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); 2851 break; 2852 case INDEX_op_st16_i32: 2853 case INDEX_op_st16_i64: 2854 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); 2855 break; 2856 case INDEX_op_st_i32: 2857 case INDEX_op_st32_i64: 2858 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); 2859 break; 2860 case INDEX_op_st_i64: 2861 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); 2862 break; 2863 2864 case INDEX_op_add_i32: 2865 a0 = args[0], a1 = args[1], a2 = args[2]; 2866 if (const_args[2]) { 2867 do_addi_32: 2868 
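        /*
         * tcg_out_mem_long() picks between the immediate (ADDI) and
         * register (ADD) encodings based on the size of the constant,
         * splitting or materializing it when it does not fit a single
         * 16-bit immediate.
         */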
tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); 2869 } else { 2870 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2871 } 2872 break; 2873 case INDEX_op_sub_i32: 2874 a0 = args[0], a1 = args[1], a2 = args[2]; 2875 if (const_args[1]) { 2876 if (const_args[2]) { 2877 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); 2878 } else { 2879 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2880 } 2881 } else if (const_args[2]) { 2882 a2 = -a2; 2883 goto do_addi_32; 2884 } else { 2885 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2886 } 2887 break; 2888 2889 case INDEX_op_and_i32: 2890 a0 = args[0], a1 = args[1], a2 = args[2]; 2891 if (const_args[2]) { 2892 tcg_out_andi32(s, a0, a1, a2); 2893 } else { 2894 tcg_out32(s, AND | SAB(a1, a0, a2)); 2895 } 2896 break; 2897 case INDEX_op_and_i64: 2898 a0 = args[0], a1 = args[1], a2 = args[2]; 2899 if (const_args[2]) { 2900 tcg_out_andi64(s, a0, a1, a2); 2901 } else { 2902 tcg_out32(s, AND | SAB(a1, a0, a2)); 2903 } 2904 break; 2905 case INDEX_op_or_i64: 2906 case INDEX_op_or_i32: 2907 a0 = args[0], a1 = args[1], a2 = args[2]; 2908 if (const_args[2]) { 2909 tcg_out_ori32(s, a0, a1, a2); 2910 } else { 2911 tcg_out32(s, OR | SAB(a1, a0, a2)); 2912 } 2913 break; 2914 case INDEX_op_xor_i64: 2915 case INDEX_op_xor_i32: 2916 a0 = args[0], a1 = args[1], a2 = args[2]; 2917 if (const_args[2]) { 2918 tcg_out_xori32(s, a0, a1, a2); 2919 } else { 2920 tcg_out32(s, XOR | SAB(a1, a0, a2)); 2921 } 2922 break; 2923 case INDEX_op_andc_i32: 2924 a0 = args[0], a1 = args[1], a2 = args[2]; 2925 if (const_args[2]) { 2926 tcg_out_andi32(s, a0, a1, ~a2); 2927 } else { 2928 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2929 } 2930 break; 2931 case INDEX_op_andc_i64: 2932 a0 = args[0], a1 = args[1], a2 = args[2]; 2933 if (const_args[2]) { 2934 tcg_out_andi64(s, a0, a1, ~a2); 2935 } else { 2936 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2937 } 2938 break; 2939 case INDEX_op_orc_i32: 2940 if (const_args[2]) { 2941 tcg_out_ori32(s, args[0], args[1], ~args[2]); 2942 break; 2943 } 2944 /* FALLTHRU */ 2945 case INDEX_op_orc_i64: 2946 tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); 2947 break; 2948 case INDEX_op_eqv_i32: 2949 if (const_args[2]) { 2950 tcg_out_xori32(s, args[0], args[1], ~args[2]); 2951 break; 2952 } 2953 /* FALLTHRU */ 2954 case INDEX_op_eqv_i64: 2955 tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); 2956 break; 2957 case INDEX_op_nand_i32: 2958 case INDEX_op_nand_i64: 2959 tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); 2960 break; 2961 case INDEX_op_nor_i32: 2962 case INDEX_op_nor_i64: 2963 tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); 2964 break; 2965 2966 case INDEX_op_clz_i32: 2967 tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], 2968 args[2], const_args[2]); 2969 break; 2970 case INDEX_op_ctz_i32: 2971 tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], 2972 args[2], const_args[2]); 2973 break; 2974 case INDEX_op_ctpop_i32: 2975 tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); 2976 break; 2977 2978 case INDEX_op_clz_i64: 2979 tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], 2980 args[2], const_args[2]); 2981 break; 2982 case INDEX_op_ctz_i64: 2983 tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], 2984 args[2], const_args[2]); 2985 break; 2986 case INDEX_op_ctpop_i64: 2987 tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); 2988 break; 2989 2990 case INDEX_op_mul_i32: 2991 a0 = args[0], a1 = args[1], a2 = args[2]; 2992 if (const_args[2]) { 2993 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2994 } else { 2995 tcg_out32(s, MULLW | TAB(a0, a1, a2)); 2996 } 2997 break; 2998 
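    /*
     * Note: MULLI (used above when the multiplier is constant) takes a
     * signed 16-bit immediate; the 'rI' constraint returned for mul by
     * tcg_target_op_def() keeps constant operands within that range.
     */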
2999 case INDEX_op_div_i32: 3000 tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); 3001 break; 3002 3003 case INDEX_op_divu_i32: 3004 tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); 3005 break; 3006 3007 case INDEX_op_rem_i32: 3008 tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); 3009 break; 3010 3011 case INDEX_op_remu_i32: 3012 tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); 3013 break; 3014 3015 case INDEX_op_shl_i32: 3016 if (const_args[2]) { 3017 /* Limit immediate shift count lest we create an illegal insn. */ 3018 tcg_out_shli32(s, args[0], args[1], args[2] & 31); 3019 } else { 3020 tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); 3021 } 3022 break; 3023 case INDEX_op_shr_i32: 3024 if (const_args[2]) { 3025 /* Limit immediate shift count lest we create an illegal insn. */ 3026 tcg_out_shri32(s, args[0], args[1], args[2] & 31); 3027 } else { 3028 tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); 3029 } 3030 break; 3031 case INDEX_op_sar_i32: 3032 if (const_args[2]) { 3033 tcg_out_sari32(s, args[0], args[1], args[2]); 3034 } else { 3035 tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); 3036 } 3037 break; 3038 case INDEX_op_rotl_i32: 3039 if (const_args[2]) { 3040 tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); 3041 } else { 3042 tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) 3043 | MB(0) | ME(31)); 3044 } 3045 break; 3046 case INDEX_op_rotr_i32: 3047 if (const_args[2]) { 3048 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); 3049 } else { 3050 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); 3051 tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) 3052 | MB(0) | ME(31)); 3053 } 3054 break; 3055 3056 case INDEX_op_brcond_i32: 3057 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 3058 arg_label(args[3]), TCG_TYPE_I32); 3059 break; 3060 case INDEX_op_brcond_i64: 3061 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 3062 arg_label(args[3]), TCG_TYPE_I64); 3063 break; 3064 case INDEX_op_brcond2_i32: 3065 tcg_out_brcond2(s, args, const_args); 3066 break; 3067 3068 case INDEX_op_neg_i32: 3069 case INDEX_op_neg_i64: 3070 tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); 3071 break; 3072 3073 case INDEX_op_not_i32: 3074 case INDEX_op_not_i64: 3075 tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); 3076 break; 3077 3078 case INDEX_op_add_i64: 3079 a0 = args[0], a1 = args[1], a2 = args[2]; 3080 if (const_args[2]) { 3081 do_addi_64: 3082 tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); 3083 } else { 3084 tcg_out32(s, ADD | TAB(a0, a1, a2)); 3085 } 3086 break; 3087 case INDEX_op_sub_i64: 3088 a0 = args[0], a1 = args[1], a2 = args[2]; 3089 if (const_args[1]) { 3090 if (const_args[2]) { 3091 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); 3092 } else { 3093 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 3094 } 3095 } else if (const_args[2]) { 3096 a2 = -a2; 3097 goto do_addi_64; 3098 } else { 3099 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 3100 } 3101 break; 3102 3103 case INDEX_op_shl_i64: 3104 if (const_args[2]) { 3105 /* Limit immediate shift count lest we create an illegal insn. */ 3106 tcg_out_shli64(s, args[0], args[1], args[2] & 63); 3107 } else { 3108 tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); 3109 } 3110 break; 3111 case INDEX_op_shr_i64: 3112 if (const_args[2]) { 3113 /* Limit immediate shift count lest we create an illegal insn. 
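           A count of 64 or more cannot be encoded in the rotate-and-mask
           form used by tcg_out_shri64; since TCG leaves shifts by at least
           the operand width undefined, reducing the count modulo 64 is a
           safe way to keep the encoding valid.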
*/ 3114 tcg_out_shri64(s, args[0], args[1], args[2] & 63); 3115 } else { 3116 tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); 3117 } 3118 break; 3119 case INDEX_op_sar_i64: 3120 if (const_args[2]) { 3121 tcg_out_sari64(s, args[0], args[1], args[2]); 3122 } else { 3123 tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); 3124 } 3125 break; 3126 case INDEX_op_rotl_i64: 3127 if (const_args[2]) { 3128 tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); 3129 } else { 3130 tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); 3131 } 3132 break; 3133 case INDEX_op_rotr_i64: 3134 if (const_args[2]) { 3135 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); 3136 } else { 3137 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); 3138 tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); 3139 } 3140 break; 3141 3142 case INDEX_op_mul_i64: 3143 a0 = args[0], a1 = args[1], a2 = args[2]; 3144 if (const_args[2]) { 3145 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 3146 } else { 3147 tcg_out32(s, MULLD | TAB(a0, a1, a2)); 3148 } 3149 break; 3150 case INDEX_op_div_i64: 3151 tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); 3152 break; 3153 case INDEX_op_divu_i64: 3154 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); 3155 break; 3156 case INDEX_op_rem_i64: 3157 tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); 3158 break; 3159 case INDEX_op_remu_i64: 3160 tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); 3161 break; 3162 3163 case INDEX_op_qemu_ld_a64_i32: 3164 if (TCG_TARGET_REG_BITS == 32) { 3165 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], 3166 args[3], TCG_TYPE_I32); 3167 break; 3168 } 3169 /* fall through */ 3170 case INDEX_op_qemu_ld_a32_i32: 3171 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 3172 break; 3173 case INDEX_op_qemu_ld_a32_i64: 3174 if (TCG_TARGET_REG_BITS == 64) { 3175 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 3176 args[2], TCG_TYPE_I64); 3177 } else { 3178 tcg_out_qemu_ld(s, args[0], args[1], args[2], -1, 3179 args[3], TCG_TYPE_I64); 3180 } 3181 break; 3182 case INDEX_op_qemu_ld_a64_i64: 3183 if (TCG_TARGET_REG_BITS == 64) { 3184 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 3185 args[2], TCG_TYPE_I64); 3186 } else { 3187 tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], 3188 args[4], TCG_TYPE_I64); 3189 } 3190 break; 3191 case INDEX_op_qemu_ld_a32_i128: 3192 case INDEX_op_qemu_ld_a64_i128: 3193 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3194 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); 3195 break; 3196 3197 case INDEX_op_qemu_st_a64_i32: 3198 if (TCG_TARGET_REG_BITS == 32) { 3199 tcg_out_qemu_st(s, args[0], -1, args[1], args[2], 3200 args[3], TCG_TYPE_I32); 3201 break; 3202 } 3203 /* fall through */ 3204 case INDEX_op_qemu_st_a32_i32: 3205 tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 3206 break; 3207 case INDEX_op_qemu_st_a32_i64: 3208 if (TCG_TARGET_REG_BITS == 64) { 3209 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 3210 args[2], TCG_TYPE_I64); 3211 } else { 3212 tcg_out_qemu_st(s, args[0], args[1], args[2], -1, 3213 args[3], TCG_TYPE_I64); 3214 } 3215 break; 3216 case INDEX_op_qemu_st_a64_i64: 3217 if (TCG_TARGET_REG_BITS == 64) { 3218 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 3219 args[2], TCG_TYPE_I64); 3220 } else { 3221 tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], 3222 args[4], TCG_TYPE_I64); 3223 } 3224 break; 3225 case INDEX_op_qemu_st_a32_i128: 3226 case INDEX_op_qemu_st_a64_i128: 3227 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3228 
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); 3229 break; 3230 3231 case INDEX_op_setcond_i32: 3232 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3233 const_args[2], false); 3234 break; 3235 case INDEX_op_setcond_i64: 3236 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3237 const_args[2], false); 3238 break; 3239 case INDEX_op_negsetcond_i32: 3240 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3241 const_args[2], true); 3242 break; 3243 case INDEX_op_negsetcond_i64: 3244 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3245 const_args[2], true); 3246 break; 3247 case INDEX_op_setcond2_i32: 3248 tcg_out_setcond2(s, args, const_args); 3249 break; 3250 3251 case INDEX_op_bswap16_i32: 3252 case INDEX_op_bswap16_i64: 3253 tcg_out_bswap16(s, args[0], args[1], args[2]); 3254 break; 3255 case INDEX_op_bswap32_i32: 3256 tcg_out_bswap32(s, args[0], args[1], 0); 3257 break; 3258 case INDEX_op_bswap32_i64: 3259 tcg_out_bswap32(s, args[0], args[1], args[2]); 3260 break; 3261 case INDEX_op_bswap64_i64: 3262 tcg_out_bswap64(s, args[0], args[1]); 3263 break; 3264 3265 case INDEX_op_deposit_i32: 3266 if (const_args[2]) { 3267 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; 3268 tcg_out_andi32(s, args[0], args[0], ~mask); 3269 } else { 3270 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], 3271 32 - args[3] - args[4], 31 - args[3]); 3272 } 3273 break; 3274 case INDEX_op_deposit_i64: 3275 if (const_args[2]) { 3276 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; 3277 tcg_out_andi64(s, args[0], args[0], ~mask); 3278 } else { 3279 tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], 3280 64 - args[3] - args[4]); 3281 } 3282 break; 3283 3284 case INDEX_op_extract_i32: 3285 tcg_out_rlw(s, RLWINM, args[0], args[1], 3286 32 - args[2], 32 - args[3], 31); 3287 break; 3288 case INDEX_op_extract_i64: 3289 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); 3290 break; 3291 3292 case INDEX_op_movcond_i32: 3293 tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], 3294 args[3], args[4], const_args[2]); 3295 break; 3296 case INDEX_op_movcond_i64: 3297 tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], 3298 args[3], args[4], const_args[2]); 3299 break; 3300 3301#if TCG_TARGET_REG_BITS == 64 3302 case INDEX_op_add2_i64: 3303#else 3304 case INDEX_op_add2_i32: 3305#endif 3306 /* Note that the CA bit is defined based on the word size of the 3307 environment. So in 64-bit mode it's always carry-out of bit 63. 3308 The fallback code using deposit works just as well for 32-bit. */ 3309 a0 = args[0], a1 = args[1]; 3310 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { 3311 a0 = TCG_REG_R0; 3312 } 3313 if (const_args[4]) { 3314 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); 3315 } else { 3316 tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); 3317 } 3318 if (const_args[5]) { 3319 tcg_out32(s, (args[5] ? 
ADDME : ADDZE) | RT(a1) | RA(args[3])); 3320 } else { 3321 tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); 3322 } 3323 if (a0 != args[0]) { 3324 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3325 } 3326 break; 3327 3328#if TCG_TARGET_REG_BITS == 64 3329 case INDEX_op_sub2_i64: 3330#else 3331 case INDEX_op_sub2_i32: 3332#endif 3333 a0 = args[0], a1 = args[1]; 3334 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { 3335 a0 = TCG_REG_R0; 3336 } 3337 if (const_args[2]) { 3338 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); 3339 } else { 3340 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); 3341 } 3342 if (const_args[3]) { 3343 tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); 3344 } else { 3345 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); 3346 } 3347 if (a0 != args[0]) { 3348 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3349 } 3350 break; 3351 3352 case INDEX_op_muluh_i32: 3353 tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); 3354 break; 3355 case INDEX_op_mulsh_i32: 3356 tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); 3357 break; 3358 case INDEX_op_muluh_i64: 3359 tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); 3360 break; 3361 case INDEX_op_mulsh_i64: 3362 tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); 3363 break; 3364 3365 case INDEX_op_mb: 3366 tcg_out_mb(s, args[0]); 3367 break; 3368 3369 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 3370 case INDEX_op_mov_i64: 3371 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 3372 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 3373 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 3374 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 3375 case INDEX_op_ext8s_i64: 3376 case INDEX_op_ext8u_i32: 3377 case INDEX_op_ext8u_i64: 3378 case INDEX_op_ext16s_i32: 3379 case INDEX_op_ext16s_i64: 3380 case INDEX_op_ext16u_i32: 3381 case INDEX_op_ext16u_i64: 3382 case INDEX_op_ext32s_i64: 3383 case INDEX_op_ext32u_i64: 3384 case INDEX_op_ext_i32_i64: 3385 case INDEX_op_extu_i32_i64: 3386 case INDEX_op_extrl_i64_i32: 3387 default: 3388 g_assert_not_reached(); 3389 } 3390} 3391 3392int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3393{ 3394 switch (opc) { 3395 case INDEX_op_and_vec: 3396 case INDEX_op_or_vec: 3397 case INDEX_op_xor_vec: 3398 case INDEX_op_andc_vec: 3399 case INDEX_op_not_vec: 3400 case INDEX_op_nor_vec: 3401 case INDEX_op_eqv_vec: 3402 case INDEX_op_nand_vec: 3403 return 1; 3404 case INDEX_op_orc_vec: 3405 return have_isa_2_07; 3406 case INDEX_op_add_vec: 3407 case INDEX_op_sub_vec: 3408 case INDEX_op_smax_vec: 3409 case INDEX_op_smin_vec: 3410 case INDEX_op_umax_vec: 3411 case INDEX_op_umin_vec: 3412 case INDEX_op_shlv_vec: 3413 case INDEX_op_shrv_vec: 3414 case INDEX_op_sarv_vec: 3415 case INDEX_op_rotlv_vec: 3416 return vece <= MO_32 || have_isa_2_07; 3417 case INDEX_op_ssadd_vec: 3418 case INDEX_op_sssub_vec: 3419 case INDEX_op_usadd_vec: 3420 case INDEX_op_ussub_vec: 3421 return vece <= MO_32; 3422 case INDEX_op_cmp_vec: 3423 case INDEX_op_shli_vec: 3424 case INDEX_op_shri_vec: 3425 case INDEX_op_sari_vec: 3426 case INDEX_op_rotli_vec: 3427 return vece <= MO_32 || have_isa_2_07 ? -1 : 0; 3428 case INDEX_op_neg_vec: 3429 return vece >= MO_32 && have_isa_3_00; 3430 case INDEX_op_mul_vec: 3431 switch (vece) { 3432 case MO_8: 3433 case MO_16: 3434 return -1; 3435 case MO_32: 3436 return have_isa_2_07 ? 
1 : -1; 3437 case MO_64: 3438 return have_isa_3_10; 3439 } 3440 return 0; 3441 case INDEX_op_bitsel_vec: 3442 return have_vsx; 3443 case INDEX_op_rotrv_vec: 3444 return -1; 3445 default: 3446 return 0; 3447 } 3448} 3449 3450static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 3451 TCGReg dst, TCGReg src) 3452{ 3453 tcg_debug_assert(dst >= TCG_REG_V0); 3454 3455 /* Splat from integer reg allowed via constraints for v3.00. */ 3456 if (src < TCG_REG_V0) { 3457 tcg_debug_assert(have_isa_3_00); 3458 switch (vece) { 3459 case MO_64: 3460 tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); 3461 return true; 3462 case MO_32: 3463 tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); 3464 return true; 3465 default: 3466 /* Fail, so that we fall back on either dupm or mov+dup. */ 3467 return false; 3468 } 3469 } 3470 3471 /* 3472 * Recall we use (or emulate) VSX integer loads, so the integer is 3473 * right justified within the left (zero-index) double-word. 3474 */ 3475 switch (vece) { 3476 case MO_8: 3477 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); 3478 break; 3479 case MO_16: 3480 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); 3481 break; 3482 case MO_32: 3483 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); 3484 break; 3485 case MO_64: 3486 if (have_vsx) { 3487 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); 3488 break; 3489 } 3490 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); 3491 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); 3492 break; 3493 default: 3494 g_assert_not_reached(); 3495 } 3496 return true; 3497} 3498 3499static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 3500 TCGReg out, TCGReg base, intptr_t offset) 3501{ 3502 int elt; 3503 3504 tcg_debug_assert(out >= TCG_REG_V0); 3505 switch (vece) { 3506 case MO_8: 3507 if (have_isa_3_00) { 3508 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); 3509 } else { 3510 tcg_out_mem_long(s, 0, LVEBX, out, base, offset); 3511 } 3512 elt = extract32(offset, 0, 4); 3513#if !HOST_BIG_ENDIAN 3514 elt ^= 15; 3515#endif 3516 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); 3517 break; 3518 case MO_16: 3519 tcg_debug_assert((offset & 1) == 0); 3520 if (have_isa_3_00) { 3521 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); 3522 } else { 3523 tcg_out_mem_long(s, 0, LVEHX, out, base, offset); 3524 } 3525 elt = extract32(offset, 1, 3); 3526#if !HOST_BIG_ENDIAN 3527 elt ^= 7; 3528#endif 3529 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); 3530 break; 3531 case MO_32: 3532 if (have_isa_3_00) { 3533 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); 3534 break; 3535 } 3536 tcg_debug_assert((offset & 3) == 0); 3537 tcg_out_mem_long(s, 0, LVEWX, out, base, offset); 3538 elt = extract32(offset, 2, 2); 3539#if !HOST_BIG_ENDIAN 3540 elt ^= 3; 3541#endif 3542 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); 3543 break; 3544 case MO_64: 3545 if (have_vsx) { 3546 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); 3547 break; 3548 } 3549 tcg_debug_assert((offset & 7) == 0); 3550 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); 3551 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); 3552 elt = extract32(offset, 3, 1); 3553#if !HOST_BIG_ENDIAN 3554 elt = !elt; 3555#endif 3556 if (elt) { 3557 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); 3558 } else { 3559 tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); 3560 } 3561 break; 3562 default: 3563 g_assert_not_reached(); 3564 } 3565 return true; 3566} 3567 3568static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3569 
unsigned vecl, unsigned vece, 3570 const TCGArg args[TCG_MAX_OP_ARGS], 3571 const int const_args[TCG_MAX_OP_ARGS]) 3572{ 3573 static const uint32_t 3574 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, 3575 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, 3576 mul_op[4] = { 0, 0, VMULUWM, VMULLD }, 3577 neg_op[4] = { 0, 0, VNEGW, VNEGD }, 3578 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, 3579 ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, 3580 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, 3581 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, 3582 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, 3583 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, 3584 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, 3585 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, 3586 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 3587 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 3588 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 3589 smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, 3590 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, 3591 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, 3592 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, 3593 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, 3594 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, 3595 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, 3596 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, 3597 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, 3598 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; 3599 3600 TCGType type = vecl + TCG_TYPE_V64; 3601 TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; 3602 uint32_t insn; 3603 3604 switch (opc) { 3605 case INDEX_op_ld_vec: 3606 tcg_out_ld(s, type, a0, a1, a2); 3607 return; 3608 case INDEX_op_st_vec: 3609 tcg_out_st(s, type, a0, a1, a2); 3610 return; 3611 case INDEX_op_dupm_vec: 3612 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 3613 return; 3614 3615 case INDEX_op_add_vec: 3616 insn = add_op[vece]; 3617 break; 3618 case INDEX_op_sub_vec: 3619 insn = sub_op[vece]; 3620 break; 3621 case INDEX_op_neg_vec: 3622 insn = neg_op[vece]; 3623 a2 = a1; 3624 a1 = 0; 3625 break; 3626 case INDEX_op_mul_vec: 3627 insn = mul_op[vece]; 3628 break; 3629 case INDEX_op_ssadd_vec: 3630 insn = ssadd_op[vece]; 3631 break; 3632 case INDEX_op_sssub_vec: 3633 insn = sssub_op[vece]; 3634 break; 3635 case INDEX_op_usadd_vec: 3636 insn = usadd_op[vece]; 3637 break; 3638 case INDEX_op_ussub_vec: 3639 insn = ussub_op[vece]; 3640 break; 3641 case INDEX_op_smin_vec: 3642 insn = smin_op[vece]; 3643 break; 3644 case INDEX_op_umin_vec: 3645 insn = umin_op[vece]; 3646 break; 3647 case INDEX_op_smax_vec: 3648 insn = smax_op[vece]; 3649 break; 3650 case INDEX_op_umax_vec: 3651 insn = umax_op[vece]; 3652 break; 3653 case INDEX_op_shlv_vec: 3654 insn = shlv_op[vece]; 3655 break; 3656 case INDEX_op_shrv_vec: 3657 insn = shrv_op[vece]; 3658 break; 3659 case INDEX_op_sarv_vec: 3660 insn = sarv_op[vece]; 3661 break; 3662 case INDEX_op_and_vec: 3663 insn = VAND; 3664 break; 3665 case INDEX_op_or_vec: 3666 insn = VOR; 3667 break; 3668 case INDEX_op_xor_vec: 3669 insn = VXOR; 3670 break; 3671 case INDEX_op_andc_vec: 3672 insn = VANDC; 3673 break; 3674 case INDEX_op_not_vec: 3675 insn = VNOR; 3676 a2 = a1; 3677 break; 3678 case INDEX_op_orc_vec: 3679 insn = VORC; 3680 break; 3681 case INDEX_op_nand_vec: 3682 insn = VNAND; 3683 break; 3684 case INDEX_op_nor_vec: 3685 insn = VNOR; 3686 break; 3687 case INDEX_op_eqv_vec: 3688 insn = VEQV; 3689 break; 3690 3691 case INDEX_op_cmp_vec: 3692 switch (args[3]) { 3693 case TCG_COND_EQ: 3694 insn = eq_op[vece]; 3695 break; 3696 case 
TCG_COND_NE: 3697 insn = ne_op[vece]; 3698 break; 3699 case TCG_COND_GT: 3700 insn = gts_op[vece]; 3701 break; 3702 case TCG_COND_GTU: 3703 insn = gtu_op[vece]; 3704 break; 3705 default: 3706 g_assert_not_reached(); 3707 } 3708 break; 3709 3710 case INDEX_op_bitsel_vec: 3711 tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); 3712 return; 3713 3714 case INDEX_op_dup2_vec: 3715 assert(TCG_TARGET_REG_BITS == 32); 3716 /* With inputs a1 = xLxx, a2 = xHxx */ 3717 tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ 3718 tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ 3719 tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ 3720 return; 3721 3722 case INDEX_op_ppc_mrgh_vec: 3723 insn = mrgh_op[vece]; 3724 break; 3725 case INDEX_op_ppc_mrgl_vec: 3726 insn = mrgl_op[vece]; 3727 break; 3728 case INDEX_op_ppc_muleu_vec: 3729 insn = muleu_op[vece]; 3730 break; 3731 case INDEX_op_ppc_mulou_vec: 3732 insn = mulou_op[vece]; 3733 break; 3734 case INDEX_op_ppc_pkum_vec: 3735 insn = pkum_op[vece]; 3736 break; 3737 case INDEX_op_rotlv_vec: 3738 insn = rotl_op[vece]; 3739 break; 3740 case INDEX_op_ppc_msum_vec: 3741 tcg_debug_assert(vece == MO_16); 3742 tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); 3743 return; 3744 3745 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3746 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 3747 default: 3748 g_assert_not_reached(); 3749 } 3750 3751 tcg_debug_assert(insn != 0); 3752 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 3753} 3754 3755static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, 3756 TCGv_vec v1, TCGArg imm, TCGOpcode opci) 3757{ 3758 TCGv_vec t1; 3759 3760 if (vece == MO_32) { 3761 /* 3762 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3763 * So using negative numbers gets us the 4th bit easily. 3764 */ 3765 imm = sextract32(imm, 0, 5); 3766 } else { 3767 imm &= (8 << vece) - 1; 3768 } 3769 3770 /* Splat w/bytes for xxspltib when 2.07 allows MO_64. 
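       Replicating the count into every byte works for all element sizes:
       the vector shift instructions use only the low log2(element bits)
       bits of each element, and those bits come from the element's low
       byte, which the byte splat sets to the same value everywhere.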
*/ 3771 t1 = tcg_constant_vec(type, MO_8, imm); 3772 vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), 3773 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3774} 3775 3776static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, 3777 TCGv_vec v1, TCGv_vec v2, TCGCond cond) 3778{ 3779 bool need_swap = false, need_inv = false; 3780 3781 tcg_debug_assert(vece <= MO_32 || have_isa_2_07); 3782 3783 switch (cond) { 3784 case TCG_COND_EQ: 3785 case TCG_COND_GT: 3786 case TCG_COND_GTU: 3787 break; 3788 case TCG_COND_NE: 3789 if (have_isa_3_00 && vece <= MO_32) { 3790 break; 3791 } 3792 /* fall through */ 3793 case TCG_COND_LE: 3794 case TCG_COND_LEU: 3795 need_inv = true; 3796 break; 3797 case TCG_COND_LT: 3798 case TCG_COND_LTU: 3799 need_swap = true; 3800 break; 3801 case TCG_COND_GE: 3802 case TCG_COND_GEU: 3803 need_swap = need_inv = true; 3804 break; 3805 default: 3806 g_assert_not_reached(); 3807 } 3808 3809 if (need_inv) { 3810 cond = tcg_invert_cond(cond); 3811 } 3812 if (need_swap) { 3813 TCGv_vec t1; 3814 t1 = v1, v1 = v2, v2 = t1; 3815 cond = tcg_swap_cond(cond); 3816 } 3817 3818 vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), 3819 tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); 3820 3821 if (need_inv) { 3822 tcg_gen_not_vec(vece, v0, v0); 3823 } 3824} 3825 3826static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, 3827 TCGv_vec v1, TCGv_vec v2) 3828{ 3829 TCGv_vec t1 = tcg_temp_new_vec(type); 3830 TCGv_vec t2 = tcg_temp_new_vec(type); 3831 TCGv_vec c0, c16; 3832 3833 switch (vece) { 3834 case MO_8: 3835 case MO_16: 3836 vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), 3837 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3838 vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), 3839 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3840 vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), 3841 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3842 vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), 3843 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3844 vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), 3845 tcgv_vec_arg(v0), tcgv_vec_arg(t1)); 3846 break; 3847 3848 case MO_32: 3849 tcg_debug_assert(!have_isa_2_07); 3850 /* 3851 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3852 * So using -16 is a quick way to represent 16. 3853 */ 3854 c16 = tcg_constant_vec(type, MO_8, -16); 3855 c0 = tcg_constant_vec(type, MO_8, 0); 3856 3857 vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1), 3858 tcgv_vec_arg(v2), tcgv_vec_arg(c16)); 3859 vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), 3860 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3861 vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1), 3862 tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0)); 3863 vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1), 3864 tcgv_vec_arg(t1), tcgv_vec_arg(c16)); 3865 tcg_gen_add_vec(MO_32, v0, t1, t2); 3866 break; 3867 3868 default: 3869 g_assert_not_reached(); 3870 } 3871 tcg_temp_free_vec(t1); 3872 tcg_temp_free_vec(t2); 3873} 3874 3875void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 3876 TCGArg a0, ...) 
3877{ 3878 va_list va; 3879 TCGv_vec v0, v1, v2, t0; 3880 TCGArg a2; 3881 3882 va_start(va, a0); 3883 v0 = temp_tcgv_vec(arg_temp(a0)); 3884 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 3885 a2 = va_arg(va, TCGArg); 3886 3887 switch (opc) { 3888 case INDEX_op_shli_vec: 3889 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); 3890 break; 3891 case INDEX_op_shri_vec: 3892 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); 3893 break; 3894 case INDEX_op_sari_vec: 3895 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); 3896 break; 3897 case INDEX_op_rotli_vec: 3898 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec); 3899 break; 3900 case INDEX_op_cmp_vec: 3901 v2 = temp_tcgv_vec(arg_temp(a2)); 3902 expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); 3903 break; 3904 case INDEX_op_mul_vec: 3905 v2 = temp_tcgv_vec(arg_temp(a2)); 3906 expand_vec_mul(type, vece, v0, v1, v2); 3907 break; 3908 case INDEX_op_rotlv_vec: 3909 v2 = temp_tcgv_vec(arg_temp(a2)); 3910 t0 = tcg_temp_new_vec(type); 3911 tcg_gen_neg_vec(vece, t0, v2); 3912 tcg_gen_rotlv_vec(vece, v0, v1, t0); 3913 tcg_temp_free_vec(t0); 3914 break; 3915 default: 3916 g_assert_not_reached(); 3917 } 3918 va_end(va); 3919} 3920 3921static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 3922{ 3923 switch (op) { 3924 case INDEX_op_goto_ptr: 3925 return C_O0_I1(r); 3926 3927 case INDEX_op_ld8u_i32: 3928 case INDEX_op_ld8s_i32: 3929 case INDEX_op_ld16u_i32: 3930 case INDEX_op_ld16s_i32: 3931 case INDEX_op_ld_i32: 3932 case INDEX_op_ctpop_i32: 3933 case INDEX_op_neg_i32: 3934 case INDEX_op_not_i32: 3935 case INDEX_op_ext8s_i32: 3936 case INDEX_op_ext16s_i32: 3937 case INDEX_op_bswap16_i32: 3938 case INDEX_op_bswap32_i32: 3939 case INDEX_op_extract_i32: 3940 case INDEX_op_ld8u_i64: 3941 case INDEX_op_ld8s_i64: 3942 case INDEX_op_ld16u_i64: 3943 case INDEX_op_ld16s_i64: 3944 case INDEX_op_ld32u_i64: 3945 case INDEX_op_ld32s_i64: 3946 case INDEX_op_ld_i64: 3947 case INDEX_op_ctpop_i64: 3948 case INDEX_op_neg_i64: 3949 case INDEX_op_not_i64: 3950 case INDEX_op_ext8s_i64: 3951 case INDEX_op_ext16s_i64: 3952 case INDEX_op_ext32s_i64: 3953 case INDEX_op_ext_i32_i64: 3954 case INDEX_op_extu_i32_i64: 3955 case INDEX_op_bswap16_i64: 3956 case INDEX_op_bswap32_i64: 3957 case INDEX_op_bswap64_i64: 3958 case INDEX_op_extract_i64: 3959 return C_O1_I1(r, r); 3960 3961 case INDEX_op_st8_i32: 3962 case INDEX_op_st16_i32: 3963 case INDEX_op_st_i32: 3964 case INDEX_op_st8_i64: 3965 case INDEX_op_st16_i64: 3966 case INDEX_op_st32_i64: 3967 case INDEX_op_st_i64: 3968 return C_O0_I2(r, r); 3969 3970 case INDEX_op_add_i32: 3971 case INDEX_op_and_i32: 3972 case INDEX_op_or_i32: 3973 case INDEX_op_xor_i32: 3974 case INDEX_op_andc_i32: 3975 case INDEX_op_orc_i32: 3976 case INDEX_op_eqv_i32: 3977 case INDEX_op_shl_i32: 3978 case INDEX_op_shr_i32: 3979 case INDEX_op_sar_i32: 3980 case INDEX_op_rotl_i32: 3981 case INDEX_op_rotr_i32: 3982 case INDEX_op_setcond_i32: 3983 case INDEX_op_negsetcond_i32: 3984 case INDEX_op_and_i64: 3985 case INDEX_op_andc_i64: 3986 case INDEX_op_shl_i64: 3987 case INDEX_op_shr_i64: 3988 case INDEX_op_sar_i64: 3989 case INDEX_op_rotl_i64: 3990 case INDEX_op_rotr_i64: 3991 case INDEX_op_setcond_i64: 3992 case INDEX_op_negsetcond_i64: 3993 return C_O1_I2(r, r, ri); 3994 3995 case INDEX_op_mul_i32: 3996 case INDEX_op_mul_i64: 3997 return C_O1_I2(r, r, rI); 3998 3999 case INDEX_op_div_i32: 4000 case INDEX_op_divu_i32: 4001 case INDEX_op_rem_i32: 4002 case INDEX_op_remu_i32: 4003 case INDEX_op_nand_i32: 4004 
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ctpop_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_not_i32:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_ctpop_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i64:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_extract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(r, r);

    case INDEX_op_add_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_andc_i32:
    case INDEX_op_orc_i32:
    case INDEX_op_eqv_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_and_i64:
    case INDEX_op_andc_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
        return C_O1_I2(r, r, rI);

    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
    case INDEX_op_nand_i32:
    case INDEX_op_nor_i32:
    case INDEX_op_muluh_i32:
    case INDEX_op_mulsh_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i64:
    case INDEX_op_nand_i64:
    case INDEX_op_nor_i64:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
    case INDEX_op_mulsh_i64:
    case INDEX_op_muluh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_sub_i32:
        return C_O1_I2(r, rI, ri);
    case INDEX_op_add_i64:
        return C_O1_I2(r, r, rT);
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
        return C_O1_I2(r, r, rU);
    case INDEX_op_sub_i64:
        return C_O1_I2(r, rI, rT);
    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rZW);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, ri);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, ri, rZ, rZ);
    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);
    case INDEX_op_brcond2_i32:
        return C_O0_I4(r, r, ri, ri);
    case INDEX_op_setcond2_i32:
        return C_O1_I4(r, r, r, ri, ri);
    case INDEX_op_add2_i64:
    case INDEX_op_add2_i32:
        return C_O2_I4(r, r, r, r, rI, rZM);
    case INDEX_op_sub2_i64:
    case INDEX_op_sub2_i32:
        return C_O2_I4(r, r, rI, rZM, r, r);
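
    /*
     * Guest memory access ops: on a 32-bit host, 64-bit guest addresses
     * and 64-bit data values each occupy a register pair, which is why
     * the 32-bit variants of these constraint sets take extra operands.
     */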
    case INDEX_op_qemu_ld_a32_i32:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_a64_i32:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
    case INDEX_op_qemu_ld_a32_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
    case INDEX_op_qemu_ld_a64_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);

    case INDEX_op_qemu_st_a32_i32:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st_a64_i32:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
    case INDEX_op_qemu_st_a32_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
    case INDEX_op_qemu_st_a64_i64:
        return TCG_TARGET_REG_BITS == 64 ?
            C_O0_I2(r, r) : C_O0_I4(r, r, r, r);

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        return C_N1O1_I1(o, m, r);
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return C_O0_I3(o, m, r);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    case INDEX_op_dup_vec:
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);

    default:
        g_assert_not_reached();
    }
}
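
/*
 * Describe the host register file to the register allocator: which
 * registers are available for each value type, which are clobbered by a
 * call under the host ABI, and which are reserved for fixed purposes.
 */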
static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
    if (have_altivec) {
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);  /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);  /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2);  /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
    }
}

#ifdef __ELF__
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif

static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),    /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    uint8_t *p = &debug_frame.fde_reg_ofs[3];
    int i;

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
        p[0] = 0x80 + tcg_target_callee_save_regs[i];
        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
    }

    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif /* __ELF__ */

#undef VMULEUB
#undef VMULEUH
#undef VMULEUW
#undef VMULOUB
#undef VMULOUH
#undef VMULOUW
#undef VMSUMUHM