1/* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25#include "elf.h" 26#include "../tcg-pool.c.inc" 27#include "../tcg-ldst.c.inc" 28 29/* 30 * Standardize on the _CALL_FOO symbols used by GCC: 31 * Apple XCode does not define _CALL_DARWIN. 32 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX. 33 */ 34#if TCG_TARGET_REG_BITS == 64 35# ifdef _CALL_AIX 36 /* ok */ 37# elif defined(_CALL_ELF) && _CALL_ELF == 1 38# define _CALL_AIX 39# elif defined(_CALL_ELF) && _CALL_ELF == 2 40 /* ok */ 41# else 42# error "Unknown ABI" 43# endif 44#else 45# if defined(_CALL_SYSV) || defined(_CALL_DARWIN) 46 /* ok */ 47# elif defined(__APPLE__) 48# define _CALL_DARWIN 49# elif defined(__ELF__) 50# define _CALL_SYSV 51# else 52# error "Unknown ABI" 53# endif 54#endif 55 56#if TCG_TARGET_REG_BITS == 64 57# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND 58# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL 59#else 60# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL 61# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF 62#endif 63#ifdef _CALL_SYSV 64# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN 65# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF 66#else 67# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL 68# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL 69#endif 70 71/* For some memory operations, we need a scratch that isn't R0. For the AIX 72 calling convention, we can re-use the TOC register since we'll be reloading 73 it at every call. Otherwise R12 will do nicely as neither a call-saved 74 register nor a parameter register. */ 75#ifdef _CALL_AIX 76# define TCG_REG_TMP1 TCG_REG_R2 77#else 78# define TCG_REG_TMP1 TCG_REG_R12 79#endif 80#define TCG_REG_TMP2 TCG_REG_R11 81 82#define TCG_VEC_TMP1 TCG_REG_V0 83#define TCG_VEC_TMP2 TCG_REG_V1 84 85#define TCG_REG_TB TCG_REG_R31 86#define USE_REG_TB (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00) 87 88/* Shorthand for size of a pointer. Avoid promotion to unsigned. */ 89#define SZP ((int)sizeof(void *)) 90 91/* Shorthand for size of a register. */ 92#define SZR (TCG_TARGET_REG_BITS / 8) 93 94#define TCG_CT_CONST_S16 0x100 95#define TCG_CT_CONST_S32 0x400 96#define TCG_CT_CONST_U32 0x800 97#define TCG_CT_CONST_ZERO 0x1000 98#define TCG_CT_CONST_MONE 0x2000 99#define TCG_CT_CONST_WSZ 0x4000 100 101#define ALL_GENERAL_REGS 0xffffffffu 102#define ALL_VECTOR_REGS 0xffffffff00000000ull 103 104#ifndef R_PPC64_PCREL34 105#define R_PPC64_PCREL34 132 106#endif 107 108#define have_isel (cpuinfo & CPUINFO_ISEL) 109 110#define TCG_GUEST_BASE_REG TCG_REG_R30 111 112#ifdef CONFIG_DEBUG_TCG 113static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = { 114 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", 115 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 116 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", 117 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", 118 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 119 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 120 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 121 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", 122}; 123#endif 124 125static const int tcg_target_reg_alloc_order[] = { 126 TCG_REG_R14, /* call saved registers */ 127 TCG_REG_R15, 128 TCG_REG_R16, 129 TCG_REG_R17, 130 TCG_REG_R18, 131 TCG_REG_R19, 132 TCG_REG_R20, 133 TCG_REG_R21, 134 TCG_REG_R22, 135 TCG_REG_R23, 136 TCG_REG_R24, 137 TCG_REG_R25, 138 TCG_REG_R26, 139 TCG_REG_R27, 140 TCG_REG_R28, 141 TCG_REG_R29, 142 TCG_REG_R30, 143 TCG_REG_R31, 144 TCG_REG_R12, /* call clobbered, non-arguments */ 145 TCG_REG_R11, 146 TCG_REG_R2, 147 TCG_REG_R13, 148 TCG_REG_R10, /* call clobbered, arguments */ 149 TCG_REG_R9, 150 TCG_REG_R8, 151 TCG_REG_R7, 152 TCG_REG_R6, 153 TCG_REG_R5, 154 TCG_REG_R4, 155 TCG_REG_R3, 156 157 /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */ 158 TCG_REG_V2, /* call clobbered, vectors */ 159 TCG_REG_V3, 160 TCG_REG_V4, 161 TCG_REG_V5, 162 TCG_REG_V6, 163 TCG_REG_V7, 164 TCG_REG_V8, 165 TCG_REG_V9, 166 TCG_REG_V10, 167 TCG_REG_V11, 168 TCG_REG_V12, 169 TCG_REG_V13, 170 TCG_REG_V14, 171 TCG_REG_V15, 172 TCG_REG_V16, 173 TCG_REG_V17, 174 TCG_REG_V18, 175 TCG_REG_V19, 176}; 177 178static const int tcg_target_call_iarg_regs[] = { 179 TCG_REG_R3, 180 TCG_REG_R4, 181 TCG_REG_R5, 182 TCG_REG_R6, 183 TCG_REG_R7, 184 TCG_REG_R8, 185 TCG_REG_R9, 186 TCG_REG_R10 187}; 188 189static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) 190{ 191 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); 192 tcg_debug_assert(slot >= 0 && slot <= 1); 193 return TCG_REG_R3 + slot; 194} 195 196static const int tcg_target_callee_save_regs[] = { 197#ifdef _CALL_DARWIN 198 TCG_REG_R11, 199#endif 200 TCG_REG_R14, 201 TCG_REG_R15, 202 TCG_REG_R16, 203 TCG_REG_R17, 204 TCG_REG_R18, 205 TCG_REG_R19, 206 TCG_REG_R20, 207 TCG_REG_R21, 208 TCG_REG_R22, 209 TCG_REG_R23, 210 TCG_REG_R24, 211 TCG_REG_R25, 212 TCG_REG_R26, 213 TCG_REG_R27, /* currently used for the global env */ 214 TCG_REG_R28, 215 TCG_REG_R29, 216 TCG_REG_R30, 217 TCG_REG_R31 218}; 219 220/* For PPC, we use TB+4 instead of TB as the base. */ 221static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target) 222{ 223 return tcg_tbrel_diff(s, target) - 4; 224} 225 226static inline bool in_range_b(tcg_target_long target) 227{ 228 return target == sextract64(target, 0, 26); 229} 230 231static uint32_t reloc_pc24_val(const tcg_insn_unit *pc, 232 const tcg_insn_unit *target) 233{ 234 ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); 235 tcg_debug_assert(in_range_b(disp)); 236 return disp & 0x3fffffc; 237} 238 239static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 240{ 241 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 242 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 243 244 if (in_range_b(disp)) { 245 *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc); 246 return true; 247 } 248 return false; 249} 250 251static uint16_t reloc_pc14_val(const tcg_insn_unit *pc, 252 const tcg_insn_unit *target) 253{ 254 ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); 255 tcg_debug_assert(disp == (int16_t) disp); 256 return disp & 0xfffc; 257} 258 259static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 260{ 261 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 262 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 263 264 if (disp == (int16_t) disp) { 265 *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc); 266 return true; 267 } 268 return false; 269} 270 271static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target) 272{ 273 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); 274 ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); 275 276 if (disp == sextract64(disp, 0, 34)) { 277 src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff); 278 src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff); 279 return true; 280 } 281 return false; 282} 283 284/* test if a constant matches the constraint */ 285static bool tcg_target_const_match(int64_t val, int ct, 286 TCGType type, TCGCond cond, int vece) 287{ 288 if (ct & TCG_CT_CONST) { 289 return 1; 290 } 291 292 /* The only 32-bit constraint we use aside from 293 TCG_CT_CONST is TCG_CT_CONST_S16. */ 294 if (type == TCG_TYPE_I32) { 295 val = (int32_t)val; 296 } 297 298 if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { 299 return 1; 300 } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { 301 return 1; 302 } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { 303 return 1; 304 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { 305 return 1; 306 } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { 307 return 1; 308 } else if ((ct & TCG_CT_CONST_WSZ) 309 && val == (type == TCG_TYPE_I32 ? 32 : 64)) { 310 return 1; 311 } 312 return 0; 313} 314 315#define OPCD(opc) ((opc)<<26) 316#define XO19(opc) (OPCD(19)|((opc)<<1)) 317#define MD30(opc) (OPCD(30)|((opc)<<2)) 318#define MDS30(opc) (OPCD(30)|((opc)<<1)) 319#define XO31(opc) (OPCD(31)|((opc)<<1)) 320#define XO58(opc) (OPCD(58)|(opc)) 321#define XO62(opc) (OPCD(62)|(opc)) 322#define VX4(opc) (OPCD(4)|(opc)) 323 324#define B OPCD( 18) 325#define BC OPCD( 16) 326 327#define LBZ OPCD( 34) 328#define LHZ OPCD( 40) 329#define LHA OPCD( 42) 330#define LWZ OPCD( 32) 331#define LWZUX XO31( 55) 332#define LD XO58( 0) 333#define LDX XO31( 21) 334#define LDU XO58( 1) 335#define LDUX XO31( 53) 336#define LWA XO58( 2) 337#define LWAX XO31(341) 338#define LQ OPCD( 56) 339 340#define STB OPCD( 38) 341#define STH OPCD( 44) 342#define STW OPCD( 36) 343#define STD XO62( 0) 344#define STDU XO62( 1) 345#define STDX XO31(149) 346#define STQ XO62( 2) 347 348#define PLWA OPCD( 41) 349#define PLD OPCD( 57) 350#define PLXSD OPCD( 42) 351#define PLXV OPCD(25 * 2 + 1) /* force tx=1 */ 352 353#define PSTD OPCD( 61) 354#define PSTXSD OPCD( 46) 355#define PSTXV OPCD(27 * 2 + 1) /* force sx=1 */ 356 357#define ADDIC OPCD( 12) 358#define ADDI OPCD( 14) 359#define ADDIS OPCD( 15) 360#define ORI OPCD( 24) 361#define ORIS OPCD( 25) 362#define XORI OPCD( 26) 363#define XORIS OPCD( 27) 364#define ANDI OPCD( 28) 365#define ANDIS OPCD( 29) 366#define MULLI OPCD( 7) 367#define CMPLI OPCD( 10) 368#define CMPI OPCD( 11) 369#define SUBFIC OPCD( 8) 370 371#define LWZU OPCD( 33) 372#define STWU OPCD( 37) 373 374#define RLWIMI OPCD( 20) 375#define RLWINM OPCD( 21) 376#define RLWNM OPCD( 23) 377 378#define RLDICL MD30( 0) 379#define RLDICR MD30( 1) 380#define RLDIMI MD30( 3) 381#define RLDCL MDS30( 8) 382 383#define BCLR XO19( 16) 384#define BCCTR XO19(528) 385#define CRAND XO19(257) 386#define CRANDC XO19(129) 387#define CRNAND XO19(225) 388#define CROR XO19(449) 389#define CRNOR XO19( 33) 390#define ADDPCIS XO19( 2) 391 392#define EXTSB XO31(954) 393#define EXTSH XO31(922) 394#define EXTSW XO31(986) 395#define ADD XO31(266) 396#define ADDE XO31(138) 397#define ADDME XO31(234) 398#define ADDZE XO31(202) 399#define ADDC XO31( 10) 400#define AND XO31( 28) 401#define SUBF XO31( 40) 402#define SUBFC XO31( 8) 403#define SUBFE XO31(136) 404#define SUBFME XO31(232) 405#define SUBFZE XO31(200) 406#define OR XO31(444) 407#define XOR XO31(316) 408#define MULLW XO31(235) 409#define MULHW XO31( 75) 410#define MULHWU XO31( 11) 411#define DIVW XO31(491) 412#define DIVWU XO31(459) 413#define MODSW XO31(779) 414#define MODUW XO31(267) 415#define CMP XO31( 0) 416#define CMPL XO31( 32) 417#define LHBRX XO31(790) 418#define LWBRX XO31(534) 419#define LDBRX XO31(532) 420#define STHBRX XO31(918) 421#define STWBRX XO31(662) 422#define STDBRX XO31(660) 423#define MFSPR XO31(339) 424#define MTSPR XO31(467) 425#define SRAWI XO31(824) 426#define NEG XO31(104) 427#define MFCR XO31( 19) 428#define MFOCRF (MFCR | (1u << 20)) 429#define NOR XO31(124) 430#define CNTLZW XO31( 26) 431#define CNTLZD XO31( 58) 432#define CNTTZW XO31(538) 433#define CNTTZD XO31(570) 434#define CNTPOPW XO31(378) 435#define CNTPOPD XO31(506) 436#define ANDC XO31( 60) 437#define ORC XO31(412) 438#define EQV XO31(284) 439#define NAND XO31(476) 440#define ISEL XO31( 15) 441 442#define MULLD XO31(233) 443#define MULHD XO31( 73) 444#define MULHDU XO31( 9) 445#define DIVD XO31(489) 446#define DIVDU XO31(457) 447#define MODSD XO31(777) 448#define MODUD XO31(265) 449 450#define LBZX XO31( 87) 451#define LHZX XO31(279) 452#define LHAX XO31(343) 453#define LWZX XO31( 23) 454#define STBX XO31(215) 455#define STHX XO31(407) 456#define STWX XO31(151) 457 458#define EIEIO XO31(854) 459#define HWSYNC XO31(598) 460#define LWSYNC (HWSYNC | (1u << 21)) 461 462#define SPR(a, b) ((((a)<<5)|(b))<<11) 463#define LR SPR(8, 0) 464#define CTR SPR(9, 0) 465 466#define SLW XO31( 24) 467#define SRW XO31(536) 468#define SRAW XO31(792) 469 470#define SLD XO31( 27) 471#define SRD XO31(539) 472#define SRAD XO31(794) 473#define SRADI XO31(413<<1) 474 475#define BRH XO31(219) 476#define BRW XO31(155) 477#define BRD XO31(187) 478 479#define TW XO31( 4) 480#define TRAP (TW | TO(31)) 481 482#define SETBC XO31(384) /* v3.10 */ 483#define SETBCR XO31(416) /* v3.10 */ 484#define SETNBC XO31(448) /* v3.10 */ 485#define SETNBCR XO31(480) /* v3.10 */ 486 487#define NOP ORI /* ori 0,0,0 */ 488 489#define LVX XO31(103) 490#define LVEBX XO31(7) 491#define LVEHX XO31(39) 492#define LVEWX XO31(71) 493#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ 494#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ 495#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ 496#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */ 497#define LXSD (OPCD(57) | 2) /* v3.00 */ 498#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */ 499 500#define STVX XO31(231) 501#define STVEWX XO31(199) 502#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ 503#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ 504#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */ 505#define STXSD (OPCD(61) | 2) /* v3.00 */ 506 507#define VADDSBS VX4(768) 508#define VADDUBS VX4(512) 509#define VADDUBM VX4(0) 510#define VADDSHS VX4(832) 511#define VADDUHS VX4(576) 512#define VADDUHM VX4(64) 513#define VADDSWS VX4(896) 514#define VADDUWS VX4(640) 515#define VADDUWM VX4(128) 516#define VADDUDM VX4(192) /* v2.07 */ 517 518#define VSUBSBS VX4(1792) 519#define VSUBUBS VX4(1536) 520#define VSUBUBM VX4(1024) 521#define VSUBSHS VX4(1856) 522#define VSUBUHS VX4(1600) 523#define VSUBUHM VX4(1088) 524#define VSUBSWS VX4(1920) 525#define VSUBUWS VX4(1664) 526#define VSUBUWM VX4(1152) 527#define VSUBUDM VX4(1216) /* v2.07 */ 528 529#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */ 530#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */ 531 532#define VMAXSB VX4(258) 533#define VMAXSH VX4(322) 534#define VMAXSW VX4(386) 535#define VMAXSD VX4(450) /* v2.07 */ 536#define VMAXUB VX4(2) 537#define VMAXUH VX4(66) 538#define VMAXUW VX4(130) 539#define VMAXUD VX4(194) /* v2.07 */ 540#define VMINSB VX4(770) 541#define VMINSH VX4(834) 542#define VMINSW VX4(898) 543#define VMINSD VX4(962) /* v2.07 */ 544#define VMINUB VX4(514) 545#define VMINUH VX4(578) 546#define VMINUW VX4(642) 547#define VMINUD VX4(706) /* v2.07 */ 548 549#define VCMPEQUB VX4(6) 550#define VCMPEQUH VX4(70) 551#define VCMPEQUW VX4(134) 552#define VCMPEQUD VX4(199) /* v2.07 */ 553#define VCMPGTSB VX4(774) 554#define VCMPGTSH VX4(838) 555#define VCMPGTSW VX4(902) 556#define VCMPGTSD VX4(967) /* v2.07 */ 557#define VCMPGTUB VX4(518) 558#define VCMPGTUH VX4(582) 559#define VCMPGTUW VX4(646) 560#define VCMPGTUD VX4(711) /* v2.07 */ 561#define VCMPNEB VX4(7) /* v3.00 */ 562#define VCMPNEH VX4(71) /* v3.00 */ 563#define VCMPNEW VX4(135) /* v3.00 */ 564 565#define VSLB VX4(260) 566#define VSLH VX4(324) 567#define VSLW VX4(388) 568#define VSLD VX4(1476) /* v2.07 */ 569#define VSRB VX4(516) 570#define VSRH VX4(580) 571#define VSRW VX4(644) 572#define VSRD VX4(1732) /* v2.07 */ 573#define VSRAB VX4(772) 574#define VSRAH VX4(836) 575#define VSRAW VX4(900) 576#define VSRAD VX4(964) /* v2.07 */ 577#define VRLB VX4(4) 578#define VRLH VX4(68) 579#define VRLW VX4(132) 580#define VRLD VX4(196) /* v2.07 */ 581 582#define VMULEUB VX4(520) 583#define VMULEUH VX4(584) 584#define VMULEUW VX4(648) /* v2.07 */ 585#define VMULOUB VX4(8) 586#define VMULOUH VX4(72) 587#define VMULOUW VX4(136) /* v2.07 */ 588#define VMULUWM VX4(137) /* v2.07 */ 589#define VMULLD VX4(457) /* v3.10 */ 590#define VMSUMUHM VX4(38) 591 592#define VMRGHB VX4(12) 593#define VMRGHH VX4(76) 594#define VMRGHW VX4(140) 595#define VMRGLB VX4(268) 596#define VMRGLH VX4(332) 597#define VMRGLW VX4(396) 598 599#define VPKUHUM VX4(14) 600#define VPKUWUM VX4(78) 601 602#define VAND VX4(1028) 603#define VANDC VX4(1092) 604#define VNOR VX4(1284) 605#define VOR VX4(1156) 606#define VXOR VX4(1220) 607#define VEQV VX4(1668) /* v2.07 */ 608#define VNAND VX4(1412) /* v2.07 */ 609#define VORC VX4(1348) /* v2.07 */ 610 611#define VSPLTB VX4(524) 612#define VSPLTH VX4(588) 613#define VSPLTW VX4(652) 614#define VSPLTISB VX4(780) 615#define VSPLTISH VX4(844) 616#define VSPLTISW VX4(908) 617 618#define VSLDOI VX4(44) 619 620#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ 621#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ 622#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */ 623 624#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ 625#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ 626#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ 627#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ 628#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */ 629#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */ 630 631#define RT(r) ((r)<<21) 632#define RS(r) ((r)<<21) 633#define RA(r) ((r)<<16) 634#define RB(r) ((r)<<11) 635#define TO(t) ((t)<<21) 636#define SH(s) ((s)<<11) 637#define MB(b) ((b)<<6) 638#define ME(e) ((e)<<1) 639#define BO(o) ((o)<<21) 640#define MB64(b) ((b)<<5) 641#define FXM(b) (1 << (19 - (b))) 642 643#define VRT(r) (((r) & 31) << 21) 644#define VRA(r) (((r) & 31) << 16) 645#define VRB(r) (((r) & 31) << 11) 646#define VRC(r) (((r) & 31) << 6) 647 648#define LK 1 649 650#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) 651#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) 652#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) 653#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) 654 655#define BF(n) ((n)<<23) 656#define BI(n, c) (((c)+((n)*4))<<16) 657#define BT(n, c) (((c)+((n)*4))<<21) 658#define BA(n, c) (((c)+((n)*4))<<16) 659#define BB(n, c) (((c)+((n)*4))<<11) 660#define BC_(n, c) (((c)+((n)*4))<<6) 661 662#define BO_COND_TRUE BO(12) 663#define BO_COND_FALSE BO( 4) 664#define BO_ALWAYS BO(20) 665 666enum { 667 CR_LT, 668 CR_GT, 669 CR_EQ, 670 CR_SO 671}; 672 673static const uint32_t tcg_to_bc[] = { 674 [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, 675 [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, 676 [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, 677 [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, 678 [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, 679 [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, 680 [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, 681 [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, 682 [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, 683 [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, 684}; 685 686/* The low bit here is set if the RA and RB fields must be inverted. */ 687static const uint32_t tcg_to_isel[] = { 688 [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), 689 [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, 690 [TCG_COND_LT] = ISEL | BC_(7, CR_LT), 691 [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, 692 [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, 693 [TCG_COND_GT] = ISEL | BC_(7, CR_GT), 694 [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), 695 [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, 696 [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, 697 [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), 698}; 699 700static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 701 intptr_t value, intptr_t addend) 702{ 703 const tcg_insn_unit *target; 704 int16_t lo; 705 int32_t hi; 706 707 value += addend; 708 target = (const tcg_insn_unit *)value; 709 710 switch (type) { 711 case R_PPC_REL14: 712 return reloc_pc14(code_ptr, target); 713 case R_PPC_REL24: 714 return reloc_pc24(code_ptr, target); 715 case R_PPC64_PCREL34: 716 return reloc_pc34(code_ptr, target); 717 case R_PPC_ADDR16: 718 /* 719 * We are (slightly) abusing this relocation type. In particular, 720 * assert that the low 2 bits are zero, and do not modify them. 721 * That way we can use this with LD et al that have opcode bits 722 * in the low 2 bits of the insn. 723 */ 724 if ((value & 3) || value != (int16_t)value) { 725 return false; 726 } 727 *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); 728 break; 729 case R_PPC_ADDR32: 730 /* 731 * We are abusing this relocation type. Again, this points to 732 * a pair of insns, lis + load. This is an absolute address 733 * relocation for PPC32 so the lis cannot be removed. 734 */ 735 lo = value; 736 hi = value - lo; 737 if (hi + lo != value) { 738 return false; 739 } 740 code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); 741 code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); 742 break; 743 default: 744 g_assert_not_reached(); 745 } 746 return true; 747} 748 749/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */ 750static bool tcg_out_need_prefix_align(TCGContext *s) 751{ 752 return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c; 753} 754 755static void tcg_out_prefix_align(TCGContext *s) 756{ 757 if (tcg_out_need_prefix_align(s)) { 758 tcg_out32(s, NOP); 759 } 760} 761 762static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target) 763{ 764 return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0); 765} 766 767/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */ 768static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt, 769 unsigned ra, tcg_target_long imm, bool r) 770{ 771 tcg_insn_unit p, i; 772 773 p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff); 774 i = opc | TAI(rt, ra, imm); 775 776 tcg_out_prefix_align(s); 777 tcg_out32(s, p); 778 tcg_out32(s, i); 779} 780 781/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */ 782static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt, 783 unsigned ra, tcg_target_long imm, bool r) 784{ 785 tcg_insn_unit p, i; 786 787 p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff); 788 i = opc | TAI(rt, ra, imm); 789 790 tcg_out_prefix_align(s); 791 tcg_out32(s, p); 792 tcg_out32(s, i); 793} 794 795static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, 796 TCGReg base, tcg_target_long offset); 797 798static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) 799{ 800 if (ret == arg) { 801 return true; 802 } 803 switch (type) { 804 case TCG_TYPE_I64: 805 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 806 /* fallthru */ 807 case TCG_TYPE_I32: 808 if (ret < TCG_REG_V0) { 809 if (arg < TCG_REG_V0) { 810 tcg_out32(s, OR | SAB(arg, ret, arg)); 811 break; 812 } else if (have_isa_2_07) { 813 tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD) 814 | VRT(arg) | RA(ret)); 815 break; 816 } else { 817 /* Altivec does not support vector->integer moves. */ 818 return false; 819 } 820 } else if (arg < TCG_REG_V0) { 821 if (have_isa_2_07) { 822 tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD) 823 | VRT(ret) | RA(arg)); 824 break; 825 } else { 826 /* Altivec does not support integer->vector moves. */ 827 return false; 828 } 829 } 830 /* fallthru */ 831 case TCG_TYPE_V64: 832 case TCG_TYPE_V128: 833 tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0); 834 tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg)); 835 break; 836 default: 837 g_assert_not_reached(); 838 } 839 return true; 840} 841 842static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, 843 int sh, int mb) 844{ 845 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 846 sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); 847 mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); 848 tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); 849} 850 851static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, 852 int sh, int mb, int me) 853{ 854 tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); 855} 856 857static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) 858{ 859 tcg_out32(s, EXTSB | RA(dst) | RS(src)); 860} 861 862static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src) 863{ 864 tcg_out32(s, ANDI | SAI(src, dst, 0xff)); 865} 866 867static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) 868{ 869 tcg_out32(s, EXTSH | RA(dst) | RS(src)); 870} 871 872static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src) 873{ 874 tcg_out32(s, ANDI | SAI(src, dst, 0xffff)); 875} 876 877static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src) 878{ 879 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 880 tcg_out32(s, EXTSW | RA(dst) | RS(src)); 881} 882 883static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) 884{ 885 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 886 tcg_out_rld(s, RLDICL, dst, src, 0, 32); 887} 888 889static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src) 890{ 891 tcg_out_ext32s(s, dst, src); 892} 893 894static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src) 895{ 896 tcg_out_ext32u(s, dst, src); 897} 898 899static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) 900{ 901 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 902 tcg_out_mov(s, TCG_TYPE_I32, rd, rn); 903} 904 905static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) 906{ 907 tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); 908} 909 910static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) 911{ 912 tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); 913} 914 915static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c) 916{ 917 /* Limit immediate shift count lest we create an illegal insn. */ 918 tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31)); 919} 920 921static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) 922{ 923 tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); 924} 925 926static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) 927{ 928 tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); 929} 930 931static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c) 932{ 933 tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2)); 934} 935 936static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm) 937{ 938 uint32_t d0, d1, d2; 939 940 tcg_debug_assert((imm & 0xffff) == 0); 941 tcg_debug_assert(imm == (int32_t)imm); 942 943 d2 = extract32(imm, 16, 1); 944 d1 = extract32(imm, 17, 5); 945 d0 = extract32(imm, 22, 10); 946 tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2); 947} 948 949static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags) 950{ 951 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 952 953 if (have_isa_3_10) { 954 tcg_out32(s, BRH | RA(dst) | RS(src)); 955 if (flags & TCG_BSWAP_OS) { 956 tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); 957 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 958 tcg_out_ext16u(s, dst, dst); 959 } 960 return; 961 } 962 963 /* 964 * In the following, 965 * dep(a, b, m) -> (a & ~m) | (b & m) 966 * 967 * Begin with: src = xxxxabcd 968 */ 969 /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ 970 tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); 971 /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ 972 tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); 973 974 if (flags & TCG_BSWAP_OS) { 975 tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); 976 } else { 977 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 978 } 979} 980 981static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) 982{ 983 TCGReg tmp = dst == src ? TCG_REG_R0 : dst; 984 985 if (have_isa_3_10) { 986 tcg_out32(s, BRW | RA(dst) | RS(src)); 987 if (flags & TCG_BSWAP_OS) { 988 tcg_out_ext32s(s, dst, dst); 989 } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { 990 tcg_out_ext32u(s, dst, dst); 991 } 992 return; 993 } 994 995 /* 996 * Stolen from gcc's builtin_bswap32. 997 * In the following, 998 * dep(a, b, m) -> (a & ~m) | (b & m) 999 * 1000 * Begin with: src = xxxxabcd 1001 */ 1002 /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ 1003 tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); 1004 /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ 1005 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); 1006 /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ 1007 tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); 1008 1009 if (flags & TCG_BSWAP_OS) { 1010 tcg_out_ext32s(s, dst, tmp); 1011 } else { 1012 tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); 1013 } 1014} 1015 1016static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) 1017{ 1018 TCGReg t0 = dst == src ? TCG_REG_R0 : dst; 1019 TCGReg t1 = dst == src ? dst : TCG_REG_R0; 1020 1021 if (have_isa_3_10) { 1022 tcg_out32(s, BRD | RA(dst) | RS(src)); 1023 return; 1024 } 1025 1026 /* 1027 * In the following, 1028 * dep(a, b, m) -> (a & ~m) | (b & m) 1029 * 1030 * Begin with: src = abcdefgh 1031 */ 1032 /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ 1033 tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); 1034 /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ 1035 tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); 1036 /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ 1037 tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); 1038 1039 /* t0 = rol64(t0, 32) = hgfe0000 */ 1040 tcg_out_rld(s, RLDICL, t0, t0, 32, 0); 1041 /* t1 = rol64(src, 32) = efghabcd */ 1042 tcg_out_rld(s, RLDICL, t1, src, 32, 0); 1043 1044 /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ 1045 tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); 1046 /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ 1047 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); 1048 /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ 1049 tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); 1050 1051 tcg_out_mov(s, TCG_TYPE_REG, dst, t0); 1052} 1053 1054/* Emit a move into ret of arg, if it can be done in one insn. */ 1055static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) 1056{ 1057 if (arg == (int16_t)arg) { 1058 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1059 return true; 1060 } 1061 if (arg == (int32_t)arg && (arg & 0xffff) == 0) { 1062 tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); 1063 return true; 1064 } 1065 return false; 1066} 1067 1068static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, 1069 tcg_target_long arg, bool in_prologue) 1070{ 1071 intptr_t tb_diff; 1072 tcg_target_long tmp; 1073 int shift; 1074 1075 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1076 1077 if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { 1078 arg = (int32_t)arg; 1079 } 1080 1081 /* Load 16-bit immediates with one insn. */ 1082 if (tcg_out_movi_one(s, ret, arg)) { 1083 return; 1084 } 1085 1086 /* Load addresses within the TB with one insn. */ 1087 tb_diff = ppc_tbrel_diff(s, (void *)arg); 1088 if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) { 1089 tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff)); 1090 return; 1091 } 1092 1093 /* 1094 * Load values up to 34 bits, and pc-relative addresses, 1095 * with one prefixed insn. 1096 */ 1097 if (have_isa_3_10) { 1098 if (arg == sextract64(arg, 0, 34)) { 1099 /* pli ret,value = paddi ret,0,value,0 */ 1100 tcg_out_mls_d(s, ADDI, ret, 0, arg, 0); 1101 return; 1102 } 1103 1104 tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg); 1105 if (tmp == sextract64(tmp, 0, 34)) { 1106 /* pla ret,value = paddi ret,0,value,1 */ 1107 tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1); 1108 return; 1109 } 1110 } 1111 1112 /* Load 32-bit immediates with two insns. Note that we've already 1113 eliminated bare ADDIS, so we know both insns are required. */ 1114 if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) { 1115 tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); 1116 tcg_out32(s, ORI | SAI(ret, ret, arg)); 1117 return; 1118 } 1119 if (arg == (uint32_t)arg && !(arg & 0x8000)) { 1120 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1121 tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); 1122 return; 1123 } 1124 1125 /* Load masked 16-bit value. */ 1126 if (arg > 0 && (arg & 0x8000)) { 1127 tmp = arg | 0x7fff; 1128 if ((tmp & (tmp + 1)) == 0) { 1129 int mb = clz64(tmp + 1) + 1; 1130 tcg_out32(s, ADDI | TAI(ret, 0, arg)); 1131 tcg_out_rld(s, RLDICL, ret, ret, 0, mb); 1132 return; 1133 } 1134 } 1135 1136 /* Load common masks with 2 insns. */ 1137 shift = ctz64(arg); 1138 tmp = arg >> shift; 1139 if (tmp == (int16_t)tmp) { 1140 tcg_out32(s, ADDI | TAI(ret, 0, tmp)); 1141 tcg_out_shli64(s, ret, ret, shift); 1142 return; 1143 } 1144 shift = clz64(arg); 1145 if (tcg_out_movi_one(s, ret, arg << shift)) { 1146 tcg_out_shri64(s, ret, ret, shift); 1147 return; 1148 } 1149 1150 /* Load addresses within 2GB with 2 insns. */ 1151 if (have_isa_3_00) { 1152 intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4; 1153 int16_t lo = hi; 1154 1155 hi -= lo; 1156 if (hi == (int32_t)hi) { 1157 tcg_out_addpcis(s, TCG_REG_TMP2, hi); 1158 tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo)); 1159 return; 1160 } 1161 } 1162 1163 /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */ 1164 if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) { 1165 tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff); 1166 return; 1167 } 1168 1169 /* Use the constant pool, if possible. */ 1170 if (!in_prologue && USE_REG_TB) { 1171 new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr, 1172 ppc_tbrel_diff(s, NULL)); 1173 tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0)); 1174 return; 1175 } 1176 if (have_isa_3_10) { 1177 tcg_out_8ls_d(s, PLD, ret, 0, 0, 1); 1178 new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0); 1179 return; 1180 } 1181 if (have_isa_3_00) { 1182 tcg_out_addpcis(s, TCG_REG_TMP2, 0); 1183 new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0); 1184 tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0)); 1185 return; 1186 } 1187 1188 tmp = arg >> 31 >> 1; 1189 tcg_out_movi(s, TCG_TYPE_I32, ret, tmp); 1190 if (tmp) { 1191 tcg_out_shli64(s, ret, ret, 32); 1192 } 1193 if (arg & 0xffff0000) { 1194 tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); 1195 } 1196 if (arg & 0xffff) { 1197 tcg_out32(s, ORI | SAI(ret, ret, arg)); 1198 } 1199} 1200 1201static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 1202 TCGReg ret, int64_t val) 1203{ 1204 uint32_t load_insn; 1205 int rel, low; 1206 intptr_t add; 1207 1208 switch (vece) { 1209 case MO_8: 1210 low = (int8_t)val; 1211 if (low >= -16 && low < 16) { 1212 tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); 1213 return; 1214 } 1215 if (have_isa_3_00) { 1216 tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); 1217 return; 1218 } 1219 break; 1220 1221 case MO_16: 1222 low = (int16_t)val; 1223 if (low >= -16 && low < 16) { 1224 tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); 1225 return; 1226 } 1227 break; 1228 1229 case MO_32: 1230 low = (int32_t)val; 1231 if (low >= -16 && low < 16) { 1232 tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); 1233 return; 1234 } 1235 break; 1236 } 1237 1238 /* 1239 * Otherwise we must load the value from the constant pool. 1240 */ 1241 if (USE_REG_TB) { 1242 rel = R_PPC_ADDR16; 1243 add = ppc_tbrel_diff(s, NULL); 1244 } else if (have_isa_3_10) { 1245 if (type == TCG_TYPE_V64) { 1246 tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1); 1247 new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0); 1248 } else { 1249 tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1); 1250 new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val); 1251 } 1252 return; 1253 } else if (have_isa_3_00) { 1254 tcg_out_addpcis(s, TCG_REG_TMP1, 0); 1255 rel = R_PPC_REL14; 1256 add = 0; 1257 } else { 1258 rel = R_PPC_ADDR32; 1259 add = 0; 1260 } 1261 1262 if (have_vsx) { 1263 load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX; 1264 load_insn |= VRT(ret) | RB(TCG_REG_TMP1); 1265 if (TCG_TARGET_REG_BITS == 64) { 1266 new_pool_label(s, val, rel, s->code_ptr, add); 1267 } else { 1268 new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val); 1269 } 1270 } else { 1271 load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); 1272 if (TCG_TARGET_REG_BITS == 64) { 1273 new_pool_l2(s, rel, s->code_ptr, add, val, val); 1274 } else { 1275 new_pool_l4(s, rel, s->code_ptr, add, 1276 val >> 32, val, val >> 32, val); 1277 } 1278 } 1279 1280 if (USE_REG_TB) { 1281 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0)); 1282 load_insn |= RA(TCG_REG_TB); 1283 } else if (have_isa_3_00) { 1284 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); 1285 } else { 1286 tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0)); 1287 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); 1288 } 1289 tcg_out32(s, load_insn); 1290} 1291 1292static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, 1293 tcg_target_long arg) 1294{ 1295 switch (type) { 1296 case TCG_TYPE_I32: 1297 case TCG_TYPE_I64: 1298 tcg_debug_assert(ret < TCG_REG_V0); 1299 tcg_out_movi_int(s, type, ret, arg, false); 1300 break; 1301 1302 default: 1303 g_assert_not_reached(); 1304 } 1305} 1306 1307static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) 1308{ 1309 return false; 1310} 1311 1312static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, 1313 tcg_target_long imm) 1314{ 1315 /* This function is only used for passing structs by reference. */ 1316 g_assert_not_reached(); 1317} 1318 1319static bool mask_operand(uint32_t c, int *mb, int *me) 1320{ 1321 uint32_t lsb, test; 1322 1323 /* Accept a bit pattern like: 1324 0....01....1 1325 1....10....0 1326 0..01..10..0 1327 Keep track of the transitions. */ 1328 if (c == 0 || c == -1) { 1329 return false; 1330 } 1331 test = c; 1332 lsb = test & -test; 1333 test += lsb; 1334 if (test & (test - 1)) { 1335 return false; 1336 } 1337 1338 *me = clz32(lsb); 1339 *mb = test ? clz32(test & -test) + 1 : 0; 1340 return true; 1341} 1342 1343static bool mask64_operand(uint64_t c, int *mb, int *me) 1344{ 1345 uint64_t lsb; 1346 1347 if (c == 0) { 1348 return false; 1349 } 1350 1351 lsb = c & -c; 1352 /* Accept 1..10..0. */ 1353 if (c == -lsb) { 1354 *mb = 0; 1355 *me = clz64(lsb); 1356 return true; 1357 } 1358 /* Accept 0..01..1. */ 1359 if (lsb == 1 && (c & (c + 1)) == 0) { 1360 *mb = clz64(c + 1) + 1; 1361 *me = 63; 1362 return true; 1363 } 1364 return false; 1365} 1366 1367static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1368{ 1369 int mb, me; 1370 1371 if (mask_operand(c, &mb, &me)) { 1372 tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); 1373 } else if ((c & 0xffff) == c) { 1374 tcg_out32(s, ANDI | SAI(src, dst, c)); 1375 return; 1376 } else if ((c & 0xffff0000) == c) { 1377 tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); 1378 return; 1379 } else { 1380 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); 1381 tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); 1382 } 1383} 1384 1385static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) 1386{ 1387 int mb, me; 1388 1389 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1390 if (mask64_operand(c, &mb, &me)) { 1391 if (mb == 0) { 1392 tcg_out_rld(s, RLDICR, dst, src, 0, me); 1393 } else { 1394 tcg_out_rld(s, RLDICL, dst, src, 0, mb); 1395 } 1396 } else if ((c & 0xffff) == c) { 1397 tcg_out32(s, ANDI | SAI(src, dst, c)); 1398 return; 1399 } else if ((c & 0xffff0000) == c) { 1400 tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); 1401 return; 1402 } else { 1403 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); 1404 tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); 1405 } 1406} 1407 1408static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, 1409 int op_lo, int op_hi) 1410{ 1411 if (c >> 16) { 1412 tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); 1413 src = dst; 1414 } 1415 if (c & 0xffff) { 1416 tcg_out32(s, op_lo | SAI(src, dst, c)); 1417 src = dst; 1418 } 1419} 1420 1421static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1422{ 1423 tcg_out_zori32(s, dst, src, c, ORI, ORIS); 1424} 1425 1426static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) 1427{ 1428 tcg_out_zori32(s, dst, src, c, XORI, XORIS); 1429} 1430 1431static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target) 1432{ 1433 ptrdiff_t disp = tcg_pcrel_diff(s, target); 1434 if (in_range_b(disp)) { 1435 tcg_out32(s, B | (disp & 0x3fffffc) | mask); 1436 } else { 1437 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); 1438 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); 1439 tcg_out32(s, BCCTR | BO_ALWAYS | mask); 1440 } 1441} 1442 1443static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, 1444 TCGReg base, tcg_target_long offset) 1445{ 1446 tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; 1447 bool is_int_store = false; 1448 TCGReg rs = TCG_REG_TMP1; 1449 1450 switch (opi) { 1451 case LD: case LWA: 1452 align = 3; 1453 /* FALLTHRU */ 1454 default: 1455 if (rt > TCG_REG_R0 && rt < TCG_REG_V0) { 1456 rs = rt; 1457 break; 1458 } 1459 break; 1460 case LXSD: 1461 case STXSD: 1462 align = 3; 1463 break; 1464 case LXV: 1465 case STXV: 1466 align = 15; 1467 break; 1468 case STD: 1469 align = 3; 1470 /* FALLTHRU */ 1471 case STB: case STH: case STW: 1472 is_int_store = true; 1473 break; 1474 } 1475 1476 /* For unaligned or large offsets, use the prefixed form. */ 1477 if (have_isa_3_10 1478 && (offset != (int16_t)offset || (offset & align)) 1479 && offset == sextract64(offset, 0, 34)) { 1480 /* 1481 * Note that the MLS:D insns retain their un-prefixed opcode, 1482 * while the 8LS:D insns use a different opcode space. 1483 */ 1484 switch (opi) { 1485 case LBZ: 1486 case LHZ: 1487 case LHA: 1488 case LWZ: 1489 case STB: 1490 case STH: 1491 case STW: 1492 case ADDI: 1493 tcg_out_mls_d(s, opi, rt, base, offset, 0); 1494 return; 1495 case LWA: 1496 tcg_out_8ls_d(s, PLWA, rt, base, offset, 0); 1497 return; 1498 case LD: 1499 tcg_out_8ls_d(s, PLD, rt, base, offset, 0); 1500 return; 1501 case STD: 1502 tcg_out_8ls_d(s, PSTD, rt, base, offset, 0); 1503 return; 1504 case LXSD: 1505 tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0); 1506 return; 1507 case STXSD: 1508 tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0); 1509 return; 1510 case LXV: 1511 tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0); 1512 return; 1513 case STXV: 1514 tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0); 1515 return; 1516 } 1517 } 1518 1519 /* For unaligned, or very large offsets, use the indexed form. */ 1520 if (offset & align || offset != (int32_t)offset || opi == 0) { 1521 if (rs == base) { 1522 rs = TCG_REG_R0; 1523 } 1524 tcg_debug_assert(!is_int_store || rs != rt); 1525 tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); 1526 tcg_out32(s, opx | TAB(rt & 31, base, rs)); 1527 return; 1528 } 1529 1530 l0 = (int16_t)offset; 1531 offset = (offset - l0) >> 16; 1532 l1 = (int16_t)offset; 1533 1534 if (l1 < 0 && orig >= 0) { 1535 extra = 0x4000; 1536 l1 = (int16_t)(offset - 0x4000); 1537 } 1538 if (l1) { 1539 tcg_out32(s, ADDIS | TAI(rs, base, l1)); 1540 base = rs; 1541 } 1542 if (extra) { 1543 tcg_out32(s, ADDIS | TAI(rs, base, extra)); 1544 base = rs; 1545 } 1546 if (opi != ADDI || base != rt || l0 != 0) { 1547 tcg_out32(s, opi | TAI(rt & 31, base, l0)); 1548 } 1549} 1550 1551static void tcg_out_vsldoi(TCGContext *s, TCGReg ret, 1552 TCGReg va, TCGReg vb, int shb) 1553{ 1554 tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6)); 1555} 1556 1557static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, 1558 TCGReg base, intptr_t offset) 1559{ 1560 int shift; 1561 1562 switch (type) { 1563 case TCG_TYPE_I32: 1564 if (ret < TCG_REG_V0) { 1565 tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); 1566 break; 1567 } 1568 if (have_isa_2_07 && have_vsx) { 1569 tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset); 1570 break; 1571 } 1572 tcg_debug_assert((offset & 3) == 0); 1573 tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); 1574 shift = (offset - 4) & 0xc; 1575 if (shift) { 1576 tcg_out_vsldoi(s, ret, ret, ret, shift); 1577 } 1578 break; 1579 case TCG_TYPE_I64: 1580 if (ret < TCG_REG_V0) { 1581 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1582 tcg_out_mem_long(s, LD, LDX, ret, base, offset); 1583 break; 1584 } 1585 /* fallthru */ 1586 case TCG_TYPE_V64: 1587 tcg_debug_assert(ret >= TCG_REG_V0); 1588 if (have_vsx) { 1589 tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX, 1590 ret, base, offset); 1591 break; 1592 } 1593 tcg_debug_assert((offset & 7) == 0); 1594 tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); 1595 if (offset & 8) { 1596 tcg_out_vsldoi(s, ret, ret, ret, 8); 1597 } 1598 break; 1599 case TCG_TYPE_V128: 1600 tcg_debug_assert(ret >= TCG_REG_V0); 1601 tcg_debug_assert((offset & 15) == 0); 1602 tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0, 1603 LVX, ret, base, offset); 1604 break; 1605 default: 1606 g_assert_not_reached(); 1607 } 1608} 1609 1610static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, 1611 TCGReg base, intptr_t offset) 1612{ 1613 int shift; 1614 1615 switch (type) { 1616 case TCG_TYPE_I32: 1617 if (arg < TCG_REG_V0) { 1618 tcg_out_mem_long(s, STW, STWX, arg, base, offset); 1619 break; 1620 } 1621 if (have_isa_2_07 && have_vsx) { 1622 tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset); 1623 break; 1624 } 1625 assert((offset & 3) == 0); 1626 tcg_debug_assert((offset & 3) == 0); 1627 shift = (offset - 4) & 0xc; 1628 if (shift) { 1629 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift); 1630 arg = TCG_VEC_TMP1; 1631 } 1632 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); 1633 break; 1634 case TCG_TYPE_I64: 1635 if (arg < TCG_REG_V0) { 1636 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 1637 tcg_out_mem_long(s, STD, STDX, arg, base, offset); 1638 break; 1639 } 1640 /* fallthru */ 1641 case TCG_TYPE_V64: 1642 tcg_debug_assert(arg >= TCG_REG_V0); 1643 if (have_vsx) { 1644 tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0, 1645 STXSDX, arg, base, offset); 1646 break; 1647 } 1648 tcg_debug_assert((offset & 7) == 0); 1649 if (offset & 8) { 1650 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); 1651 arg = TCG_VEC_TMP1; 1652 } 1653 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); 1654 tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4); 1655 break; 1656 case TCG_TYPE_V128: 1657 tcg_debug_assert(arg >= TCG_REG_V0); 1658 tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0, 1659 STVX, arg, base, offset); 1660 break; 1661 default: 1662 g_assert_not_reached(); 1663 } 1664} 1665 1666static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 1667 TCGReg base, intptr_t ofs) 1668{ 1669 return false; 1670} 1671 1672static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, 1673 int const_arg2, int cr, TCGType type) 1674{ 1675 int imm; 1676 uint32_t op; 1677 1678 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1679 1680 /* Simplify the comparisons below wrt CMPI. */ 1681 if (type == TCG_TYPE_I32) { 1682 arg2 = (int32_t)arg2; 1683 } 1684 1685 switch (cond) { 1686 case TCG_COND_EQ: 1687 case TCG_COND_NE: 1688 if (const_arg2) { 1689 if ((int16_t) arg2 == arg2) { 1690 op = CMPI; 1691 imm = 1; 1692 break; 1693 } else if ((uint16_t) arg2 == arg2) { 1694 op = CMPLI; 1695 imm = 1; 1696 break; 1697 } 1698 } 1699 op = CMPL; 1700 imm = 0; 1701 break; 1702 1703 case TCG_COND_LT: 1704 case TCG_COND_GE: 1705 case TCG_COND_LE: 1706 case TCG_COND_GT: 1707 if (const_arg2) { 1708 if ((int16_t) arg2 == arg2) { 1709 op = CMPI; 1710 imm = 1; 1711 break; 1712 } 1713 } 1714 op = CMP; 1715 imm = 0; 1716 break; 1717 1718 case TCG_COND_LTU: 1719 case TCG_COND_GEU: 1720 case TCG_COND_LEU: 1721 case TCG_COND_GTU: 1722 if (const_arg2) { 1723 if ((uint16_t) arg2 == arg2) { 1724 op = CMPLI; 1725 imm = 1; 1726 break; 1727 } 1728 } 1729 op = CMPL; 1730 imm = 0; 1731 break; 1732 1733 default: 1734 g_assert_not_reached(); 1735 } 1736 op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); 1737 1738 if (imm) { 1739 tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); 1740 } else { 1741 if (const_arg2) { 1742 tcg_out_movi(s, type, TCG_REG_R0, arg2); 1743 arg2 = TCG_REG_R0; 1744 } 1745 tcg_out32(s, op | RA(arg1) | RB(arg2)); 1746 } 1747} 1748 1749static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, 1750 TCGReg dst, TCGReg src, bool neg) 1751{ 1752 if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { 1753 /* 1754 * X != 0 implies X + -1 generates a carry. 1755 * RT = (~X + X) + CA 1756 * = -1 + CA 1757 * = CA ? 0 : -1 1758 */ 1759 tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); 1760 tcg_out32(s, SUBFE | TAB(dst, src, src)); 1761 return; 1762 } 1763 1764 if (type == TCG_TYPE_I32) { 1765 tcg_out32(s, CNTLZW | RS(src) | RA(dst)); 1766 tcg_out_shri32(s, dst, dst, 5); 1767 } else { 1768 tcg_out32(s, CNTLZD | RS(src) | RA(dst)); 1769 tcg_out_shri64(s, dst, dst, 6); 1770 } 1771 if (neg) { 1772 tcg_out32(s, NEG | RT(dst) | RA(dst)); 1773 } 1774} 1775 1776static void tcg_out_setcond_ne0(TCGContext *s, TCGType type, 1777 TCGReg dst, TCGReg src, bool neg) 1778{ 1779 if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { 1780 /* 1781 * X != 0 implies X + -1 generates a carry. Extra addition 1782 * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. 1783 */ 1784 tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); 1785 tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); 1786 return; 1787 } 1788 tcg_out_setcond_eq0(s, type, dst, src, false); 1789 if (neg) { 1790 tcg_out32(s, ADDI | TAI(dst, dst, -1)); 1791 } else { 1792 tcg_out_xori32(s, dst, dst, 1); 1793 } 1794} 1795 1796static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, 1797 bool const_arg2) 1798{ 1799 if (const_arg2) { 1800 if ((uint32_t)arg2 == arg2) { 1801 tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); 1802 } else { 1803 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); 1804 tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); 1805 } 1806 } else { 1807 tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); 1808 } 1809 return TCG_REG_R0; 1810} 1811 1812static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, 1813 TCGArg arg0, TCGArg arg1, TCGArg arg2, 1814 int const_arg2, bool neg) 1815{ 1816 int sh; 1817 bool inv; 1818 1819 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1820 1821 /* Ignore high bits of a potential constant arg2. */ 1822 if (type == TCG_TYPE_I32) { 1823 arg2 = (uint32_t)arg2; 1824 } 1825 1826 /* With SETBC/SETBCR, we can always implement with 2 insns. */ 1827 if (have_isa_3_10) { 1828 tcg_insn_unit bi, opc; 1829 1830 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1831 1832 /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */ 1833 bi = tcg_to_bc[cond] & (0x1f << 16); 1834 if (tcg_to_bc[cond] & BO(8)) { 1835 opc = neg ? SETNBC : SETBC; 1836 } else { 1837 opc = neg ? SETNBCR : SETBCR; 1838 } 1839 tcg_out32(s, opc | RT(arg0) | bi); 1840 return; 1841 } 1842 1843 /* Handle common and trivial cases before handling anything else. */ 1844 if (arg2 == 0) { 1845 switch (cond) { 1846 case TCG_COND_EQ: 1847 tcg_out_setcond_eq0(s, type, arg0, arg1, neg); 1848 return; 1849 case TCG_COND_NE: 1850 tcg_out_setcond_ne0(s, type, arg0, arg1, neg); 1851 return; 1852 case TCG_COND_GE: 1853 tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); 1854 arg1 = arg0; 1855 /* FALLTHRU */ 1856 case TCG_COND_LT: 1857 /* Extract the sign bit. */ 1858 if (type == TCG_TYPE_I32) { 1859 if (neg) { 1860 tcg_out_sari32(s, arg0, arg1, 31); 1861 } else { 1862 tcg_out_shri32(s, arg0, arg1, 31); 1863 } 1864 } else { 1865 if (neg) { 1866 tcg_out_sari64(s, arg0, arg1, 63); 1867 } else { 1868 tcg_out_shri64(s, arg0, arg1, 63); 1869 } 1870 } 1871 return; 1872 default: 1873 break; 1874 } 1875 } 1876 1877 /* If we have ISEL, we can implement everything with 3 or 4 insns. 1878 All other cases below are also at least 3 insns, so speed up the 1879 code generator by not considering them and always using ISEL. */ 1880 if (have_isel) { 1881 int isel, tab; 1882 1883 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1884 1885 isel = tcg_to_isel[cond]; 1886 1887 tcg_out_movi(s, type, arg0, neg ? -1 : 1); 1888 if (isel & 1) { 1889 /* arg0 = (bc ? 0 : 1) */ 1890 tab = TAB(arg0, 0, arg0); 1891 isel &= ~1; 1892 } else { 1893 /* arg0 = (bc ? 1 : 0) */ 1894 tcg_out_movi(s, type, TCG_REG_R0, 0); 1895 tab = TAB(arg0, arg0, TCG_REG_R0); 1896 } 1897 tcg_out32(s, isel | tab); 1898 return; 1899 } 1900 1901 inv = false; 1902 switch (cond) { 1903 case TCG_COND_EQ: 1904 arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); 1905 tcg_out_setcond_eq0(s, type, arg0, arg1, neg); 1906 break; 1907 1908 case TCG_COND_NE: 1909 arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); 1910 tcg_out_setcond_ne0(s, type, arg0, arg1, neg); 1911 break; 1912 1913 case TCG_COND_LE: 1914 case TCG_COND_LEU: 1915 inv = true; 1916 /* fall through */ 1917 case TCG_COND_GT: 1918 case TCG_COND_GTU: 1919 sh = 30; /* CR7 CR_GT */ 1920 goto crtest; 1921 1922 case TCG_COND_GE: 1923 case TCG_COND_GEU: 1924 inv = true; 1925 /* fall through */ 1926 case TCG_COND_LT: 1927 case TCG_COND_LTU: 1928 sh = 29; /* CR7 CR_LT */ 1929 goto crtest; 1930 1931 crtest: 1932 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1933 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); 1934 tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); 1935 if (neg && inv) { 1936 tcg_out32(s, ADDI | TAI(arg0, arg0, -1)); 1937 } else if (neg) { 1938 tcg_out32(s, NEG | RT(arg0) | RA(arg0)); 1939 } else if (inv) { 1940 tcg_out_xori32(s, arg0, arg0, 1); 1941 } 1942 break; 1943 1944 default: 1945 g_assert_not_reached(); 1946 } 1947} 1948 1949static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) 1950{ 1951 if (l->has_value) { 1952 bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); 1953 } else { 1954 tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); 1955 } 1956 tcg_out32(s, bc); 1957} 1958 1959static void tcg_out_brcond(TCGContext *s, TCGCond cond, 1960 TCGArg arg1, TCGArg arg2, int const_arg2, 1961 TCGLabel *l, TCGType type) 1962{ 1963 tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); 1964 tcg_out_bc(s, tcg_to_bc[cond], l); 1965} 1966 1967static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, 1968 TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, 1969 TCGArg v2, bool const_c2) 1970{ 1971 /* If for some reason both inputs are zero, don't produce bad code. */ 1972 if (v1 == 0 && v2 == 0) { 1973 tcg_out_movi(s, type, dest, 0); 1974 return; 1975 } 1976 1977 tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); 1978 1979 if (have_isel) { 1980 int isel = tcg_to_isel[cond]; 1981 1982 /* Swap the V operands if the operation indicates inversion. */ 1983 if (isel & 1) { 1984 int t = v1; 1985 v1 = v2; 1986 v2 = t; 1987 isel &= ~1; 1988 } 1989 /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ 1990 if (v2 == 0) { 1991 tcg_out_movi(s, type, TCG_REG_R0, 0); 1992 } 1993 tcg_out32(s, isel | TAB(dest, v1, v2)); 1994 } else { 1995 if (dest == v2) { 1996 cond = tcg_invert_cond(cond); 1997 v2 = v1; 1998 } else if (dest != v1) { 1999 if (v1 == 0) { 2000 tcg_out_movi(s, type, dest, 0); 2001 } else { 2002 tcg_out_mov(s, type, dest, v1); 2003 } 2004 } 2005 /* Branch forward over one insn */ 2006 tcg_out32(s, tcg_to_bc[cond] | 8); 2007 if (v2 == 0) { 2008 tcg_out_movi(s, type, dest, 0); 2009 } else { 2010 tcg_out_mov(s, type, dest, v2); 2011 } 2012 } 2013} 2014 2015static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, 2016 TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2) 2017{ 2018 if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) { 2019 tcg_out32(s, opc | RA(a0) | RS(a1)); 2020 } else { 2021 tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type); 2022 /* Note that the only other valid constant for a2 is 0. */ 2023 if (have_isel) { 2024 tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1)); 2025 tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0)); 2026 } else if (!const_a2 && a0 == a2) { 2027 tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8); 2028 tcg_out32(s, opc | RA(a0) | RS(a1)); 2029 } else { 2030 tcg_out32(s, opc | RA(a0) | RS(a1)); 2031 tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8); 2032 if (const_a2) { 2033 tcg_out_movi(s, type, a0, 0); 2034 } else { 2035 tcg_out_mov(s, type, a0, a2); 2036 } 2037 } 2038 } 2039} 2040 2041static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, 2042 const int *const_args) 2043{ 2044 static const struct { uint8_t bit1, bit2; } bits[] = { 2045 [TCG_COND_LT ] = { CR_LT, CR_LT }, 2046 [TCG_COND_LE ] = { CR_LT, CR_GT }, 2047 [TCG_COND_GT ] = { CR_GT, CR_GT }, 2048 [TCG_COND_GE ] = { CR_GT, CR_LT }, 2049 [TCG_COND_LTU] = { CR_LT, CR_LT }, 2050 [TCG_COND_LEU] = { CR_LT, CR_GT }, 2051 [TCG_COND_GTU] = { CR_GT, CR_GT }, 2052 [TCG_COND_GEU] = { CR_GT, CR_LT }, 2053 }; 2054 2055 TCGCond cond = args[4], cond2; 2056 TCGArg al, ah, bl, bh; 2057 int blconst, bhconst; 2058 int op, bit1, bit2; 2059 2060 al = args[0]; 2061 ah = args[1]; 2062 bl = args[2]; 2063 bh = args[3]; 2064 blconst = const_args[2]; 2065 bhconst = const_args[3]; 2066 2067 switch (cond) { 2068 case TCG_COND_EQ: 2069 op = CRAND; 2070 goto do_equality; 2071 case TCG_COND_NE: 2072 op = CRNAND; 2073 do_equality: 2074 tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); 2075 tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); 2076 tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2077 break; 2078 2079 case TCG_COND_LT: 2080 case TCG_COND_LE: 2081 case TCG_COND_GT: 2082 case TCG_COND_GE: 2083 case TCG_COND_LTU: 2084 case TCG_COND_LEU: 2085 case TCG_COND_GTU: 2086 case TCG_COND_GEU: 2087 bit1 = bits[cond].bit1; 2088 bit2 = bits[cond].bit2; 2089 op = (bit1 != bit2 ? CRANDC : CRAND); 2090 cond2 = tcg_unsigned_cond(cond); 2091 2092 tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); 2093 tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); 2094 tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); 2095 tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); 2096 break; 2097 2098 default: 2099 g_assert_not_reached(); 2100 } 2101} 2102 2103static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, 2104 const int *const_args) 2105{ 2106 tcg_out_cmp2(s, args + 1, const_args + 1); 2107 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); 2108 tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); 2109} 2110 2111static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, 2112 const int *const_args) 2113{ 2114 tcg_out_cmp2(s, args, const_args); 2115 tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); 2116} 2117 2118static void tcg_out_mb(TCGContext *s, TCGArg a0) 2119{ 2120 uint32_t insn; 2121 2122 if (a0 & TCG_MO_ST_LD) { 2123 insn = HWSYNC; 2124 } else { 2125 insn = LWSYNC; 2126 } 2127 2128 tcg_out32(s, insn); 2129} 2130 2131static void tcg_out_call_int(TCGContext *s, int lk, 2132 const tcg_insn_unit *target) 2133{ 2134#ifdef _CALL_AIX 2135 /* Look through the descriptor. If the branch is in range, and we 2136 don't have to spend too much effort on building the toc. */ 2137 const void *tgt = ((const void * const *)target)[0]; 2138 uintptr_t toc = ((const uintptr_t *)target)[1]; 2139 intptr_t diff = tcg_pcrel_diff(s, tgt); 2140 2141 if (in_range_b(diff) && toc == (uint32_t)toc) { 2142 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); 2143 tcg_out_b(s, lk, tgt); 2144 } else { 2145 /* Fold the low bits of the constant into the addresses below. */ 2146 intptr_t arg = (intptr_t)target; 2147 int ofs = (int16_t)arg; 2148 2149 if (ofs + 8 < 0x8000) { 2150 arg -= ofs; 2151 } else { 2152 ofs = 0; 2153 } 2154 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); 2155 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); 2156 tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); 2157 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); 2158 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2159 } 2160#elif defined(_CALL_ELF) && _CALL_ELF == 2 2161 intptr_t diff; 2162 2163 /* In the ELFv2 ABI, we have to set up r12 to contain the destination 2164 address, which the callee uses to compute its TOC address. */ 2165 /* FIXME: when the branch is in range, we could avoid r12 load if we 2166 knew that the destination uses the same TOC, and what its local 2167 entry point offset is. */ 2168 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); 2169 2170 diff = tcg_pcrel_diff(s, target); 2171 if (in_range_b(diff)) { 2172 tcg_out_b(s, lk, target); 2173 } else { 2174 tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); 2175 tcg_out32(s, BCCTR | BO_ALWAYS | lk); 2176 } 2177#else 2178 tcg_out_b(s, lk, target); 2179#endif 2180} 2181 2182static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 2183 const TCGHelperInfo *info) 2184{ 2185 tcg_out_call_int(s, LK, target); 2186} 2187 2188static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = { 2189 [MO_UB] = LBZX, 2190 [MO_UW] = LHZX, 2191 [MO_UL] = LWZX, 2192 [MO_UQ] = LDX, 2193 [MO_SW] = LHAX, 2194 [MO_SL] = LWAX, 2195 [MO_BSWAP | MO_UB] = LBZX, 2196 [MO_BSWAP | MO_UW] = LHBRX, 2197 [MO_BSWAP | MO_UL] = LWBRX, 2198 [MO_BSWAP | MO_UQ] = LDBRX, 2199}; 2200 2201static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = { 2202 [MO_UB] = STBX, 2203 [MO_UW] = STHX, 2204 [MO_UL] = STWX, 2205 [MO_UQ] = STDX, 2206 [MO_BSWAP | MO_UB] = STBX, 2207 [MO_BSWAP | MO_UW] = STHBRX, 2208 [MO_BSWAP | MO_UL] = STWBRX, 2209 [MO_BSWAP | MO_UQ] = STDBRX, 2210}; 2211 2212static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) 2213{ 2214 if (arg < 0) { 2215 arg = TCG_REG_TMP1; 2216 } 2217 tcg_out32(s, MFSPR | RT(arg) | LR); 2218 return arg; 2219} 2220 2221/* 2222 * For the purposes of ppc32 sorting 4 input registers into 4 argument 2223 * registers, there is an outside chance we would require 3 temps. 2224 */ 2225static const TCGLdstHelperParam ldst_helper_param = { 2226 .ra_gen = ldst_ra_gen, 2227 .ntmp = 3, 2228 .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 } 2229}; 2230 2231static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2232{ 2233 MemOp opc = get_memop(lb->oi); 2234 2235 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2236 return false; 2237 } 2238 2239 tcg_out_ld_helper_args(s, lb, &ldst_helper_param); 2240 tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]); 2241 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); 2242 2243 tcg_out_b(s, 0, lb->raddr); 2244 return true; 2245} 2246 2247static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) 2248{ 2249 MemOp opc = get_memop(lb->oi); 2250 2251 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { 2252 return false; 2253 } 2254 2255 tcg_out_st_helper_args(s, lb, &ldst_helper_param); 2256 tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]); 2257 2258 tcg_out_b(s, 0, lb->raddr); 2259 return true; 2260} 2261 2262typedef struct { 2263 TCGReg base; 2264 TCGReg index; 2265 TCGAtomAlign aa; 2266} HostAddress; 2267 2268bool tcg_target_has_memory_bswap(MemOp memop) 2269{ 2270 TCGAtomAlign aa; 2271 2272 if ((memop & MO_SIZE) <= MO_64) { 2273 return true; 2274 } 2275 2276 /* 2277 * Reject 16-byte memop with 16-byte atomicity, 2278 * but do allow a pair of 64-bit operations. 2279 */ 2280 aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); 2281 return aa.atom <= MO_64; 2282} 2283 2284/* We expect to use a 16-bit negative offset from ENV. */ 2285#define MIN_TLB_MASK_TABLE_OFS -32768 2286 2287/* 2288 * For system-mode, perform the TLB load and compare. 2289 * For user-mode, perform any required alignment tests. 2290 * In both cases, return a TCGLabelQemuLdst structure if the slow path 2291 * is required and fill in @h with the host address for the fast path. 2292 */ 2293static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, 2294 TCGReg addrlo, TCGReg addrhi, 2295 MemOpIdx oi, bool is_ld) 2296{ 2297 TCGType addr_type = s->addr_type; 2298 TCGLabelQemuLdst *ldst = NULL; 2299 MemOp opc = get_memop(oi); 2300 MemOp a_bits, s_bits; 2301 2302 /* 2303 * Book II, Section 1.4, Single-Copy Atomicity, specifies: 2304 * 2305 * Before 3.0, "An access that is not atomic is performed as a set of 2306 * smaller disjoint atomic accesses. In general, the number and alignment 2307 * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN. 2308 * 2309 * As of 3.0, "the non-atomic access is performed as described in 2310 * the corresponding list", which matches MO_ATOM_SUBALIGN. 2311 */ 2312 s_bits = opc & MO_SIZE; 2313 h->aa = atom_and_align_for_opc(s, opc, 2314 have_isa_3_00 ? MO_ATOM_SUBALIGN 2315 : MO_ATOM_IFALIGN, 2316 s_bits == MO_128); 2317 a_bits = h->aa.align; 2318 2319 if (tcg_use_softmmu) { 2320 int mem_index = get_mmuidx(oi); 2321 int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) 2322 : offsetof(CPUTLBEntry, addr_write); 2323 int fast_off = tlb_mask_table_ofs(s, mem_index); 2324 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); 2325 int table_off = fast_off + offsetof(CPUTLBDescFast, table); 2326 2327 ldst = new_ldst_label(s); 2328 ldst->is_ld = is_ld; 2329 ldst->oi = oi; 2330 ldst->addrlo_reg = addrlo; 2331 ldst->addrhi_reg = addrhi; 2332 2333 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ 2334 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); 2335 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); 2336 2337 /* Extract the page index, shifted into place for tlb index. */ 2338 if (TCG_TARGET_REG_BITS == 32) { 2339 tcg_out_shri32(s, TCG_REG_R0, addrlo, 2340 s->page_bits - CPU_TLB_ENTRY_BITS); 2341 } else { 2342 tcg_out_shri64(s, TCG_REG_R0, addrlo, 2343 s->page_bits - CPU_TLB_ENTRY_BITS); 2344 } 2345 tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); 2346 2347 /* 2348 * Load the (low part) TLB comparator into TMP2. 2349 * For 64-bit host, always load the entire 64-bit slot for simplicity. 2350 * We will ignore the high bits with tcg_out_cmp(..., addr_type). 2351 */ 2352 if (TCG_TARGET_REG_BITS == 64) { 2353 if (cmp_off == 0) { 2354 tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, 2355 TCG_REG_TMP1, TCG_REG_TMP2)); 2356 } else { 2357 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, 2358 TCG_REG_TMP1, TCG_REG_TMP2)); 2359 tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, 2360 TCG_REG_TMP1, cmp_off); 2361 } 2362 } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) { 2363 tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, 2364 TCG_REG_TMP1, TCG_REG_TMP2)); 2365 } else { 2366 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); 2367 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2368 cmp_off + 4 * HOST_BIG_ENDIAN); 2369 } 2370 2371 /* 2372 * Load the TLB addend for use on the fast path. 2373 * Do this asap to minimize any load use delay. 2374 */ 2375 if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) { 2376 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2377 offsetof(CPUTLBEntry, addend)); 2378 } 2379 2380 /* Clear the non-page, non-alignment bits from the address in R0. */ 2381 if (TCG_TARGET_REG_BITS == 32) { 2382 /* 2383 * We don't support unaligned accesses on 32-bits. 2384 * Preserve the bottom bits and thus trigger a comparison 2385 * failure on unaligned accesses. 2386 */ 2387 if (a_bits < s_bits) { 2388 a_bits = s_bits; 2389 } 2390 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, 2391 (32 - a_bits) & 31, 31 - s->page_bits); 2392 } else { 2393 TCGReg t = addrlo; 2394 2395 /* 2396 * If the access is unaligned, we need to make sure we fail if we 2397 * cross a page boundary. The trick is to add the access size-1 2398 * to the address before masking the low bits. That will make the 2399 * address overflow to the next page if we cross a page boundary, 2400 * which will then force a mismatch of the TLB compare. 2401 */ 2402 if (a_bits < s_bits) { 2403 unsigned a_mask = (1 << a_bits) - 1; 2404 unsigned s_mask = (1 << s_bits) - 1; 2405 tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); 2406 t = TCG_REG_R0; 2407 } 2408 2409 /* Mask the address for the requested alignment. */ 2410 if (addr_type == TCG_TYPE_I32) { 2411 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, 2412 (32 - a_bits) & 31, 31 - s->page_bits); 2413 } else if (a_bits == 0) { 2414 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); 2415 } else { 2416 tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 2417 64 - s->page_bits, s->page_bits - a_bits); 2418 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); 2419 } 2420 } 2421 2422 if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) { 2423 /* Low part comparison into cr7. */ 2424 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2425 0, 7, TCG_TYPE_I32); 2426 2427 /* Load the high part TLB comparator into TMP2. */ 2428 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, 2429 cmp_off + 4 * !HOST_BIG_ENDIAN); 2430 2431 /* Load addend, deferred for this case. */ 2432 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, 2433 offsetof(CPUTLBEntry, addend)); 2434 2435 /* High part comparison into cr6. */ 2436 tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 2437 0, 6, TCG_TYPE_I32); 2438 2439 /* Combine comparisons into cr7. */ 2440 tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); 2441 } else { 2442 /* Full comparison into cr7. */ 2443 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 2444 0, 7, addr_type); 2445 } 2446 2447 /* Load a pointer into the current opcode w/conditional branch-link. */ 2448 ldst->label_ptr[0] = s->code_ptr; 2449 tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); 2450 2451 h->base = TCG_REG_TMP1; 2452 } else { 2453 if (a_bits) { 2454 ldst = new_ldst_label(s); 2455 ldst->is_ld = is_ld; 2456 ldst->oi = oi; 2457 ldst->addrlo_reg = addrlo; 2458 ldst->addrhi_reg = addrhi; 2459 2460 /* We are expecting a_bits to max out at 7, much lower than ANDI. */ 2461 tcg_debug_assert(a_bits < 16); 2462 tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1)); 2463 2464 ldst->label_ptr[0] = s->code_ptr; 2465 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); 2466 } 2467 2468 h->base = guest_base ? TCG_GUEST_BASE_REG : 0; 2469 } 2470 2471 if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) { 2472 /* Zero-extend the guest address for use in the host address. */ 2473 tcg_out_ext32u(s, TCG_REG_R0, addrlo); 2474 h->index = TCG_REG_R0; 2475 } else { 2476 h->index = addrlo; 2477 } 2478 2479 return ldst; 2480} 2481 2482static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, 2483 TCGReg addrlo, TCGReg addrhi, 2484 MemOpIdx oi, TCGType data_type) 2485{ 2486 MemOp opc = get_memop(oi); 2487 TCGLabelQemuLdst *ldst; 2488 HostAddress h; 2489 2490 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); 2491 2492 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2493 if (opc & MO_BSWAP) { 2494 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2495 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2496 tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0)); 2497 } else if (h.base != 0) { 2498 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2499 tcg_out32(s, LWZX | TAB(datahi, h.base, h.index)); 2500 tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0)); 2501 } else if (h.index == datahi) { 2502 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2503 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2504 } else { 2505 tcg_out32(s, LWZ | TAI(datahi, h.index, 0)); 2506 tcg_out32(s, LWZ | TAI(datalo, h.index, 4)); 2507 } 2508 } else { 2509 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; 2510 if (!have_isa_2_06 && insn == LDBRX) { 2511 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2512 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); 2513 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0)); 2514 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); 2515 } else if (insn) { 2516 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2517 } else { 2518 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; 2519 tcg_out32(s, insn | TAB(datalo, h.base, h.index)); 2520 tcg_out_movext(s, TCG_TYPE_REG, datalo, 2521 TCG_TYPE_REG, opc & MO_SSIZE, datalo); 2522 } 2523 } 2524 2525 if (ldst) { 2526 ldst->type = data_type; 2527 ldst->datalo_reg = datalo; 2528 ldst->datahi_reg = datahi; 2529 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2530 } 2531} 2532 2533static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, 2534 TCGReg addrlo, TCGReg addrhi, 2535 MemOpIdx oi, TCGType data_type) 2536{ 2537 MemOp opc = get_memop(oi); 2538 TCGLabelQemuLdst *ldst; 2539 HostAddress h; 2540 2541 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); 2542 2543 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { 2544 if (opc & MO_BSWAP) { 2545 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2546 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2547 tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0)); 2548 } else if (h.base != 0) { 2549 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); 2550 tcg_out32(s, STWX | SAB(datahi, h.base, h.index)); 2551 tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0)); 2552 } else { 2553 tcg_out32(s, STW | TAI(datahi, h.index, 0)); 2554 tcg_out32(s, STW | TAI(datalo, h.index, 4)); 2555 } 2556 } else { 2557 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; 2558 if (!have_isa_2_06 && insn == STDBRX) { 2559 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); 2560 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4)); 2561 tcg_out_shri64(s, TCG_REG_R0, datalo, 32); 2562 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1)); 2563 } else { 2564 tcg_out32(s, insn | SAB(datalo, h.base, h.index)); 2565 } 2566 } 2567 2568 if (ldst) { 2569 ldst->type = data_type; 2570 ldst->datalo_reg = datalo; 2571 ldst->datahi_reg = datahi; 2572 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2573 } 2574} 2575 2576static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, 2577 TCGReg addr_reg, MemOpIdx oi, bool is_ld) 2578{ 2579 TCGLabelQemuLdst *ldst; 2580 HostAddress h; 2581 bool need_bswap; 2582 uint32_t insn; 2583 TCGReg index; 2584 2585 ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld); 2586 2587 /* Compose the final address, as LQ/STQ have no indexing. */ 2588 index = h.index; 2589 if (h.base != 0) { 2590 index = TCG_REG_TMP1; 2591 tcg_out32(s, ADD | TAB(index, h.base, h.index)); 2592 } 2593 need_bswap = get_memop(oi) & MO_BSWAP; 2594 2595 if (h.aa.atom == MO_128) { 2596 tcg_debug_assert(!need_bswap); 2597 tcg_debug_assert(datalo & 1); 2598 tcg_debug_assert(datahi == datalo - 1); 2599 tcg_debug_assert(!is_ld || datahi != index); 2600 insn = is_ld ? LQ : STQ; 2601 tcg_out32(s, insn | TAI(datahi, index, 0)); 2602 } else { 2603 TCGReg d1, d2; 2604 2605 if (HOST_BIG_ENDIAN ^ need_bswap) { 2606 d1 = datahi, d2 = datalo; 2607 } else { 2608 d1 = datalo, d2 = datahi; 2609 } 2610 2611 if (need_bswap) { 2612 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); 2613 insn = is_ld ? LDBRX : STDBRX; 2614 tcg_out32(s, insn | TAB(d1, 0, index)); 2615 tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); 2616 } else { 2617 insn = is_ld ? LD : STD; 2618 tcg_out32(s, insn | TAI(d1, index, 0)); 2619 tcg_out32(s, insn | TAI(d2, index, 8)); 2620 } 2621 } 2622 2623 if (ldst) { 2624 ldst->type = TCG_TYPE_I128; 2625 ldst->datalo_reg = datalo; 2626 ldst->datahi_reg = datahi; 2627 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); 2628 } 2629} 2630 2631static void tcg_out_nop_fill(tcg_insn_unit *p, int count) 2632{ 2633 int i; 2634 for (i = 0; i < count; ++i) { 2635 p[i] = NOP; 2636 } 2637} 2638 2639/* Parameters for function call generation, used in tcg.c. */ 2640#define TCG_TARGET_STACK_ALIGN 16 2641 2642#ifdef _CALL_AIX 2643# define LINK_AREA_SIZE (6 * SZR) 2644# define LR_OFFSET (1 * SZR) 2645# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) 2646#elif defined(_CALL_DARWIN) 2647# define LINK_AREA_SIZE (6 * SZR) 2648# define LR_OFFSET (2 * SZR) 2649#elif TCG_TARGET_REG_BITS == 64 2650# if defined(_CALL_ELF) && _CALL_ELF == 2 2651# define LINK_AREA_SIZE (4 * SZR) 2652# define LR_OFFSET (1 * SZR) 2653# endif 2654#else /* TCG_TARGET_REG_BITS == 32 */ 2655# if defined(_CALL_SYSV) 2656# define LINK_AREA_SIZE (2 * SZR) 2657# define LR_OFFSET (1 * SZR) 2658# endif 2659#endif 2660#ifndef LR_OFFSET 2661# error "Unhandled abi" 2662#endif 2663#ifndef TCG_TARGET_CALL_STACK_OFFSET 2664# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE 2665#endif 2666 2667#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) 2668#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) 2669 2670#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ 2671 + TCG_STATIC_CALL_ARGS_SIZE \ 2672 + CPU_TEMP_BUF_SIZE \ 2673 + REG_SAVE_SIZE \ 2674 + TCG_TARGET_STACK_ALIGN - 1) \ 2675 & -TCG_TARGET_STACK_ALIGN) 2676 2677#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) 2678 2679static void tcg_target_qemu_prologue(TCGContext *s) 2680{ 2681 int i; 2682 2683#ifdef _CALL_AIX 2684 const void **desc = (const void **)s->code_ptr; 2685 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ 2686 desc[1] = 0; /* environment pointer */ 2687 s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ 2688#endif 2689 2690 tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, 2691 CPU_TEMP_BUF_SIZE); 2692 2693 /* Prologue */ 2694 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); 2695 tcg_out32(s, (SZR == 8 ? STDU : STWU) 2696 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); 2697 2698 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2699 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2700 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2701 } 2702 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2703 2704 if (!tcg_use_softmmu && guest_base) { 2705 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); 2706 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); 2707 } 2708 2709 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); 2710 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); 2711 tcg_out32(s, BCCTR | BO_ALWAYS); 2712 2713 /* Epilogue */ 2714 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); 2715 2716 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); 2717 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { 2718 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], 2719 TCG_REG_R1, REG_SAVE_BOT + i * SZR); 2720 } 2721 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); 2722 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); 2723 tcg_out32(s, BCLR | BO_ALWAYS); 2724} 2725 2726static void tcg_out_tb_start(TCGContext *s) 2727{ 2728 /* Load TCG_REG_TB. */ 2729 if (USE_REG_TB) { 2730 if (have_isa_3_00) { 2731 /* lnia REG_TB */ 2732 tcg_out_addpcis(s, TCG_REG_TB, 0); 2733 } else { 2734 /* bcl 20,31,$+4 (preferred form for getting nia) */ 2735 tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK); 2736 tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR); 2737 } 2738 } 2739} 2740 2741static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) 2742{ 2743 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); 2744 tcg_out_b(s, 0, tcg_code_gen_epilogue); 2745} 2746 2747static void tcg_out_goto_tb(TCGContext *s, int which) 2748{ 2749 uintptr_t ptr = get_jmp_target_addr(s, which); 2750 int16_t lo; 2751 2752 /* Direct branch will be patched by tb_target_set_jmp_target. */ 2753 set_jmp_insn_offset(s, which); 2754 tcg_out32(s, NOP); 2755 2756 /* When branch is out of range, fall through to indirect. */ 2757 if (USE_REG_TB) { 2758 ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr); 2759 tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset); 2760 } else if (have_isa_3_10) { 2761 ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr); 2762 tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1); 2763 } else if (have_isa_3_00) { 2764 ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4; 2765 lo = offset; 2766 tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo); 2767 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2768 } else { 2769 lo = ptr; 2770 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo); 2771 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo); 2772 } 2773 2774 tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); 2775 tcg_out32(s, BCCTR | BO_ALWAYS); 2776 set_jmp_reset_offset(s, which); 2777} 2778 2779void tb_target_set_jmp_target(const TranslationBlock *tb, int n, 2780 uintptr_t jmp_rx, uintptr_t jmp_rw) 2781{ 2782 uintptr_t addr = tb->jmp_target_addr[n]; 2783 intptr_t diff = addr - jmp_rx; 2784 tcg_insn_unit insn; 2785 2786 if (in_range_b(diff)) { 2787 insn = B | (diff & 0x3fffffc); 2788 } else { 2789 insn = NOP; 2790 } 2791 2792 qatomic_set((uint32_t *)jmp_rw, insn); 2793 flush_idcache_range(jmp_rx, jmp_rw, 4); 2794} 2795 2796static void tcg_out_op(TCGContext *s, TCGOpcode opc, 2797 const TCGArg args[TCG_MAX_OP_ARGS], 2798 const int const_args[TCG_MAX_OP_ARGS]) 2799{ 2800 TCGArg a0, a1, a2; 2801 2802 switch (opc) { 2803 case INDEX_op_goto_ptr: 2804 tcg_out32(s, MTSPR | RS(args[0]) | CTR); 2805 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); 2806 tcg_out32(s, BCCTR | BO_ALWAYS); 2807 break; 2808 case INDEX_op_br: 2809 { 2810 TCGLabel *l = arg_label(args[0]); 2811 uint32_t insn = B; 2812 2813 if (l->has_value) { 2814 insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), 2815 l->u.value_ptr); 2816 } else { 2817 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); 2818 } 2819 tcg_out32(s, insn); 2820 } 2821 break; 2822 case INDEX_op_ld8u_i32: 2823 case INDEX_op_ld8u_i64: 2824 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2825 break; 2826 case INDEX_op_ld8s_i32: 2827 case INDEX_op_ld8s_i64: 2828 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); 2829 tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); 2830 break; 2831 case INDEX_op_ld16u_i32: 2832 case INDEX_op_ld16u_i64: 2833 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); 2834 break; 2835 case INDEX_op_ld16s_i32: 2836 case INDEX_op_ld16s_i64: 2837 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); 2838 break; 2839 case INDEX_op_ld_i32: 2840 case INDEX_op_ld32u_i64: 2841 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); 2842 break; 2843 case INDEX_op_ld32s_i64: 2844 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); 2845 break; 2846 case INDEX_op_ld_i64: 2847 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); 2848 break; 2849 case INDEX_op_st8_i32: 2850 case INDEX_op_st8_i64: 2851 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); 2852 break; 2853 case INDEX_op_st16_i32: 2854 case INDEX_op_st16_i64: 2855 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); 2856 break; 2857 case INDEX_op_st_i32: 2858 case INDEX_op_st32_i64: 2859 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); 2860 break; 2861 case INDEX_op_st_i64: 2862 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); 2863 break; 2864 2865 case INDEX_op_add_i32: 2866 a0 = args[0], a1 = args[1], a2 = args[2]; 2867 if (const_args[2]) { 2868 do_addi_32: 2869 tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); 2870 } else { 2871 tcg_out32(s, ADD | TAB(a0, a1, a2)); 2872 } 2873 break; 2874 case INDEX_op_sub_i32: 2875 a0 = args[0], a1 = args[1], a2 = args[2]; 2876 if (const_args[1]) { 2877 if (const_args[2]) { 2878 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); 2879 } else { 2880 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 2881 } 2882 } else if (const_args[2]) { 2883 a2 = -a2; 2884 goto do_addi_32; 2885 } else { 2886 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 2887 } 2888 break; 2889 2890 case INDEX_op_and_i32: 2891 a0 = args[0], a1 = args[1], a2 = args[2]; 2892 if (const_args[2]) { 2893 tcg_out_andi32(s, a0, a1, a2); 2894 } else { 2895 tcg_out32(s, AND | SAB(a1, a0, a2)); 2896 } 2897 break; 2898 case INDEX_op_and_i64: 2899 a0 = args[0], a1 = args[1], a2 = args[2]; 2900 if (const_args[2]) { 2901 tcg_out_andi64(s, a0, a1, a2); 2902 } else { 2903 tcg_out32(s, AND | SAB(a1, a0, a2)); 2904 } 2905 break; 2906 case INDEX_op_or_i64: 2907 case INDEX_op_or_i32: 2908 a0 = args[0], a1 = args[1], a2 = args[2]; 2909 if (const_args[2]) { 2910 tcg_out_ori32(s, a0, a1, a2); 2911 } else { 2912 tcg_out32(s, OR | SAB(a1, a0, a2)); 2913 } 2914 break; 2915 case INDEX_op_xor_i64: 2916 case INDEX_op_xor_i32: 2917 a0 = args[0], a1 = args[1], a2 = args[2]; 2918 if (const_args[2]) { 2919 tcg_out_xori32(s, a0, a1, a2); 2920 } else { 2921 tcg_out32(s, XOR | SAB(a1, a0, a2)); 2922 } 2923 break; 2924 case INDEX_op_andc_i32: 2925 a0 = args[0], a1 = args[1], a2 = args[2]; 2926 if (const_args[2]) { 2927 tcg_out_andi32(s, a0, a1, ~a2); 2928 } else { 2929 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2930 } 2931 break; 2932 case INDEX_op_andc_i64: 2933 a0 = args[0], a1 = args[1], a2 = args[2]; 2934 if (const_args[2]) { 2935 tcg_out_andi64(s, a0, a1, ~a2); 2936 } else { 2937 tcg_out32(s, ANDC | SAB(a1, a0, a2)); 2938 } 2939 break; 2940 case INDEX_op_orc_i32: 2941 if (const_args[2]) { 2942 tcg_out_ori32(s, args[0], args[1], ~args[2]); 2943 break; 2944 } 2945 /* FALLTHRU */ 2946 case INDEX_op_orc_i64: 2947 tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); 2948 break; 2949 case INDEX_op_eqv_i32: 2950 if (const_args[2]) { 2951 tcg_out_xori32(s, args[0], args[1], ~args[2]); 2952 break; 2953 } 2954 /* FALLTHRU */ 2955 case INDEX_op_eqv_i64: 2956 tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); 2957 break; 2958 case INDEX_op_nand_i32: 2959 case INDEX_op_nand_i64: 2960 tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); 2961 break; 2962 case INDEX_op_nor_i32: 2963 case INDEX_op_nor_i64: 2964 tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); 2965 break; 2966 2967 case INDEX_op_clz_i32: 2968 tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], 2969 args[2], const_args[2]); 2970 break; 2971 case INDEX_op_ctz_i32: 2972 tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], 2973 args[2], const_args[2]); 2974 break; 2975 case INDEX_op_ctpop_i32: 2976 tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); 2977 break; 2978 2979 case INDEX_op_clz_i64: 2980 tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], 2981 args[2], const_args[2]); 2982 break; 2983 case INDEX_op_ctz_i64: 2984 tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], 2985 args[2], const_args[2]); 2986 break; 2987 case INDEX_op_ctpop_i64: 2988 tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); 2989 break; 2990 2991 case INDEX_op_mul_i32: 2992 a0 = args[0], a1 = args[1], a2 = args[2]; 2993 if (const_args[2]) { 2994 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 2995 } else { 2996 tcg_out32(s, MULLW | TAB(a0, a1, a2)); 2997 } 2998 break; 2999 3000 case INDEX_op_div_i32: 3001 tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); 3002 break; 3003 3004 case INDEX_op_divu_i32: 3005 tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); 3006 break; 3007 3008 case INDEX_op_rem_i32: 3009 tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); 3010 break; 3011 3012 case INDEX_op_remu_i32: 3013 tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); 3014 break; 3015 3016 case INDEX_op_shl_i32: 3017 if (const_args[2]) { 3018 /* Limit immediate shift count lest we create an illegal insn. */ 3019 tcg_out_shli32(s, args[0], args[1], args[2] & 31); 3020 } else { 3021 tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); 3022 } 3023 break; 3024 case INDEX_op_shr_i32: 3025 if (const_args[2]) { 3026 /* Limit immediate shift count lest we create an illegal insn. */ 3027 tcg_out_shri32(s, args[0], args[1], args[2] & 31); 3028 } else { 3029 tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); 3030 } 3031 break; 3032 case INDEX_op_sar_i32: 3033 if (const_args[2]) { 3034 tcg_out_sari32(s, args[0], args[1], args[2]); 3035 } else { 3036 tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); 3037 } 3038 break; 3039 case INDEX_op_rotl_i32: 3040 if (const_args[2]) { 3041 tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); 3042 } else { 3043 tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) 3044 | MB(0) | ME(31)); 3045 } 3046 break; 3047 case INDEX_op_rotr_i32: 3048 if (const_args[2]) { 3049 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); 3050 } else { 3051 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); 3052 tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) 3053 | MB(0) | ME(31)); 3054 } 3055 break; 3056 3057 case INDEX_op_brcond_i32: 3058 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 3059 arg_label(args[3]), TCG_TYPE_I32); 3060 break; 3061 case INDEX_op_brcond_i64: 3062 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 3063 arg_label(args[3]), TCG_TYPE_I64); 3064 break; 3065 case INDEX_op_brcond2_i32: 3066 tcg_out_brcond2(s, args, const_args); 3067 break; 3068 3069 case INDEX_op_neg_i32: 3070 case INDEX_op_neg_i64: 3071 tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); 3072 break; 3073 3074 case INDEX_op_not_i32: 3075 case INDEX_op_not_i64: 3076 tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); 3077 break; 3078 3079 case INDEX_op_add_i64: 3080 a0 = args[0], a1 = args[1], a2 = args[2]; 3081 if (const_args[2]) { 3082 do_addi_64: 3083 tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); 3084 } else { 3085 tcg_out32(s, ADD | TAB(a0, a1, a2)); 3086 } 3087 break; 3088 case INDEX_op_sub_i64: 3089 a0 = args[0], a1 = args[1], a2 = args[2]; 3090 if (const_args[1]) { 3091 if (const_args[2]) { 3092 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); 3093 } else { 3094 tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); 3095 } 3096 } else if (const_args[2]) { 3097 a2 = -a2; 3098 goto do_addi_64; 3099 } else { 3100 tcg_out32(s, SUBF | TAB(a0, a2, a1)); 3101 } 3102 break; 3103 3104 case INDEX_op_shl_i64: 3105 if (const_args[2]) { 3106 /* Limit immediate shift count lest we create an illegal insn. */ 3107 tcg_out_shli64(s, args[0], args[1], args[2] & 63); 3108 } else { 3109 tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); 3110 } 3111 break; 3112 case INDEX_op_shr_i64: 3113 if (const_args[2]) { 3114 /* Limit immediate shift count lest we create an illegal insn. */ 3115 tcg_out_shri64(s, args[0], args[1], args[2] & 63); 3116 } else { 3117 tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); 3118 } 3119 break; 3120 case INDEX_op_sar_i64: 3121 if (const_args[2]) { 3122 tcg_out_sari64(s, args[0], args[1], args[2]); 3123 } else { 3124 tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); 3125 } 3126 break; 3127 case INDEX_op_rotl_i64: 3128 if (const_args[2]) { 3129 tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); 3130 } else { 3131 tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); 3132 } 3133 break; 3134 case INDEX_op_rotr_i64: 3135 if (const_args[2]) { 3136 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); 3137 } else { 3138 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); 3139 tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); 3140 } 3141 break; 3142 3143 case INDEX_op_mul_i64: 3144 a0 = args[0], a1 = args[1], a2 = args[2]; 3145 if (const_args[2]) { 3146 tcg_out32(s, MULLI | TAI(a0, a1, a2)); 3147 } else { 3148 tcg_out32(s, MULLD | TAB(a0, a1, a2)); 3149 } 3150 break; 3151 case INDEX_op_div_i64: 3152 tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); 3153 break; 3154 case INDEX_op_divu_i64: 3155 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); 3156 break; 3157 case INDEX_op_rem_i64: 3158 tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); 3159 break; 3160 case INDEX_op_remu_i64: 3161 tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); 3162 break; 3163 3164 case INDEX_op_qemu_ld_a64_i32: 3165 if (TCG_TARGET_REG_BITS == 32) { 3166 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], 3167 args[3], TCG_TYPE_I32); 3168 break; 3169 } 3170 /* fall through */ 3171 case INDEX_op_qemu_ld_a32_i32: 3172 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 3173 break; 3174 case INDEX_op_qemu_ld_a32_i64: 3175 if (TCG_TARGET_REG_BITS == 64) { 3176 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 3177 args[2], TCG_TYPE_I64); 3178 } else { 3179 tcg_out_qemu_ld(s, args[0], args[1], args[2], -1, 3180 args[3], TCG_TYPE_I64); 3181 } 3182 break; 3183 case INDEX_op_qemu_ld_a64_i64: 3184 if (TCG_TARGET_REG_BITS == 64) { 3185 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, 3186 args[2], TCG_TYPE_I64); 3187 } else { 3188 tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], 3189 args[4], TCG_TYPE_I64); 3190 } 3191 break; 3192 case INDEX_op_qemu_ld_a32_i128: 3193 case INDEX_op_qemu_ld_a64_i128: 3194 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3195 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); 3196 break; 3197 3198 case INDEX_op_qemu_st_a64_i32: 3199 if (TCG_TARGET_REG_BITS == 32) { 3200 tcg_out_qemu_st(s, args[0], -1, args[1], args[2], 3201 args[3], TCG_TYPE_I32); 3202 break; 3203 } 3204 /* fall through */ 3205 case INDEX_op_qemu_st_a32_i32: 3206 tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32); 3207 break; 3208 case INDEX_op_qemu_st_a32_i64: 3209 if (TCG_TARGET_REG_BITS == 64) { 3210 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 3211 args[2], TCG_TYPE_I64); 3212 } else { 3213 tcg_out_qemu_st(s, args[0], args[1], args[2], -1, 3214 args[3], TCG_TYPE_I64); 3215 } 3216 break; 3217 case INDEX_op_qemu_st_a64_i64: 3218 if (TCG_TARGET_REG_BITS == 64) { 3219 tcg_out_qemu_st(s, args[0], -1, args[1], -1, 3220 args[2], TCG_TYPE_I64); 3221 } else { 3222 tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], 3223 args[4], TCG_TYPE_I64); 3224 } 3225 break; 3226 case INDEX_op_qemu_st_a32_i128: 3227 case INDEX_op_qemu_st_a64_i128: 3228 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 3229 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); 3230 break; 3231 3232 case INDEX_op_setcond_i32: 3233 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3234 const_args[2], false); 3235 break; 3236 case INDEX_op_setcond_i64: 3237 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3238 const_args[2], false); 3239 break; 3240 case INDEX_op_negsetcond_i32: 3241 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], 3242 const_args[2], true); 3243 break; 3244 case INDEX_op_negsetcond_i64: 3245 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], 3246 const_args[2], true); 3247 break; 3248 case INDEX_op_setcond2_i32: 3249 tcg_out_setcond2(s, args, const_args); 3250 break; 3251 3252 case INDEX_op_bswap16_i32: 3253 case INDEX_op_bswap16_i64: 3254 tcg_out_bswap16(s, args[0], args[1], args[2]); 3255 break; 3256 case INDEX_op_bswap32_i32: 3257 tcg_out_bswap32(s, args[0], args[1], 0); 3258 break; 3259 case INDEX_op_bswap32_i64: 3260 tcg_out_bswap32(s, args[0], args[1], args[2]); 3261 break; 3262 case INDEX_op_bswap64_i64: 3263 tcg_out_bswap64(s, args[0], args[1]); 3264 break; 3265 3266 case INDEX_op_deposit_i32: 3267 if (const_args[2]) { 3268 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; 3269 tcg_out_andi32(s, args[0], args[0], ~mask); 3270 } else { 3271 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], 3272 32 - args[3] - args[4], 31 - args[3]); 3273 } 3274 break; 3275 case INDEX_op_deposit_i64: 3276 if (const_args[2]) { 3277 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; 3278 tcg_out_andi64(s, args[0], args[0], ~mask); 3279 } else { 3280 tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], 3281 64 - args[3] - args[4]); 3282 } 3283 break; 3284 3285 case INDEX_op_extract_i32: 3286 tcg_out_rlw(s, RLWINM, args[0], args[1], 3287 32 - args[2], 32 - args[3], 31); 3288 break; 3289 case INDEX_op_extract_i64: 3290 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); 3291 break; 3292 3293 case INDEX_op_movcond_i32: 3294 tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], 3295 args[3], args[4], const_args[2]); 3296 break; 3297 case INDEX_op_movcond_i64: 3298 tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], 3299 args[3], args[4], const_args[2]); 3300 break; 3301 3302#if TCG_TARGET_REG_BITS == 64 3303 case INDEX_op_add2_i64: 3304#else 3305 case INDEX_op_add2_i32: 3306#endif 3307 /* Note that the CA bit is defined based on the word size of the 3308 environment. So in 64-bit mode it's always carry-out of bit 63. 3309 The fallback code using deposit works just as well for 32-bit. */ 3310 a0 = args[0], a1 = args[1]; 3311 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { 3312 a0 = TCG_REG_R0; 3313 } 3314 if (const_args[4]) { 3315 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); 3316 } else { 3317 tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); 3318 } 3319 if (const_args[5]) { 3320 tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); 3321 } else { 3322 tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); 3323 } 3324 if (a0 != args[0]) { 3325 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3326 } 3327 break; 3328 3329#if TCG_TARGET_REG_BITS == 64 3330 case INDEX_op_sub2_i64: 3331#else 3332 case INDEX_op_sub2_i32: 3333#endif 3334 a0 = args[0], a1 = args[1]; 3335 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { 3336 a0 = TCG_REG_R0; 3337 } 3338 if (const_args[2]) { 3339 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); 3340 } else { 3341 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); 3342 } 3343 if (const_args[3]) { 3344 tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); 3345 } else { 3346 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); 3347 } 3348 if (a0 != args[0]) { 3349 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); 3350 } 3351 break; 3352 3353 case INDEX_op_muluh_i32: 3354 tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); 3355 break; 3356 case INDEX_op_mulsh_i32: 3357 tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); 3358 break; 3359 case INDEX_op_muluh_i64: 3360 tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); 3361 break; 3362 case INDEX_op_mulsh_i64: 3363 tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); 3364 break; 3365 3366 case INDEX_op_mb: 3367 tcg_out_mb(s, args[0]); 3368 break; 3369 3370 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ 3371 case INDEX_op_mov_i64: 3372 case INDEX_op_call: /* Always emitted via tcg_out_call. */ 3373 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ 3374 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ 3375 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ 3376 case INDEX_op_ext8s_i64: 3377 case INDEX_op_ext8u_i32: 3378 case INDEX_op_ext8u_i64: 3379 case INDEX_op_ext16s_i32: 3380 case INDEX_op_ext16s_i64: 3381 case INDEX_op_ext16u_i32: 3382 case INDEX_op_ext16u_i64: 3383 case INDEX_op_ext32s_i64: 3384 case INDEX_op_ext32u_i64: 3385 case INDEX_op_ext_i32_i64: 3386 case INDEX_op_extu_i32_i64: 3387 case INDEX_op_extrl_i64_i32: 3388 default: 3389 g_assert_not_reached(); 3390 } 3391} 3392 3393int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) 3394{ 3395 switch (opc) { 3396 case INDEX_op_and_vec: 3397 case INDEX_op_or_vec: 3398 case INDEX_op_xor_vec: 3399 case INDEX_op_andc_vec: 3400 case INDEX_op_not_vec: 3401 case INDEX_op_nor_vec: 3402 case INDEX_op_eqv_vec: 3403 case INDEX_op_nand_vec: 3404 return 1; 3405 case INDEX_op_orc_vec: 3406 return have_isa_2_07; 3407 case INDEX_op_add_vec: 3408 case INDEX_op_sub_vec: 3409 case INDEX_op_smax_vec: 3410 case INDEX_op_smin_vec: 3411 case INDEX_op_umax_vec: 3412 case INDEX_op_umin_vec: 3413 case INDEX_op_shlv_vec: 3414 case INDEX_op_shrv_vec: 3415 case INDEX_op_sarv_vec: 3416 case INDEX_op_rotlv_vec: 3417 return vece <= MO_32 || have_isa_2_07; 3418 case INDEX_op_ssadd_vec: 3419 case INDEX_op_sssub_vec: 3420 case INDEX_op_usadd_vec: 3421 case INDEX_op_ussub_vec: 3422 return vece <= MO_32; 3423 case INDEX_op_cmp_vec: 3424 case INDEX_op_shli_vec: 3425 case INDEX_op_shri_vec: 3426 case INDEX_op_sari_vec: 3427 case INDEX_op_rotli_vec: 3428 return vece <= MO_32 || have_isa_2_07 ? -1 : 0; 3429 case INDEX_op_neg_vec: 3430 return vece >= MO_32 && have_isa_3_00; 3431 case INDEX_op_mul_vec: 3432 switch (vece) { 3433 case MO_8: 3434 case MO_16: 3435 return -1; 3436 case MO_32: 3437 return have_isa_2_07 ? 1 : -1; 3438 case MO_64: 3439 return have_isa_3_10; 3440 } 3441 return 0; 3442 case INDEX_op_bitsel_vec: 3443 return have_vsx; 3444 case INDEX_op_rotrv_vec: 3445 return -1; 3446 default: 3447 return 0; 3448 } 3449} 3450 3451static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 3452 TCGReg dst, TCGReg src) 3453{ 3454 tcg_debug_assert(dst >= TCG_REG_V0); 3455 3456 /* Splat from integer reg allowed via constraints for v3.00. */ 3457 if (src < TCG_REG_V0) { 3458 tcg_debug_assert(have_isa_3_00); 3459 switch (vece) { 3460 case MO_64: 3461 tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); 3462 return true; 3463 case MO_32: 3464 tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); 3465 return true; 3466 default: 3467 /* Fail, so that we fall back on either dupm or mov+dup. */ 3468 return false; 3469 } 3470 } 3471 3472 /* 3473 * Recall we use (or emulate) VSX integer loads, so the integer is 3474 * right justified within the left (zero-index) double-word. 3475 */ 3476 switch (vece) { 3477 case MO_8: 3478 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); 3479 break; 3480 case MO_16: 3481 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); 3482 break; 3483 case MO_32: 3484 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); 3485 break; 3486 case MO_64: 3487 if (have_vsx) { 3488 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); 3489 break; 3490 } 3491 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); 3492 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); 3493 break; 3494 default: 3495 g_assert_not_reached(); 3496 } 3497 return true; 3498} 3499 3500static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 3501 TCGReg out, TCGReg base, intptr_t offset) 3502{ 3503 int elt; 3504 3505 tcg_debug_assert(out >= TCG_REG_V0); 3506 switch (vece) { 3507 case MO_8: 3508 if (have_isa_3_00) { 3509 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); 3510 } else { 3511 tcg_out_mem_long(s, 0, LVEBX, out, base, offset); 3512 } 3513 elt = extract32(offset, 0, 4); 3514#if !HOST_BIG_ENDIAN 3515 elt ^= 15; 3516#endif 3517 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); 3518 break; 3519 case MO_16: 3520 tcg_debug_assert((offset & 1) == 0); 3521 if (have_isa_3_00) { 3522 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); 3523 } else { 3524 tcg_out_mem_long(s, 0, LVEHX, out, base, offset); 3525 } 3526 elt = extract32(offset, 1, 3); 3527#if !HOST_BIG_ENDIAN 3528 elt ^= 7; 3529#endif 3530 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); 3531 break; 3532 case MO_32: 3533 if (have_isa_3_00) { 3534 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); 3535 break; 3536 } 3537 tcg_debug_assert((offset & 3) == 0); 3538 tcg_out_mem_long(s, 0, LVEWX, out, base, offset); 3539 elt = extract32(offset, 2, 2); 3540#if !HOST_BIG_ENDIAN 3541 elt ^= 3; 3542#endif 3543 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); 3544 break; 3545 case MO_64: 3546 if (have_vsx) { 3547 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); 3548 break; 3549 } 3550 tcg_debug_assert((offset & 7) == 0); 3551 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); 3552 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); 3553 elt = extract32(offset, 3, 1); 3554#if !HOST_BIG_ENDIAN 3555 elt = !elt; 3556#endif 3557 if (elt) { 3558 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); 3559 } else { 3560 tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); 3561 } 3562 break; 3563 default: 3564 g_assert_not_reached(); 3565 } 3566 return true; 3567} 3568 3569static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 3570 unsigned vecl, unsigned vece, 3571 const TCGArg args[TCG_MAX_OP_ARGS], 3572 const int const_args[TCG_MAX_OP_ARGS]) 3573{ 3574 static const uint32_t 3575 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, 3576 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, 3577 mul_op[4] = { 0, 0, VMULUWM, VMULLD }, 3578 neg_op[4] = { 0, 0, VNEGW, VNEGD }, 3579 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, 3580 ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, 3581 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, 3582 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, 3583 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, 3584 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, 3585 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, 3586 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, 3587 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 3588 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 3589 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 3590 smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, 3591 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, 3592 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, 3593 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, 3594 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, 3595 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, 3596 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, 3597 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, 3598 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, 3599 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; 3600 3601 TCGType type = vecl + TCG_TYPE_V64; 3602 TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; 3603 uint32_t insn; 3604 3605 switch (opc) { 3606 case INDEX_op_ld_vec: 3607 tcg_out_ld(s, type, a0, a1, a2); 3608 return; 3609 case INDEX_op_st_vec: 3610 tcg_out_st(s, type, a0, a1, a2); 3611 return; 3612 case INDEX_op_dupm_vec: 3613 tcg_out_dupm_vec(s, type, vece, a0, a1, a2); 3614 return; 3615 3616 case INDEX_op_add_vec: 3617 insn = add_op[vece]; 3618 break; 3619 case INDEX_op_sub_vec: 3620 insn = sub_op[vece]; 3621 break; 3622 case INDEX_op_neg_vec: 3623 insn = neg_op[vece]; 3624 a2 = a1; 3625 a1 = 0; 3626 break; 3627 case INDEX_op_mul_vec: 3628 insn = mul_op[vece]; 3629 break; 3630 case INDEX_op_ssadd_vec: 3631 insn = ssadd_op[vece]; 3632 break; 3633 case INDEX_op_sssub_vec: 3634 insn = sssub_op[vece]; 3635 break; 3636 case INDEX_op_usadd_vec: 3637 insn = usadd_op[vece]; 3638 break; 3639 case INDEX_op_ussub_vec: 3640 insn = ussub_op[vece]; 3641 break; 3642 case INDEX_op_smin_vec: 3643 insn = smin_op[vece]; 3644 break; 3645 case INDEX_op_umin_vec: 3646 insn = umin_op[vece]; 3647 break; 3648 case INDEX_op_smax_vec: 3649 insn = smax_op[vece]; 3650 break; 3651 case INDEX_op_umax_vec: 3652 insn = umax_op[vece]; 3653 break; 3654 case INDEX_op_shlv_vec: 3655 insn = shlv_op[vece]; 3656 break; 3657 case INDEX_op_shrv_vec: 3658 insn = shrv_op[vece]; 3659 break; 3660 case INDEX_op_sarv_vec: 3661 insn = sarv_op[vece]; 3662 break; 3663 case INDEX_op_and_vec: 3664 insn = VAND; 3665 break; 3666 case INDEX_op_or_vec: 3667 insn = VOR; 3668 break; 3669 case INDEX_op_xor_vec: 3670 insn = VXOR; 3671 break; 3672 case INDEX_op_andc_vec: 3673 insn = VANDC; 3674 break; 3675 case INDEX_op_not_vec: 3676 insn = VNOR; 3677 a2 = a1; 3678 break; 3679 case INDEX_op_orc_vec: 3680 insn = VORC; 3681 break; 3682 case INDEX_op_nand_vec: 3683 insn = VNAND; 3684 break; 3685 case INDEX_op_nor_vec: 3686 insn = VNOR; 3687 break; 3688 case INDEX_op_eqv_vec: 3689 insn = VEQV; 3690 break; 3691 3692 case INDEX_op_cmp_vec: 3693 switch (args[3]) { 3694 case TCG_COND_EQ: 3695 insn = eq_op[vece]; 3696 break; 3697 case TCG_COND_NE: 3698 insn = ne_op[vece]; 3699 break; 3700 case TCG_COND_GT: 3701 insn = gts_op[vece]; 3702 break; 3703 case TCG_COND_GTU: 3704 insn = gtu_op[vece]; 3705 break; 3706 default: 3707 g_assert_not_reached(); 3708 } 3709 break; 3710 3711 case INDEX_op_bitsel_vec: 3712 tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); 3713 return; 3714 3715 case INDEX_op_dup2_vec: 3716 assert(TCG_TARGET_REG_BITS == 32); 3717 /* With inputs a1 = xLxx, a2 = xHxx */ 3718 tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ 3719 tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ 3720 tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ 3721 return; 3722 3723 case INDEX_op_ppc_mrgh_vec: 3724 insn = mrgh_op[vece]; 3725 break; 3726 case INDEX_op_ppc_mrgl_vec: 3727 insn = mrgl_op[vece]; 3728 break; 3729 case INDEX_op_ppc_muleu_vec: 3730 insn = muleu_op[vece]; 3731 break; 3732 case INDEX_op_ppc_mulou_vec: 3733 insn = mulou_op[vece]; 3734 break; 3735 case INDEX_op_ppc_pkum_vec: 3736 insn = pkum_op[vece]; 3737 break; 3738 case INDEX_op_rotlv_vec: 3739 insn = rotl_op[vece]; 3740 break; 3741 case INDEX_op_ppc_msum_vec: 3742 tcg_debug_assert(vece == MO_16); 3743 tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); 3744 return; 3745 3746 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ 3747 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ 3748 default: 3749 g_assert_not_reached(); 3750 } 3751 3752 tcg_debug_assert(insn != 0); 3753 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); 3754} 3755 3756static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, 3757 TCGv_vec v1, TCGArg imm, TCGOpcode opci) 3758{ 3759 TCGv_vec t1; 3760 3761 if (vece == MO_32) { 3762 /* 3763 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3764 * So using negative numbers gets us the 4th bit easily. 3765 */ 3766 imm = sextract32(imm, 0, 5); 3767 } else { 3768 imm &= (8 << vece) - 1; 3769 } 3770 3771 /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */ 3772 t1 = tcg_constant_vec(type, MO_8, imm); 3773 vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), 3774 tcgv_vec_arg(v1), tcgv_vec_arg(t1)); 3775} 3776 3777static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, 3778 TCGv_vec v1, TCGv_vec v2, TCGCond cond) 3779{ 3780 bool need_swap = false, need_inv = false; 3781 3782 tcg_debug_assert(vece <= MO_32 || have_isa_2_07); 3783 3784 switch (cond) { 3785 case TCG_COND_EQ: 3786 case TCG_COND_GT: 3787 case TCG_COND_GTU: 3788 break; 3789 case TCG_COND_NE: 3790 if (have_isa_3_00 && vece <= MO_32) { 3791 break; 3792 } 3793 /* fall through */ 3794 case TCG_COND_LE: 3795 case TCG_COND_LEU: 3796 need_inv = true; 3797 break; 3798 case TCG_COND_LT: 3799 case TCG_COND_LTU: 3800 need_swap = true; 3801 break; 3802 case TCG_COND_GE: 3803 case TCG_COND_GEU: 3804 need_swap = need_inv = true; 3805 break; 3806 default: 3807 g_assert_not_reached(); 3808 } 3809 3810 if (need_inv) { 3811 cond = tcg_invert_cond(cond); 3812 } 3813 if (need_swap) { 3814 TCGv_vec t1; 3815 t1 = v1, v1 = v2, v2 = t1; 3816 cond = tcg_swap_cond(cond); 3817 } 3818 3819 vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), 3820 tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); 3821 3822 if (need_inv) { 3823 tcg_gen_not_vec(vece, v0, v0); 3824 } 3825} 3826 3827static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, 3828 TCGv_vec v1, TCGv_vec v2) 3829{ 3830 TCGv_vec t1 = tcg_temp_new_vec(type); 3831 TCGv_vec t2 = tcg_temp_new_vec(type); 3832 TCGv_vec c0, c16; 3833 3834 switch (vece) { 3835 case MO_8: 3836 case MO_16: 3837 vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), 3838 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3839 vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), 3840 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3841 vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), 3842 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3843 vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), 3844 tcgv_vec_arg(t1), tcgv_vec_arg(t2)); 3845 vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), 3846 tcgv_vec_arg(v0), tcgv_vec_arg(t1)); 3847 break; 3848 3849 case MO_32: 3850 tcg_debug_assert(!have_isa_2_07); 3851 /* 3852 * Only 5 bits are significant, and VSPLTISB can represent -16..15. 3853 * So using -16 is a quick way to represent 16. 3854 */ 3855 c16 = tcg_constant_vec(type, MO_8, -16); 3856 c0 = tcg_constant_vec(type, MO_8, 0); 3857 3858 vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1), 3859 tcgv_vec_arg(v2), tcgv_vec_arg(c16)); 3860 vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), 3861 tcgv_vec_arg(v1), tcgv_vec_arg(v2)); 3862 vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1), 3863 tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0)); 3864 vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1), 3865 tcgv_vec_arg(t1), tcgv_vec_arg(c16)); 3866 tcg_gen_add_vec(MO_32, v0, t1, t2); 3867 break; 3868 3869 default: 3870 g_assert_not_reached(); 3871 } 3872 tcg_temp_free_vec(t1); 3873 tcg_temp_free_vec(t2); 3874} 3875 3876void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, 3877 TCGArg a0, ...) 3878{ 3879 va_list va; 3880 TCGv_vec v0, v1, v2, t0; 3881 TCGArg a2; 3882 3883 va_start(va, a0); 3884 v0 = temp_tcgv_vec(arg_temp(a0)); 3885 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); 3886 a2 = va_arg(va, TCGArg); 3887 3888 switch (opc) { 3889 case INDEX_op_shli_vec: 3890 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); 3891 break; 3892 case INDEX_op_shri_vec: 3893 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); 3894 break; 3895 case INDEX_op_sari_vec: 3896 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); 3897 break; 3898 case INDEX_op_rotli_vec: 3899 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec); 3900 break; 3901 case INDEX_op_cmp_vec: 3902 v2 = temp_tcgv_vec(arg_temp(a2)); 3903 expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); 3904 break; 3905 case INDEX_op_mul_vec: 3906 v2 = temp_tcgv_vec(arg_temp(a2)); 3907 expand_vec_mul(type, vece, v0, v1, v2); 3908 break; 3909 case INDEX_op_rotlv_vec: 3910 v2 = temp_tcgv_vec(arg_temp(a2)); 3911 t0 = tcg_temp_new_vec(type); 3912 tcg_gen_neg_vec(vece, t0, v2); 3913 tcg_gen_rotlv_vec(vece, v0, v1, t0); 3914 tcg_temp_free_vec(t0); 3915 break; 3916 default: 3917 g_assert_not_reached(); 3918 } 3919 va_end(va); 3920} 3921 3922static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) 3923{ 3924 switch (op) { 3925 case INDEX_op_goto_ptr: 3926 return C_O0_I1(r); 3927 3928 case INDEX_op_ld8u_i32: 3929 case INDEX_op_ld8s_i32: 3930 case INDEX_op_ld16u_i32: 3931 case INDEX_op_ld16s_i32: 3932 case INDEX_op_ld_i32: 3933 case INDEX_op_ctpop_i32: 3934 case INDEX_op_neg_i32: 3935 case INDEX_op_not_i32: 3936 case INDEX_op_ext8s_i32: 3937 case INDEX_op_ext16s_i32: 3938 case INDEX_op_bswap16_i32: 3939 case INDEX_op_bswap32_i32: 3940 case INDEX_op_extract_i32: 3941 case INDEX_op_ld8u_i64: 3942 case INDEX_op_ld8s_i64: 3943 case INDEX_op_ld16u_i64: 3944 case INDEX_op_ld16s_i64: 3945 case INDEX_op_ld32u_i64: 3946 case INDEX_op_ld32s_i64: 3947 case INDEX_op_ld_i64: 3948 case INDEX_op_ctpop_i64: 3949 case INDEX_op_neg_i64: 3950 case INDEX_op_not_i64: 3951 case INDEX_op_ext8s_i64: 3952 case INDEX_op_ext16s_i64: 3953 case INDEX_op_ext32s_i64: 3954 case INDEX_op_ext_i32_i64: 3955 case INDEX_op_extu_i32_i64: 3956 case INDEX_op_bswap16_i64: 3957 case INDEX_op_bswap32_i64: 3958 case INDEX_op_bswap64_i64: 3959 case INDEX_op_extract_i64: 3960 return C_O1_I1(r, r); 3961 3962 case INDEX_op_st8_i32: 3963 case INDEX_op_st16_i32: 3964 case INDEX_op_st_i32: 3965 case INDEX_op_st8_i64: 3966 case INDEX_op_st16_i64: 3967 case INDEX_op_st32_i64: 3968 case INDEX_op_st_i64: 3969 return C_O0_I2(r, r); 3970 3971 case INDEX_op_add_i32: 3972 case INDEX_op_and_i32: 3973 case INDEX_op_or_i32: 3974 case INDEX_op_xor_i32: 3975 case INDEX_op_andc_i32: 3976 case INDEX_op_orc_i32: 3977 case INDEX_op_eqv_i32: 3978 case INDEX_op_shl_i32: 3979 case INDEX_op_shr_i32: 3980 case INDEX_op_sar_i32: 3981 case INDEX_op_rotl_i32: 3982 case INDEX_op_rotr_i32: 3983 case INDEX_op_setcond_i32: 3984 case INDEX_op_negsetcond_i32: 3985 case INDEX_op_and_i64: 3986 case INDEX_op_andc_i64: 3987 case INDEX_op_shl_i64: 3988 case INDEX_op_shr_i64: 3989 case INDEX_op_sar_i64: 3990 case INDEX_op_rotl_i64: 3991 case INDEX_op_rotr_i64: 3992 case INDEX_op_setcond_i64: 3993 case INDEX_op_negsetcond_i64: 3994 return C_O1_I2(r, r, ri); 3995 3996 case INDEX_op_mul_i32: 3997 case INDEX_op_mul_i64: 3998 return C_O1_I2(r, r, rI); 3999 4000 case INDEX_op_div_i32: 4001 case INDEX_op_divu_i32: 4002 case INDEX_op_rem_i32: 4003 case INDEX_op_remu_i32: 4004 case INDEX_op_nand_i32: 4005 case INDEX_op_nor_i32: 4006 case INDEX_op_muluh_i32: 4007 case INDEX_op_mulsh_i32: 4008 case INDEX_op_orc_i64: 4009 case INDEX_op_eqv_i64: 4010 case INDEX_op_nand_i64: 4011 case INDEX_op_nor_i64: 4012 case INDEX_op_div_i64: 4013 case INDEX_op_divu_i64: 4014 case INDEX_op_rem_i64: 4015 case INDEX_op_remu_i64: 4016 case INDEX_op_mulsh_i64: 4017 case INDEX_op_muluh_i64: 4018 return C_O1_I2(r, r, r); 4019 4020 case INDEX_op_sub_i32: 4021 return C_O1_I2(r, rI, ri); 4022 case INDEX_op_add_i64: 4023 return C_O1_I2(r, r, rT); 4024 case INDEX_op_or_i64: 4025 case INDEX_op_xor_i64: 4026 return C_O1_I2(r, r, rU); 4027 case INDEX_op_sub_i64: 4028 return C_O1_I2(r, rI, rT); 4029 case INDEX_op_clz_i32: 4030 case INDEX_op_ctz_i32: 4031 case INDEX_op_clz_i64: 4032 case INDEX_op_ctz_i64: 4033 return C_O1_I2(r, r, rZW); 4034 4035 case INDEX_op_brcond_i32: 4036 case INDEX_op_brcond_i64: 4037 return C_O0_I2(r, ri); 4038 4039 case INDEX_op_movcond_i32: 4040 case INDEX_op_movcond_i64: 4041 return C_O1_I4(r, r, ri, rZ, rZ); 4042 case INDEX_op_deposit_i32: 4043 case INDEX_op_deposit_i64: 4044 return C_O1_I2(r, 0, rZ); 4045 case INDEX_op_brcond2_i32: 4046 return C_O0_I4(r, r, ri, ri); 4047 case INDEX_op_setcond2_i32: 4048 return C_O1_I4(r, r, r, ri, ri); 4049 case INDEX_op_add2_i64: 4050 case INDEX_op_add2_i32: 4051 return C_O2_I4(r, r, r, r, rI, rZM); 4052 case INDEX_op_sub2_i64: 4053 case INDEX_op_sub2_i32: 4054 return C_O2_I4(r, r, rI, rZM, r, r); 4055 4056 case INDEX_op_qemu_ld_a32_i32: 4057 return C_O1_I1(r, r); 4058 case INDEX_op_qemu_ld_a64_i32: 4059 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r); 4060 case INDEX_op_qemu_ld_a32_i64: 4061 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); 4062 case INDEX_op_qemu_ld_a64_i64: 4063 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r); 4064 4065 case INDEX_op_qemu_st_a32_i32: 4066 return C_O0_I2(r, r); 4067 case INDEX_op_qemu_st_a64_i32: 4068 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); 4069 case INDEX_op_qemu_st_a32_i64: 4070 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); 4071 case INDEX_op_qemu_st_a64_i64: 4072 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r); 4073 4074 case INDEX_op_qemu_ld_a32_i128: 4075 case INDEX_op_qemu_ld_a64_i128: 4076 return C_N1O1_I1(o, m, r); 4077 case INDEX_op_qemu_st_a32_i128: 4078 case INDEX_op_qemu_st_a64_i128: 4079 return C_O0_I3(o, m, r); 4080 4081 case INDEX_op_add_vec: 4082 case INDEX_op_sub_vec: 4083 case INDEX_op_mul_vec: 4084 case INDEX_op_and_vec: 4085 case INDEX_op_or_vec: 4086 case INDEX_op_xor_vec: 4087 case INDEX_op_andc_vec: 4088 case INDEX_op_orc_vec: 4089 case INDEX_op_nor_vec: 4090 case INDEX_op_eqv_vec: 4091 case INDEX_op_nand_vec: 4092 case INDEX_op_cmp_vec: 4093 case INDEX_op_ssadd_vec: 4094 case INDEX_op_sssub_vec: 4095 case INDEX_op_usadd_vec: 4096 case INDEX_op_ussub_vec: 4097 case INDEX_op_smax_vec: 4098 case INDEX_op_smin_vec: 4099 case INDEX_op_umax_vec: 4100 case INDEX_op_umin_vec: 4101 case INDEX_op_shlv_vec: 4102 case INDEX_op_shrv_vec: 4103 case INDEX_op_sarv_vec: 4104 case INDEX_op_rotlv_vec: 4105 case INDEX_op_rotrv_vec: 4106 case INDEX_op_ppc_mrgh_vec: 4107 case INDEX_op_ppc_mrgl_vec: 4108 case INDEX_op_ppc_muleu_vec: 4109 case INDEX_op_ppc_mulou_vec: 4110 case INDEX_op_ppc_pkum_vec: 4111 case INDEX_op_dup2_vec: 4112 return C_O1_I2(v, v, v); 4113 4114 case INDEX_op_not_vec: 4115 case INDEX_op_neg_vec: 4116 return C_O1_I1(v, v); 4117 4118 case INDEX_op_dup_vec: 4119 return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v); 4120 4121 case INDEX_op_ld_vec: 4122 case INDEX_op_dupm_vec: 4123 return C_O1_I1(v, r); 4124 4125 case INDEX_op_st_vec: 4126 return C_O0_I2(v, r); 4127 4128 case INDEX_op_bitsel_vec: 4129 case INDEX_op_ppc_msum_vec: 4130 return C_O1_I3(v, v, v, v); 4131 4132 default: 4133 g_assert_not_reached(); 4134 } 4135} 4136 4137static void tcg_target_init(TCGContext *s) 4138{ 4139 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; 4140 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; 4141 if (have_altivec) { 4142 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; 4143 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; 4144 } 4145 4146 tcg_target_call_clobber_regs = 0; 4147 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); 4148 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); 4149 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); 4150 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); 4151 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); 4152 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); 4153 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7); 4154 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); 4155 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); 4156 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); 4157 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); 4158 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12); 4159 4160 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0); 4161 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1); 4162 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2); 4163 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3); 4164 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4); 4165 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5); 4166 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6); 4167 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7); 4168 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8); 4169 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9); 4170 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10); 4171 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11); 4172 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12); 4173 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13); 4174 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14); 4175 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15); 4176 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16); 4177 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17); 4178 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18); 4179 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19); 4180 4181 s->reserved_regs = 0; 4182 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ 4183 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ 4184#if defined(_CALL_SYSV) 4185 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ 4186#endif 4187#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 4188 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ 4189#endif 4190 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); 4191 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); 4192 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); 4193 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); 4194 if (USE_REG_TB) { 4195 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */ 4196 } 4197} 4198 4199#ifdef __ELF__ 4200typedef struct { 4201 DebugFrameCIE cie; 4202 DebugFrameFDEHeader fde; 4203 uint8_t fde_def_cfa[4]; 4204 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; 4205} DebugFrame; 4206 4207/* We're expecting a 2 byte uleb128 encoded value. */ 4208QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); 4209 4210#if TCG_TARGET_REG_BITS == 64 4211# define ELF_HOST_MACHINE EM_PPC64 4212#else 4213# define ELF_HOST_MACHINE EM_PPC 4214#endif 4215 4216static DebugFrame debug_frame = { 4217 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ 4218 .cie.id = -1, 4219 .cie.version = 1, 4220 .cie.code_align = 1, 4221 .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ 4222 .cie.return_column = 65, 4223 4224 /* Total FDE size does not include the "len" member. */ 4225 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), 4226 4227 .fde_def_cfa = { 4228 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ 4229 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ 4230 (FRAME_SIZE >> 7) 4231 }, 4232 .fde_reg_ofs = { 4233 /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ 4234 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, 4235 } 4236}; 4237 4238void tcg_register_jit(const void *buf, size_t buf_size) 4239{ 4240 uint8_t *p = &debug_frame.fde_reg_ofs[3]; 4241 int i; 4242 4243 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { 4244 p[0] = 0x80 + tcg_target_callee_save_regs[i]; 4245 p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; 4246 } 4247 4248 debug_frame.fde.func_start = (uintptr_t)buf; 4249 debug_frame.fde.func_len = buf_size; 4250 4251 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); 4252} 4253#endif /* __ELF__ */ 4254#undef VMULEUB 4255#undef VMULEUH 4256#undef VMULEUW 4257#undef VMULOUB 4258#undef VMULOUH 4259#undef VMULOUW 4260#undef VMSUMUHM 4261