/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# ifdef _CALL_AIX
    /* ok */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* ok */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* ok */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB      TCG_REG_R31
#define USE_REG_TB      (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)
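/*
 * Backend-specific constraint bits for constant operands; these are
 * tested against candidate constants in tcg_target_const_match() below.
 */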
#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_U16   0x200
#define TCG_CT_CONST_S32   0x400
#define TCG_CT_CONST_U32   0x800
#define TCG_CT_CONST_ZERO  0x1000
#define TCG_CT_CONST_MONE  0x2000
#define TCG_CT_CONST_WSZ   0x4000
#define TCG_CT_CONST_CMP   0x8000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifndef R_PPC64_PCREL34
#define R_PPC64_PCREL34  132
#endif

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,  /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
/* For PPC, we use TB+4 instead of TB as the base. */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
{
    return tcg_tbrel_diff(s, target) - 4;
}

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
        return true;
    }
    return false;
}
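/*
 * Displacement field recap: I-form branches (B) carry a signed 26-bit
 * byte displacement in bits 6-29, hence the 0x3fffffc mask; B-form
 * branches (BC) carry a signed 16-bit displacement in bits 16-29,
 * hence 0xfffc.  Both are word-aligned.  reloc_pc34 covers the 34-bit
 * displacement split across the two words of a prefixed insn.
 */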
static bool mask_operand(uint32_t c, int *mb, int *me);
static bool mask64_operand(uint64_t c, int *mb, int *me);

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = sval;
    int mb, me;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    if (ct & TCG_CT_CONST_CMP) {
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            ct |= TCG_CT_CONST_S16;
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            ct |= TCG_CT_CONST_U16;
            break;
        case TCG_COND_TSTEQ:
        case TCG_COND_TSTNE:
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
                return 1;
            }
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
                return 1;
            }
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
                return 1;
            }
            return 0;
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc)   ((opc)<<26)
#define XO19(opc)   (OPCD(19)|((opc)<<1))
#define MD30(opc)   (OPCD(30)|((opc)<<2))
#define MDS30(opc)  (OPCD(30)|((opc)<<1))
#define XO31(opc)   (OPCD(31)|((opc)<<1))
#define XO58(opc)   (OPCD(58)|(opc))
#define XO62(opc)   (OPCD(62)|(opc))
#define VX4(opc)    (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)

#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)
#define LQ     OPCD( 56)

#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)
#define STQ    XO62(  2)

#define PLWA   OPCD( 41)
#define PLD    OPCD( 57)
#define PLXSD  OPCD( 42)
#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */

#define PSTD   OPCD( 61)
#define PSTXSD OPCD( 46)
#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD(  8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)
#define ADDPCIS XO19(  2)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)
#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31(  4)
#define TRAP   (TW | TO(31))

#define SETBC    XO31(384)  /* v3.10 */
#define SETBCR   XO31(416)  /* v3.10 */
#define SETNBC   XO31(448)  /* v3.10 */
#define SETNBCR  XO31(480)  /* v3.10 */

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)     /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)     /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)      /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)      /* v3.00 */
#define LXVWSX     (XO31(364) | 1)     /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)     /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)     /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5)  /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)      /* v3.00 */
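/*
 * The vector opcodes below are VX-form (major opcode 4, via VX4) unless
 * noted otherwise.  Entries annotated v2.06/v2.07/v3.00/v3.10 require
 * that ISA level; their use is gated on have_isa_* / have_vsx.
 */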
#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r)   ((r)<<21)
#define RS(r)   ((r)<<21)
#define RA(r)   ((r)<<16)
#define RB(r)   ((r)<<11)
#define TO(t)   ((t)<<21)
#define SH(s)   ((s)<<11)
#define MB(b)   ((b)<<6)
#define ME(e)   ((e)<<1)
#define BO(o)   ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b)  (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
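/*
 * In the BC encoding, BO(12) branches when the selected CR bit is set,
 * BO(4) when it is clear, and BO(20) branches unconditionally; BI picks
 * the CR bit as 4 * crfield + one of CR_LT/CR_GT/CR_EQ/CR_SO.
 */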
static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ] = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE] = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
};

/* The low bit here is set if the RA and RB fields must be inverted. */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
{
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
}

static void tcg_out_prefix_align(TCGContext *s)
{
    if (tcg_out_need_prefix_align(s)) {
        tcg_out32(s, NOP);
    }
}

static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}
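/*
 * A prefixed (ISA v3.1) instruction is 8 bytes: a prefix word with
 * primary opcode 1, followed by the suffix word.  For the D-form
 * variants emitted below, the prefix supplies the high 18 bits of a
 * 34-bit displacement and the suffix the low 16; the R bit in the
 * prefix selects pc-relative addressing.
 */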
/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves. */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves. */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, bool rc)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
}

static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb)
{
    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
}

static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, int me, bool rc)
{
    tcg_debug_assert((mb & 0x1f) == mb);
    tcg_debug_assert((me & 0x1f) == me);
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
}

static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb, int me)
{
    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}
static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn. */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}

/* Emit a move into ret of arg, if it can be done in one insn. */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn. */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn. */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required. */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value. */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns. */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible. */
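    /*
     * The pool addressing variants below are tried in order: TB-relative
     * via TCG_REG_TB, pc-relative PLD (v3.10), or ADDPCIS + LD (v3.00);
     * failing those, build the full 64-bit constant in up to 5 insns.
     */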
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }
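    /*
     * With VSX, LXSDX loads one doubleword and LXVDSX splats one
     * doubleword, so a single 8-byte pool entry suffices.  Plain
     * Altivec LVX loads a full 16 bytes, so the pool entry must
     * contain the value replicated.
     */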
    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}
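/*
 * Worked example for mask_operand: c = 0x0ff0 gives lsb = 0x10 and
 * test = 0x1000, a power of two, so the pattern is accepted with
 * me = clz32(0x10) = 27 and mb = clz32(0x1000) + 1 = 20 -- the
 * rlwinm mask [20, 27], which selects exactly the bits 0x0ff0.
 */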
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0. */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1. */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }
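    /*
     * The align masks set above reflect the encodings: DS-form insns
     * (ld, std, lwa, lxsd, stxsd) reuse the low 2 bits of the offset
     * field as opcode bits, so their offsets must be multiples of 4;
     * DQ-form lxv/stxv likewise requires a multiple of 16.
     */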
    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form. */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}
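/*
 * Note for the non-VSX vector paths below: lvewx/stvewx transfer the
 * word in the vector lane selected by the effective address, so the
 * vsldoi rotate by (offset - 4) & 12 moves the value between that lane
 * and the lane the rest of the backend expects.
 */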
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/*
 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
 * If RC, then also set RC0.
 */
static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
                         bool const_arg2, TCGType type, bool rc)
{
    int mb, me;

    if (!const_arg2) {
        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
        return;
    }

    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    if ((arg2 & ~0xffff) == 0) {
        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
        return;
    }
    if ((arg2 & ~0xffff0000ull) == 0) {
        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
        return;
    }
    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
        return;
    }
    if (TCG_TARGET_REG_BITS == 64) {
        int sh = clz64(arg2);
        if (mask64_operand(arg2 << sh, &mb, &me)) {
            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
            return;
        }
    }
    /* Constraints should satisfy this. */
    g_assert_not_reached();
}
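/*
 * Note that andi. and andis. exist only in record form -- they always
 * set CR0 -- which is why tcg_out_test need not apply RC explicitly
 * for those two encodings above.
 */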
static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        bool const_arg2, int cr, TCGType type)
{
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /*
     * Simplify the comparisons below wrt CMPI.
     * All of the tests are 16-bit, so a 32-bit sign extend always works.
     */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t)arg2 == arg2) {
                op = CMPI;
                break;
            }
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        tcg_debug_assert(cr == 0);
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
        return;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            tcg_debug_assert((int16_t)arg2 == arg2);
            op = CMPI;
            break;
        }
        op = CMP;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    default:
        g_assert_not_reached();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
    op |= RA(arg1);
    op |= const_arg2 ? arg2 & 0xffff : RB(arg2);
    tcg_out32(s, op);
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, src, src));
        return;
    }

    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
    if (neg) {
        tcg_out32(s, NEG | RT(dst) | RA(dst));
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.  Extra addition
         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
        return;
    }
    tcg_out_setcond_eq0(s, type, dst, src, false);
    if (neg) {
        tcg_out32(s, ADDI | TAI(dst, dst, -1));
    } else {
        tcg_out_xori32(s, dst, dst, 1);
    }
}

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg arg0, TCGReg arg1, TCGArg arg2,
                            bool const_arg2, bool neg)
{
    int sh;
    bool inv;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2. */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* With SETBC/SETBCR, we can always implement with 2 insns. */
    if (have_isa_3_10) {
        tcg_insn_unit bi, opc;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
        bi = tcg_to_bc[cond] & (0x1f << 16);
        if (tcg_to_bc[cond] & BO(8)) {
            opc = neg ? SETNBC : SETBC;
        } else {
            opc = neg ? SETNBCR : SETBCR;
        }
        tcg_out32(s, opc | RT(arg0) | bi);
        return;
    }

    /* Handle common and trivial cases before handling anything else. */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_NE:
            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit. */
            if (type == TCG_TYPE_I32) {
                if (neg) {
                    tcg_out_sari32(s, arg0, arg1, 31);
                } else {
                    tcg_out_shri32(s, arg0, arg1, 31);
                }
            } else {
                if (neg) {
                    tcg_out_sari64(s, arg0, arg1, 63);
                } else {
                    tcg_out_shri64(s, arg0, arg1, 63);
                }
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL. */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    inv = false;
    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_TSTEQ:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_TSTNE:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        inv = true;
        /* fall through */
    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;        /* CR7 CR_GT */
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        inv = true;
        /* fall through */
    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;        /* CR7 CR_LT */
        goto crtest;

    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        if (neg && inv) {
            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
        } else if (neg) {
            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
        } else if (inv) {
            tcg_out_xori32(s, arg0, arg0, 1);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
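/*
 * In the crtest path above, mfocrf copies CR7 into the low nibble of
 * R0 (bits 28-31 in IBM numbering: LT, GT, EQ, SO) and the rlwinm
 * rotates the selected bit into bit 31, yielding 0 or 1.
 */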
static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false);
}

static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false);
}

static const TCGOutOpSetcond outop_setcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_setcond,
    .out_rri = tgen_setcondi,
};

static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true);
}

static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true);
}

static const TCGOutOpSetcond outop_negsetcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_negsetcond,
    .out_rri = tgen_negsetcondi,
};

static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
{
    tcg_out32(s, tcg_to_bc[cond] | bd);
}

static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
{
    int bd = 0;
    if (l->has_value) {
        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out_bc(s, cond, bd);
}

static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, false, 0, type);
    tcg_out_bc_lab(s, cond, l);
}

static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg arg1, tcg_target_long arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, true, 0, type);
    tcg_out_bc_lab(s, cond, l);
}

static const TCGOutOpBrcond outop_brcond = {
    .base.static_constraint = C_O0_I2(r, rC),
    .out_rr = tgen_brcond,
    .out_ri = tgen_brcondi,
};
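/*
 * Without isel, movcond below falls back to a compare, a conditional
 * branch forward over a single insn, and an unconditional move.
 */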
static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2,
                         TCGArg v1, bool const_v1, TCGArg v2, bool const_v2)
{
    /* If for some reason both inputs are zero, don't produce bad code. */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion. */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out_bc(s, cond, 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

static const TCGOutOpMovcond outop_movcond = {
    .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ),
    .out = tgen_movcond,
};

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
        /* Note that the only other valid constant for a2 is 0. */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out_bc(s, TCG_COND_EQ, 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out_bc(s, TCG_COND_NE, 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}
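/*
 * Double-word comparison for 32-bit hosts: compare the high parts with
 * the original (signed or unsigned) condition, the low parts always
 * unsigned, then fold the CR6/CR7 results into CR0[EQ] with CR logic
 * for the subsequent branch or setcond.
 */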

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
        /* Note that the only other valid constant for a2 is 0. */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out_bc(s, TCG_COND_EQ, 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out_bc(s, TCG_COND_NE, 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}

static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                         TCGArg bl, bool blconst, TCGArg bh, bool bhconst)
{
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond2;
    int op, bit1, bit2;

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        if (blconst) {
            tcg_out_andi32(s, TCG_REG_R0, al, bl);
        } else {
            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
        }
        if (bhconst) {
            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
        } else {
            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
        }
        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
        break;

    default:
        g_assert_not_reached();
    }
}
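
/*
 * Summary of tcg_out_cmp2 above, for the non-TST conditions: the high
 * parts are compared into cr6 and the low parts into cr7, then CR logic
 * (crand/crnand for equality, crandc or crand plus cror for the ordered
 * cases) folds both fields into the EQ bit of cr0 for the caller.
 */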

static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
                          TCGReg al, TCGReg ah,
                          TCGArg bl, bool const_bl,
                          TCGArg bh, bool const_bh)
{
    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
    tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
}

#if TCG_TARGET_REG_BITS != 32
__attribute__((unused))
#endif
static const TCGOutOpSetcond2 outop_setcond2 = {
    .base.static_constraint = C_O1_I4(r, r, r, rU, rC),
    .out = tgen_setcond2,
};

static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                         TCGArg bl, bool const_bl,
                         TCGArg bh, bool const_bh, TCGLabel *l)
{
    assert(TCG_TARGET_REG_BITS == 32);
    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
    tcg_out_bc_lab(s, TCG_COND_EQ, l);
}

#if TCG_TARGET_REG_BITS != 32
__attribute__((unused))
#endif
static const TCGOutOpBrcond2 outop_brcond2 = {
    .base.static_constraint = C_O0_I4(r, r, rU, rC),
    .out = tgen_brcond2,
};

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    uint32_t insn;

    if (a0 & TCG_MO_ST_LD) {
        insn = HWSYNC;
    } else {
        insn = LWSYNC;
    }

    tcg_out32(s, insn);
}
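
/*
 * Barrier selection note: lwsync orders load-load, load-store and
 * store-store, so it covers every TCG_MO_* combination except the
 * store-load case, which is the only one that needs the full hwsync.
 */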

static void tcg_out_call_int(TCGContext *s, int lk,
                             const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the descriptor.  Branch directly if the target is
       in range and we don't have to spend much effort on building
       the toc.  */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, lk, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below. */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid r12 load if we
       knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, lk, target);
    } else {
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#else
    tcg_out_b(s, lk, target);
#endif
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, LK, target);
}
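
/*
 * Background assumption drawn from the ABI, not from this file: under
 * _CALL_AIX a function pointer addresses a descriptor of
 * { entry point, toc value, environment }, which is why the code above
 * loads through @target rather than branching to it directly.
 */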

static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_UQ] = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_UQ] = LDBRX,
};

static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_UQ] = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_UQ] = STDBRX,
};

static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
    if (arg < 0) {
        arg = TCG_REG_TMP1;
    }
    tcg_out32(s, MFSPR | RT(arg) | LR);
    return arg;
}

/*
 * For the purposes of ppc32 sorting 4 input registers into 4 argument
 * registers, there is an outside chance we would require 3 temps.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ra_gen = ldst_ra_gen,
    .ntmp = 3,
    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}
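
/*
 * Slow-path protocol, restated: the fast path left a conditional
 * branch-and-link at label_ptr[0]; reloc_pc14() retargets it to the
 * code emitted here, the helper call is marshalled according to
 * ldst_helper_param, and a plain branch returns to raddr.
 */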

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    TCGAtomAlign aa;

    if ((memop & MO_SIZE) <= MO_64) {
        return true;
    }

    /*
     * Reject 16-byte memop with 16-byte atomicity,
     * but do allow a pair of 64-bit operations.
     */
    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
    return aa.atom <= MO_64;
}

/* We expect to use a 16-bit negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS  -32768

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr, MemOpIdx oi, bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp a_bits, s_bits;

    /*
     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
     *
     * Before 3.0, "An access that is not atomic is performed as a set of
     * smaller disjoint atomic accesses. In general, the number and alignment
     * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN.
     *
     * As of 3.0, "the non-atomic access is performed as described in
     * the corresponding list", which matches MO_ATOM_SUBALIGN.
     */
    s_bits = opc & MO_SIZE;
    h->aa = atom_and_align_for_opc(s, opc,
                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
                                                 : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_bits = h->aa.align;

    if (tcg_use_softmmu) {
        int mem_index = get_mmuidx(oi);
        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
                            : offsetof(CPUTLBEntry, addr_write);
        int fast_off = tlb_mask_table_ofs(s, mem_index);
        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
        int table_off = fast_off + offsetof(CPUTLBDescFast, table);

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addr_reg = addr;

        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);

        /* Extract the page index, shifted into place for tlb index. */
        if (TCG_TARGET_REG_BITS == 32) {
            tcg_out_shri32(s, TCG_REG_R0, addr,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        } else {
            tcg_out_shri64(s, TCG_REG_R0, addr,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        }
        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));

        /*
         * Load the TLB comparator into TMP2.
         * For 64-bit host, always load the entire 64-bit slot for simplicity.
         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
         */
        if (cmp_off == 0) {
            tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX)
                      | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
        } else {
            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
        }

        /*
         * Load the TLB addend for use on the fast path.
         * Do this asap to minimize any load use delay.
         */
        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                       offsetof(CPUTLBEntry, addend));
        }

        /* Clear the non-page, non-alignment bits from the address in R0. */
        if (TCG_TARGET_REG_BITS == 32) {
            /*
             * We don't support unaligned accesses on 32-bit hosts.
             * Preserve the bottom bits and thus trigger a comparison
             * failure on unaligned accesses.
             */
            if (a_bits < s_bits) {
                a_bits = s_bits;
            }
            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
                        (32 - a_bits) & 31, 31 - s->page_bits);
        } else {
            TCGReg t = addr;

            /*
             * If the access is unaligned, we need to make sure we fail if we
             * cross a page boundary.  The trick is to add the access size - 1
             * to the address before masking the low bits.  That will make the
             * address overflow to the next page if we cross a page boundary,
             * which will then force a mismatch of the TLB compare.
             */
            if (a_bits < s_bits) {
                unsigned a_mask = (1 << a_bits) - 1;
                unsigned s_mask = (1 << s_bits) - 1;
                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
                t = TCG_REG_R0;
            }

            /* Mask the address for the requested alignment. */
            if (addr_type == TCG_TYPE_I32) {
                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
                            (32 - a_bits) & 31, 31 - s->page_bits);
            } else if (a_bits == 0) {
                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
            } else {
                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
                            64 - s->page_bits, s->page_bits - a_bits);
                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
            }
        }

        /* Full comparison into cr0. */
        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type);

        /* Load a pointer into the current opcode w/conditional branch-link. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_bc(s, TCG_COND_NE, LK);

        h->base = TCG_REG_TMP1;
    } else {
        if (a_bits) {
            ldst = new_ldst_label(s);
            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addr_reg = addr;

            /* We are expecting a_bits to max out at 7, much lower than ANDI. */
            tcg_debug_assert(a_bits < 16);
            tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1));

            ldst->label_ptr[0] = s->code_ptr;
            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
        }

        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
    }

    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
        /* Zero-extend the guest address for use in the host address. */
        tcg_out_ext32u(s, TCG_REG_TMP2, addr);
        h->index = TCG_REG_TMP2;
    } else {
        h->index = addr;
    }

    return ldst;
}
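
/*
 * Worked example of the page-cross trick above, with made-up numbers:
 * given 4KiB pages, an 8-byte access at 0x1ffd with a_bits == 0 adds
 * s_mask - a_mask == 7, giving 0x2004.  The masked page address 0x2000
 * no longer matches the TLB comparator for page 0x1000, so the compare
 * fails and the slow path handles the straddling access.
 */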

static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addr, MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr, oi, true);

    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
        } else if (h.index == datahi) {
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
        } else {
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
        if (!have_isa_2_06 && insn == LDBRX) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
        } else if (insn) {
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
        } else {
            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
            tcg_out_movext(s, TCG_TYPE_REG, datalo,
                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
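
/*
 * Note on the LDBRX fallback above: before ISA 2.06 there is no
 * doubleword byte-reversed load, so the value is assembled from two
 * lwbrx word loads, with rldimi depositing the word loaded from
 * offset 4 into the upper half of the result.
 */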

static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addr, MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr, oi, false);

    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
        } else {
            tcg_out32(s, STW | TAI(datahi, h.index, 0));
            tcg_out32(s, STW | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
        if (!have_isa_2_06 && insn == STDBRX) {
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4));
            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2));
        } else {
            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo,
                                   TCGReg datahi, TCGReg addr_reg,
                                   MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    bool need_bswap;
    uint32_t insn;
    TCGReg index;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LQ/STQ have no indexing. */
    index = h.index;
    if (h.base != 0) {
        index = TCG_REG_TMP1;
        tcg_out32(s, ADD | TAB(index, h.base, h.index));
    }
    need_bswap = get_memop(oi) & MO_BSWAP;

    if (h.aa.atom == MO_128) {
        tcg_debug_assert(!need_bswap);
        tcg_debug_assert(datalo & 1);
        tcg_debug_assert(datahi == datalo - 1);
        tcg_debug_assert(!is_ld || datahi != index);
        insn = is_ld ? LQ : STQ;
        tcg_out32(s, insn | TAI(datahi, index, 0));
    } else {
        TCGReg d1, d2;

        if (HOST_BIG_ENDIAN ^ need_bswap) {
            d1 = datahi, d2 = datalo;
        } else {
            d1 = datalo, d2 = datahi;
        }

        if (need_bswap) {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
            insn = is_ld ? LDBRX : STDBRX;
            tcg_out32(s, insn | TAB(d1, 0, index));
            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
        } else {
            insn = is_ld ? LD : STD;
            tcg_out32(s, insn | TAI(d1, index, 0));
            tcg_out32(s, insn | TAI(d2, index, 8));
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
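
/*
 * The tcg_debug_asserts in the LQ/STQ path above encode the ISA
 * requirement, as I read it, that lq/stq operate on an even/odd
 * register pair: datahi must be the even register directly below the
 * odd datalo.
 */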

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

/* Parameters for function call generation, used in tcg.c. */
#define TCG_TARGET_STACK_ALIGN      16

#ifdef _CALL_AIX
# define LINK_AREA_SIZE                 (6 * SZR)
# define LR_OFFSET                      (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET   (LINK_AREA_SIZE + 8 * SZR)
#elif defined(_CALL_DARWIN)
# define LINK_AREA_SIZE                 (6 * SZR)
# define LR_OFFSET                      (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
#  define LINK_AREA_SIZE                (4 * SZR)
#  define LR_OFFSET                     (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
# if defined(_CALL_SYSV)
#  define LINK_AREA_SIZE                (2 * SZR)
#  define LR_OFFSET                     (1 * SZR)
# endif
#endif
#ifndef LR_OFFSET
# error "Unhandled abi"
#endif
#ifndef TCG_TARGET_CALL_STACK_OFFSET
# define TCG_TARGET_CALL_STACK_OFFSET   LINK_AREA_SIZE
#endif

#define CPU_TEMP_BUF_SIZE   (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE       ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)

#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
                     + TCG_STATIC_CALL_ARGS_SIZE    \
                     + CPU_TEMP_BUF_SIZE            \
                     + REG_SAVE_SIZE                \
                     + TCG_TARGET_STACK_ALIGN - 1)  \
                    & -TCG_TARGET_STACK_ALIGN)

#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
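
/*
 * Worked example, assuming the generic TCG values
 * TCG_STATIC_CALL_ARGS_SIZE == 128 and CPU_TEMP_BUF_NLONGS == 128: on
 * an ELFv2 64-bit host, SZR == 8, the link area and call-stack offset
 * are 32 bytes, CPU_TEMP_BUF_SIZE is 1024 and REG_SAVE_SIZE is
 * 18 * 8 == 144, so FRAME_SIZE is (32 + 128 + 1024 + 144) rounded to a
 * 16-byte multiple, i.e. 1328 bytes.
 */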

static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

#ifdef _CALL_AIX
    const void **desc = (const void **)s->code_ptr;
    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
    desc[1] = 0;                            /* environment pointer */
    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
#endif

    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
                  CPU_TEMP_BUF_SIZE);

    /* Prologue */
    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
    tcg_out32(s, (SZR == 8 ? STDU : STWU)
              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);

    if (!tcg_use_softmmu && guest_base) {
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
    tcg_out32(s, BCCTR | BO_ALWAYS);

    /* Epilogue */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);

    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
    tcg_out32(s, BCLR | BO_ALWAYS);
}

static void tcg_out_tb_start(TCGContext *s)
{
    /* Load TCG_REG_TB. */
    if (USE_REG_TB) {
        if (have_isa_3_00) {
            /* lnia REG_TB */
            tcg_out_addpcis(s, TCG_REG_TB, 0);
        } else {
            /* bcl 20,31,$+4 (preferred form for getting nia) */
            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
        }
    }
}

static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
{
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
    tcg_out_b(s, 0, tcg_code_gen_epilogue);
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    uintptr_t ptr = get_jmp_target_addr(s, which);
    int16_t lo;

    /* Direct branch will be patched by tb_target_set_jmp_target. */
    set_jmp_insn_offset(s, which);
    tcg_out32(s, NOP);

    /* When branch is out of range, fall through to indirect. */
    if (USE_REG_TB) {
        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
    } else if (have_isa_3_10) {
        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
    } else if (have_isa_3_00) {
        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
        lo = offset;
        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
    } else {
        lo = ptr;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
    }

    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
    tcg_out32(s, BCCTR | BO_ALWAYS);
    set_jmp_reset_offset(s, which);
}
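
/*
 * Rough shape of the sequence emitted above for goto_tb: a patchable
 * slot (initially a nop), a load of the jump target into TMP1 using
 * whichever addressing form the ISA level allows, then mtctr/bcctr.
 * The nop is later rewritten into a direct branch when possible.
 */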

void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t addr = tb->jmp_target_addr[n];
    intptr_t diff = addr - jmp_rx;
    tcg_insn_unit insn;

    if (in_range_b(diff)) {
        insn = B | (diff & 0x3fffffc);
    } else {
        insn = NOP;
    }

    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}
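
/*
 * The patch above rewrites exactly one instruction word: qatomic_set()
 * keeps the store single-copy atomic and flush_idcache_range() makes it
 * visible to instruction fetch.  Since both B and NOP are valid in that
 * slot, a concurrent thread sees either the old or the new insn, never
 * a torn mix.
 */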

static void tgen_add(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ADD | TAB(a0, a1, a2));
}

static void tgen_addi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
}

static const TCGOutOpBinary outop_add = {
    .base.static_constraint = C_O1_I2(r, r, rT),
    .out_rrr = tgen_add,
    .out_rri = tgen_addi,
};

static void tgen_and(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, AND | SAB(a1, a0, a2));
}

static void tgen_andi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_andi32(s, a0, a1, a2);
    } else {
        tcg_out_andi64(s, a0, a1, a2);
    }
}

static const TCGOutOpBinary outop_and = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_and,
    .out_rri = tgen_andi,
};

static void tgen_andc(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ANDC | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_andc = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_andc,
};

static void tgen_clz(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
}

static void tgen_clzi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
}

static const TCGOutOpBinary outop_clz = {
    .base.static_constraint = C_O1_I2(r, r, rZW),
    .out_rrr = tgen_clz,
    .out_rri = tgen_clzi,
};

static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD;
    tcg_out32(s, insn | SAB(a1, a0, 0));
}

static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
{
    return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented;
}

static const TCGOutOpUnary outop_ctpop = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_ctpop,
    .out_rr = tgen_ctpop,
};

static void tgen_ctz(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
}

static void tgen_ctzi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
}

static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags)
{
    return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented;
}

static const TCGOutOpBinary outop_ctz = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_ctz,
    .out_rrr = tgen_ctz,
    .out_rri = tgen_ctzi,
};

static void tgen_eqv(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, EQV | SAB(a1, a0, a2));
}

#if TCG_TARGET_REG_BITS == 64
static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_shri64(s, a0, a1, 32);
}

static const TCGOutOpUnary outop_extrh_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extrh_i64_i32,
};
#endif

static void tgen_divs(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_divs = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_divs,
};

static const TCGOutOpDivRem outop_divs2 = {
    .base.static_constraint = C_NotImplemented,
};

static void tgen_divu(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_divu = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_divu,
};

static const TCGOutOpDivRem outop_divu2 = {
    .base.static_constraint = C_NotImplemented,
};

static const TCGOutOpBinary outop_eqv = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_eqv,
};

static void tgen_mul(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static void tgen_muli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out32(s, MULLI | TAI(a0, a1, a2));
}

static const TCGOutOpBinary outop_mul = {
    .base.static_constraint = C_O1_I2(r, r, rI),
    .out_rrr = tgen_mul,
    .out_rri = tgen_muli,
};

static const TCGOutOpMul2 outop_muls2 = {
    .base.static_constraint = C_NotImplemented,
};

static void tgen_mulsh(TCGContext *s, TCGType type,
                       TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_mulsh = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_mulsh,
};

static const TCGOutOpMul2 outop_mulu2 = {
    .base.static_constraint = C_NotImplemented,
};

static void tgen_muluh(TCGContext *s, TCGType type,
                       TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_muluh = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_muluh,
};

static void tgen_nand(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, NAND | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_nand = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_nand,
};

static void tgen_nor(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, NOR | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_nor = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_nor,
};

static void tgen_or(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, OR | SAB(a1, a0, a2));
}

static void tgen_ori(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_ori32(s, a0, a1, a2);
}

static const TCGOutOpBinary outop_or = {
    .base.static_constraint = C_O1_I2(r, r, rU),
    .out_rrr = tgen_or,
    .out_rri = tgen_ori,
};

static void tgen_orc(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ORC | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_orc = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_orc,
};

static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags)
{
    return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented;
}

static void tgen_rems(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_rems = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_mod,
    .out_rrr = tgen_rems,
};

static void tgen_remu(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_remu = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_mod,
    .out_rrr = tgen_remu,
};

static void tgen_rotl(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31));
    } else {
        tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0));
    }
}

static void tgen_rotli(TCGContext *s, TCGType type,
                       TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
    } else {
        tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
    }
}

static const TCGOutOpBinary outop_rotl = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_rotl,
    .out_rri = tgen_rotli,
};

static const TCGOutOpBinary outop_rotr = {
    .base.static_constraint = C_NotImplemented,
};

static void tgen_sar(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? SRAW : SRAD;
    tcg_out32(s, insn | SAB(a1, a0, a2));
}

static void tgen_sari(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* Limit immediate shift count lest we create an illegal insn. */
    if (type == TCG_TYPE_I32) {
        tcg_out_sari32(s, a0, a1, a2 & 31);
    } else {
        tcg_out_sari64(s, a0, a1, a2 & 63);
    }
}

static const TCGOutOpBinary outop_sar = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_sar,
    .out_rri = tgen_sari,
};

static void tgen_shl(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD;
    tcg_out32(s, insn | SAB(a1, a0, a2));
}

static void tgen_shli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* Limit immediate shift count lest we create an illegal insn. */
    if (type == TCG_TYPE_I32) {
        tcg_out_shli32(s, a0, a1, a2 & 31);
    } else {
        tcg_out_shli64(s, a0, a1, a2 & 63);
    }
}

static const TCGOutOpBinary outop_shl = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_shl,
    .out_rri = tgen_shli,
};

static void tgen_shr(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD;
    tcg_out32(s, insn | SAB(a1, a0, a2));
}

static void tgen_shri(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* Limit immediate shift count lest we create an illegal insn. */
    if (type == TCG_TYPE_I32) {
        tcg_out_shri32(s, a0, a1, a2 & 31);
    } else {
        tcg_out_shri64(s, a0, a1, a2 & 63);
    }
}

static const TCGOutOpBinary outop_shr = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_shr,
    .out_rri = tgen_shri,
};

static void tgen_sub(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, SUBF | TAB(a0, a2, a1));
}

static void tgen_subfi(TCGContext *s, TCGType type,
                       TCGReg a0, tcg_target_long a1, TCGReg a2)
{
    tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
}

static const TCGOutOpSubtract outop_sub = {
    .base.static_constraint = C_O1_I2(r, rI, r),
    .out_rrr = tgen_sub,
    .out_rir = tgen_subfi,
};
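
/*
 * Note on tgen_subfi above: subfic computes RT = SI - RA, i.e.
 * immediate minus register, which is exactly the constant-first
 * subtraction shape, so no scratch register or negation is needed.
 */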

static void tgen_xor(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, XOR | SAB(a1, a0, a2));
}

static void tgen_xori(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_xori32(s, a0, a1, a2);
}

static const TCGOutOpBinary outop_xor = {
    .base.static_constraint = C_O1_I2(r, r, rU),
    .out_rrr = tgen_xor,
    .out_rri = tgen_xori,
};

static void tgen_bswap16(TCGContext *s, TCGType type,
                         TCGReg dst, TCGReg src, unsigned flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static const TCGOutOpBswap outop_bswap16 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap16,
};

static void tgen_bswap32(TCGContext *s, TCGType type,
                         TCGReg dst, TCGReg src, unsigned flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static const TCGOutOpBswap outop_bswap32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap32,
};
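
/*
 * The rlwimi instruction emitted via tcg_out_rlw(s, RLWIMI, ...) above
 * is the dep() operation of the comments: rotate the source left, then
 * insert it into the destination under the mask described by the MB/ME
 * bit range.
 */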

#if TCG_TARGET_REG_BITS == 64
static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                      src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

static const TCGOutOpUnary outop_bswap64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap64,
};
#endif /* TCG_TARGET_REG_BITS == 64 */

static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    tcg_out32(s, NEG | RT(a0) | RA(a1));
}

static const TCGOutOpUnary outop_neg = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_neg,
};

static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    tgen_nor(s, type, a0, a1, a1);
}

static const TCGOutOpUnary outop_not = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_not,
};

static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                         TCGReg a2, unsigned ofs, unsigned len)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWIMI, a0, a2, ofs, 32 - ofs - len, 31 - ofs);
    } else {
        tcg_out_rld(s, RLDIMI, a0, a2, ofs, 64 - ofs - len);
    }
}

static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                          tcg_target_long a2, unsigned ofs, unsigned len)
{
    tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len));
}

static const TCGOutOpDeposit outop_deposit = {
    .base.static_constraint = C_O1_I2(r, 0, rZ),
    .out_rrr = tgen_deposit,
    .out_rri = tgen_depositi,
};

static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                         unsigned ofs, unsigned len)
{
    if (ofs == 0 && len <= 16) {
        tgen_andi(s, TCG_TYPE_I32, a0, a1, (1 << len) - 1);
    } else if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWINM, a0, a1, 32 - ofs, 32 - len, 31);
    } else {
        tcg_out_rld(s, RLDICL, a0, a1, 64 - ofs, 64 - len);
    }
}

static const TCGOutOpExtract outop_extract = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extract,
};

static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                          unsigned ofs, unsigned len)
{
    if (ofs == 0) {
        switch (len) {
        case 8:
            tcg_out_ext8s(s, type, a0, a1);
            return;
        case 16:
            tcg_out_ext16s(s, type, a0, a1);
            return;
        case 32:
            tcg_out_ext32s(s, a0, a1);
            return;
        }
    } else if (ofs + len == 32) {
        tcg_out_sari32(s, a0, a1, ofs);
        return;
    }
    g_assert_not_reached();
}

static const TCGOutOpExtract outop_sextract = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_sextract,
};

static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0, a1;

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
        tcg_out32(s, BCCTR | BO_ALWAYS);
        break;
    case INDEX_op_br:
        {
            TCGLabel *l = arg_label(args[0]);
            uint32_t insn = B;

            if (l->has_value) {
                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
                                       l->u.value_ptr);
            } else {
                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
            }
            tcg_out32(s, insn);
        }
        break;
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_i64:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_ld(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_ld_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
        break;

    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_i64:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_st(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_st_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_add2_i64:
#else
    case INDEX_op_add2_i32:
#endif
        /* Note that the CA bit is defined based on the word size of the
           environment.  So in 64-bit mode it's always carry-out of bit 63.
           The fallback code using deposit works just as well for 32-bit.  */
        a0 = args[0], a1 = args[1];
        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[4]) {
            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
        } else {
            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
        }
        if (const_args[5]) {
            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
        } else {
            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_sub2_i64:
#else
    case INDEX_op_sub2_i32:
#endif
        a0 = args[0], a1 = args[1];
        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[2]) {
            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
        } else {
            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
        }
        if (const_args[3]) {
            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
        } else {
            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_call:     /* Always emitted via tcg_out_call. */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb. */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb. */
    default:
        g_assert_not_reached();
    }
}

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_not_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
        return 1;
    case INDEX_op_orc_vec:
        return have_isa_2_07;
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
        return vece <= MO_32 || have_isa_2_07;
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
        return vece <= MO_32;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_rotli_vec:
        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
    case INDEX_op_cmp_vec:
    case INDEX_op_cmpsel_vec:
        return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
    case INDEX_op_neg_vec:
        return vece >= MO_32 && have_isa_3_00;
    case INDEX_op_mul_vec:
        switch (vece) {
        case MO_8:
        case MO_16:
            return -1;
        case MO_32:
            return have_isa_2_07 ? 1 : -1;
        case MO_64:
            return have_isa_3_10;
        }
        return 0;
    case INDEX_op_bitsel_vec:
        return have_vsx;
    case INDEX_op_rotrv_vec:
        return -1;
    default:
        return 0;
    }
}
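
/*
 * Reminder of the contract here (generic TCG convention): returning 1
 * means the opcode is supported directly, 0 means unsupported, and -1
 * means it can be emulated by expansion via tcg_expand_vec_op().
 */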

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    tcg_debug_assert(dst >= TCG_REG_V0);

    /* Splat from integer reg allowed via constraints for v3.00. */
    if (src < TCG_REG_V0) {
        tcg_debug_assert(have_isa_3_00);
        switch (vece) {
        case MO_64:
            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
            return true;
        case MO_32:
            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
            return true;
        default:
            /* Fail, so that we fall back on either dupm or mov+dup. */
            return false;
        }
    }

    /*
     * Recall we use (or emulate) VSX integer loads, so the integer is
     * right justified within the left (zero-index) double-word.
     */
    switch (vece) {
    case MO_8:
        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
        break;
    case MO_16:
        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
        break;
    case MO_32:
        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
        break;
    case MO_64:
        if (have_vsx) {
            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
            break;
        }
        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg out, TCGReg base, intptr_t offset)
{
    int elt;

    tcg_debug_assert(out >= TCG_REG_V0);
    switch (vece) {
    case MO_8:
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
        }
        elt = extract32(offset, 0, 4);
#if !HOST_BIG_ENDIAN
        elt ^= 15;
#endif
        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_16:
        tcg_debug_assert((offset & 1) == 0);
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
        }
        elt = extract32(offset, 1, 3);
#if !HOST_BIG_ENDIAN
        elt ^= 7;
#endif
        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_32:
        if (have_isa_3_00) {
            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
        elt = extract32(offset, 2, 2);
#if !HOST_BIG_ENDIAN
        elt ^= 3;
#endif
        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_64:
        if (have_vsx) {
            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
        elt = extract32(offset, 3, 1);
#if !HOST_BIG_ENDIAN
        elt = !elt;
#endif
        if (elt) {
            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
        } else {
            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
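
/*
 * On element numbering above: vsplt{b,h,w} count elements from the
 * big-endian end of the vector register, so on little-endian hosts the
 * element index derived from the address is flipped (elt ^= 15, 7 or 3
 * by element size, and negated for MO_64).
 */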
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const uint32_t
        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
        neg_op[4] = { 0, 0, VNEGW, VNEGD },
        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };

    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
    uint32_t insn;

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        return;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        return;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        return;

    case INDEX_op_add_vec:
        insn = add_op[vece];
        break;
    case INDEX_op_sub_vec:
        insn = sub_op[vece];
        break;
    case INDEX_op_neg_vec:
        insn = neg_op[vece];
        a2 = a1;
        a1 = 0;
        break;
    case INDEX_op_mul_vec:
        insn = mul_op[vece];
        break;
    case INDEX_op_ssadd_vec:
        insn = ssadd_op[vece];
        break;
    case INDEX_op_sssub_vec:
        insn = sssub_op[vece];
        break;
    case INDEX_op_usadd_vec:
        insn = usadd_op[vece];
        break;
    case INDEX_op_ussub_vec:
        insn = ussub_op[vece];
        break;
    case INDEX_op_smin_vec:
        insn = smin_op[vece];
        break;
    case INDEX_op_umin_vec:
        insn = umin_op[vece];
        break;
    case INDEX_op_smax_vec:
        insn = smax_op[vece];
        break;
    case INDEX_op_umax_vec:
        insn = umax_op[vece];
        break;
    case INDEX_op_shlv_vec:
        insn = shlv_op[vece];
        break;
    case INDEX_op_shrv_vec:
        insn = shrv_op[vece];
        break;
    case INDEX_op_sarv_vec:
        insn = sarv_op[vece];
        break;
    case INDEX_op_and_vec:
        tcg_out_and_vec(s, a0, a1, a2);
        return;
    case INDEX_op_or_vec:
        tcg_out_or_vec(s, a0, a1, a2);
        return;
    case INDEX_op_xor_vec:
        insn = VXOR;
        break;
    case INDEX_op_andc_vec:
        tcg_out_andc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_not_vec:
        tcg_out_not_vec(s, a0, a1);
        return;
    case INDEX_op_orc_vec:
        tcg_out_orc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_nand_vec:
        insn = VNAND;
        break;
    case INDEX_op_nor_vec:
        insn = VNOR;
        break;
    case INDEX_op_eqv_vec:
        insn = VEQV;
        break;

    case INDEX_op_cmp_vec:
        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
        return;
    case INDEX_op_cmpsel_vec:
        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
                           args[3], const_args[3], args[4], args[5]);
        return;
    case INDEX_op_bitsel_vec:
        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
        return;

    case INDEX_op_dup2_vec:
        assert(TCG_TARGET_REG_BITS == 32);
        /* With inputs a1 = xLxx, a2 = xHxx  */
        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
        return;

    case INDEX_op_ppc_mrgh_vec:
        insn = mrgh_op[vece];
        break;
    case INDEX_op_ppc_mrgl_vec:
        insn = mrgl_op[vece];
        break;
    case INDEX_op_ppc_muleu_vec:
        insn = muleu_op[vece];
        break;
    case INDEX_op_ppc_mulou_vec:
        insn = mulou_op[vece];
        break;
    case INDEX_op_ppc_pkum_vec:
        insn = pkum_op[vece];
        break;
    case INDEX_op_rotlv_vec:
        insn = rotl_op[vece];
        break;
    case INDEX_op_ppc_msum_vec:
        tcg_debug_assert(vece == MO_16);
        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
        return;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(insn != 0);
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}

static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
{
    TCGv_vec t1;

    if (vece == MO_32) {
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using negative numbers gets us the 4th bit easily.
         */
        imm = sextract32(imm, 0, 5);
    } else {
        imm &= (8 << vece) - 1;
    }

    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
    t1 = tcg_constant_vec(type, MO_8, imm);
    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
}
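/*
 * Worked example for the MO_32 case above (illustration only): a shift
 * count of 20 is outside VSPLTISB's -16..15 immediate range, but
 * sextract32(20, 0, 5) = -12, which splats as byte 0xf4.  The variable
 * shift insns (vslw, vsrw, vsraw, vrlw) consume only the low 5 bits of
 * each element, and 0xf4 & 0x1f = 0x14 = 20, so the count survives.
 */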
static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2)
{
    TCGv_vec t1 = tcg_temp_new_vec(type);
    TCGv_vec t2 = tcg_temp_new_vec(type);
    TCGv_vec c0, c16;

    switch (vece) {
    case MO_8:
    case MO_16:
        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
        break;

    case MO_32:
        tcg_debug_assert(!have_isa_2_07);
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using -16 is a quick way to represent 16.
         */
        c16 = tcg_constant_vec(type, MO_8, -16);
        c0 = tcg_constant_vec(type, MO_8, 0);

        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
        tcg_gen_add_vec(MO_32, v0, t1, t2);
        break;

    default:
        g_assert_not_reached();
    }
    tcg_temp_free_vec(t1);
    tcg_temp_free_vec(t2);
}
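/*
 * Algebra behind the MO_32 path above, per 32-bit lane with x = xh:xl
 * and y = yh:yl split into 16-bit halves (a sketch, not additional
 * generated code):
 *
 *     t1 = rotl(y, 16)          =  yl:yh
 *     t2 = vmulouh(x, y)        =  xl * yl
 *     t1 = vmsumuhm(x, t1, 0)   =  xh * yl + xl * yh
 *     v0 = (t1 << 16) + t2      =  (x * y) mod 2^32
 *
 * i.e. the schoolbook split x*y = ((xh*yl + xl*yh) << 16) + xl*yl,
 * with both cross terms gathered by a single multiply-sum.
 */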
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t0;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);

    switch (opc) {
    case INDEX_op_shli_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
        break;
    case INDEX_op_shri_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
        break;
    case INDEX_op_sari_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
        break;
    case INDEX_op_rotli_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
        break;
    case INDEX_op_mul_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        expand_vec_mul(type, vece, v0, v1, v2);
        break;
    case INDEX_op_rotrv_vec:
        /*
         * There is no vector rotate-right insn; rotate left by the
         * negated count instead.  Note that rotlv itself never reaches
         * this expander: it is emitted directly via rotl_op[] above.
         */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t0 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t0, v2);
        tcg_gen_rotlv_vec(vece, v0, v1, t0);
        tcg_temp_free_vec(t0);
        break;
    default:
        g_assert_not_reached();
    }
    va_end(va);
}
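/*
 * The rotrv expansion above relies on rotate counts being read modulo
 * the element width: rotr(x, n) == rotl(x, (-n) mod width), and
 * vrlb/vrlh/vrlw/vrld use only the low log2(width) bits of each count
 * element.  E.g. for MO_32, n = 5 negates to -5, -5 & 31 = 27, and
 * rotl(x, 27) == rotr(x, 5).
 */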
static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(r, r);

    case INDEX_op_add2_i64:
    case INDEX_op_add2_i32:
        return C_O2_I4(r, r, r, r, rI, rZM);
    case INDEX_op_sub2_i64:
    case INDEX_op_sub2_i32:
        return C_O2_I4(r, r, rI, rZM, r, r);

    case INDEX_op_qemu_ld_i32:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);

    case INDEX_op_qemu_st_i32:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);

    case INDEX_op_qemu_ld_i128:
        return C_N1O1_I1(o, m, r);
    case INDEX_op_qemu_st_i128:
        return C_O0_I3(o, m, r);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    case INDEX_op_dup_vec:
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);
    case INDEX_op_cmpsel_vec:
        return C_O1_I4(v, v, v, vZM, v);

    default:
        return C_NotImplemented;
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
    if (have_altivec) {
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);  /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);  /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2);  /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
    }
}

#ifdef __ELF__
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif

static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),      /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    uint8_t *p = &debug_frame.fde_reg_ofs[3];
    int i;

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
        p[0] = 0x80 + tcg_target_callee_save_regs[i];
        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
    }

    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif /* __ELF__ */

#undef VMULEUB
#undef VMULEUH
#undef VMULEUW
#undef VMULOUB
#undef VMULOUH
#undef VMULOUW
#undef VMSUMUHM
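/*
 * For reference, a worked example of the DWARF byte encoding built in
 * debug_frame above, assuming (for illustration only) a 64-bit host
 * with FRAME_SIZE = 0x1a0: DW_CFA_def_cfa is opcode 12, so fde_def_cfa
 * becomes { 12, 1, 0xa0, 0x03 }, i.e. "CFA = r1 + 0x1a0", where 0x1a0
 * is the two-byte uleb128 0xa0 0x03 ((0x1a0 & 0x7f) | 0x80, then
 * 0x1a0 >> 7).  Likewise .cie.data_align is the one-byte sleb128 for
 * -SZR = -8, i.e. -8 & 0x7f = 0x78.
 */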