/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# ifdef _CALL_AIX
    /* ok */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* ok */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* ok */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif
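
/*
 * Note: TCG_CALL_ARG_EVEN above (32-bit SysV) pads an I64 argument to
 * an even argument slot, so it lands in an aligned gpr pair
 * (r3:r4, r5:r6, ...), matching the SVR4 convention for long long.
 */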

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000
#define TCG_CT_CONST_CMP  0x8000
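
/*
 * Constraint flags tested by tcg_target_const_match below: S16/U16 and
 * S32/U32 accept sign- or zero-extended 16/32-bit immediates, ZERO and
 * MONE accept exactly 0 and -1, WSZ accepts the word size (32 or 64),
 * and CMP accepts any immediate the comparison insns can encode for
 * the given condition.
 */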

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifndef R_PPC64_PCREL34
#define R_PPC64_PCREL34  132
#endif

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

/*
 * For PPC, we use TB+4 instead of TB as the base: tb_start materializes
 * TCG_REG_TB with a "bcl 20,31,$+4; mflr" pair, which yields the address
 * of the insn after the bcl, i.e. the TB entry point plus 4.
 */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
{
    return tcg_tbrel_diff(s, target) - 4;
}

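/*
 * The I-form branch (B) holds a 24-bit LI field that is concatenated
 * with 0b00, giving a signed 26-bit byte displacement.
 */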
static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

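/*
 * A 34-bit displacement for a prefixed insn: the high 18 bits live in
 * the prefix word, the low 16 bits in the suffix word.
 */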
static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
        return true;
    }
    return false;
}

static bool mask_operand(uint32_t c, int *mb, int *me);
static bool mask64_operand(uint64_t c, int *mb, int *me);

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = sval;
    int mb, me;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    if (ct & TCG_CT_CONST_CMP) {
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            ct |= TCG_CT_CONST_S16;
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            ct |= TCG_CT_CONST_U16;
            break;
        case TCG_COND_TSTEQ:
        case TCG_COND_TSTNE:
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
                return 1;
            }
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
                return 1;
            }
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
                return 1;
            }
            return 0;
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)

#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)
#define LQ     OPCD( 56)

#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)
#define STQ    XO62(  2)

#define PLWA   OPCD( 41)
#define PLD    OPCD( 57)
#define PLXSD  OPCD( 42)
#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */

#define PSTD   OPCD( 61)
#define PSTXSD OPCD( 46)
#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)
#define ADDPCIS XO19( 2)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

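/*
 * mtspr/mfspr encode the SPR number with its two 5-bit halves swapped,
 * so SPR(8, 0) below is LR (SPR 8) and SPR(9, 0) is CTR (SPR 9).
 */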
#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define SETBC    XO31(384)  /* v3.10 */
#define SETBCR   XO31(416)  /* v3.10 */
#define SETNBC   XO31(448)  /* v3.10 */
#define SETNBCR  XO31(480)  /* v3.10 */

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

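/*
 * CR field n occupies CR bits 4n..4n+3, so BI(0, CR_EQ) selects the EQ
 * bit of CR0.  Signed and unsigned conditions map to the same bits:
 * the earlier choice between CMP and CMPL already handled signedness.
 */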
static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
};

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
{
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
}

static void tcg_out_prefix_align(TCGContext *s)
{
    if (tcg_out_need_prefix_align(s)) {
        tcg_out32(s, NOP);
    }
}

static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}

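/*
 * Prefixed loads/stores split a 34-bit immediate across two words:
 * imm[33:16] goes in the prefix and imm[15:0] in the suffix, with the
 * R bit selecting PC-relative addressing.
 */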
/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, bool rc)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
}

static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb)
{
    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
}

static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, int me, bool rc)
{
    tcg_debug_assert((mb & 0x1f) == mb);
    tcg_debug_assert((me & 0x1f) == me);
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
}

static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb, int me)
{
    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
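    /*
     * Limit immediate shift count lest we create an illegal insn.
     * SRADI takes sh[0:4] in the SH field and sh[5] in insn bit 1,
     * hence the (c >> 4) & 2 term.
     */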
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

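    /*
     * ADDPCIS adds sign-extended (imm >> 16) << 16 to the next insn
     * address; the 16-bit D field is scattered into d0 (10 bits),
     * d1 (5 bits) and d2 (1 bit).
     */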
    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}

static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
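    /* e.g. arg = 0x7fff00000000: li ret,0x7fff; sldi ret,ret,32.  */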
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
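    /* Example: c = 0x00ff0000 yields mb = 8, me = 15 (big-endian bits). */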
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
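    /*
     * e.g. offset 0x7fff8000: l0 = -0x8000 would drive l1 negative,
     * so emit two ADDIS of 0x4000 each before the -0x8000 offset.
     */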
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/*
 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
 * If RC, then also set RC0.
 */
static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
                         bool const_arg2, TCGType type, bool rc)
{
    int mb, me;

    if (!const_arg2) {
        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
        return;
    }

    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    if ((arg2 & ~0xffff) == 0) {
        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
        return;
    }
    if ((arg2 & ~0xffff0000ull) == 0) {
        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
        return;
    }
    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
        return;
    }
    if (TCG_TARGET_REG_BITS == 64) {
        int sh = clz64(arg2);
        if (mask64_operand(arg2 << sh, &mb, &me)) {
            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
            return;
        }
    }
    /* Constraints should satisfy this. */
    g_assert_not_reached();
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        bool const_arg2, int cr, TCGType type)
{
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /*
     * Simplify the comparisons below wrt CMPI.
     * All of the tests are 16-bit, so a 32-bit sign extend always works.
     */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t)arg2 == arg2) {
                op = CMPI;
                break;
            }
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        tcg_debug_assert(cr == 0);
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
        return;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            tcg_debug_assert((int16_t)arg2 == arg2);
            op = CMPI;
            break;
        }
        op = CMP;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    default:
        g_assert_not_reached();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
    op |= RA(arg1);
    op |= const_arg2 ? arg2 & 0xffff : RB(arg2);
    tcg_out32(s, op);
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
1826        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1827        tcg_out32(s, SUBFE | TAB(dst, src, src));
1828        return;
1829    }
1830
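    /*
     * cntlzw/cntlzd return the operand width (32 or 64) only for zero
     * input; shifting the count right by log2(width) thus yields 1 for
     * zero and 0 for everything else.
     */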
1831    if (type == TCG_TYPE_I32) {
1832        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1833        tcg_out_shri32(s, dst, dst, 5);
1834    } else {
1835        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1836        tcg_out_shri64(s, dst, dst, 6);
1837    }
1838    if (neg) {
1839        tcg_out32(s, NEG | RT(dst) | RA(dst));
1840    }
1841}
1842
1843static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
1844                                TCGReg dst, TCGReg src, bool neg)
1845{
1846    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
1847        /*
1848         * X != 0 implies X + -1 generates a carry.  The subfe then
1849         * computes RT = ~(X - 1) + X + CA = -X + X + CA = CA.
1850         */
1851        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1852        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1853        return;
1854    }
1855    tcg_out_setcond_eq0(s, type, dst, src, false);
1856    if (neg) {
1857        tcg_out32(s, ADDI | TAI(dst, dst, -1));
1858    } else {
1859        tcg_out_xori32(s, dst, dst, 1);
1860    }
1861}
1862
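/*
 * Reduce a comparison against a register or constant to a comparison
 * against zero: R0 = arg1 ^ arg2 is zero iff arg1 == arg2.
 */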
1863static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1864                                  bool const_arg2)
1865{
1866    if (const_arg2) {
1867        if ((uint32_t)arg2 == arg2) {
1868            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1869        } else {
1870            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1871            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1872        }
1873    } else {
1874        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1875    }
1876    return TCG_REG_R0;
1877}
1878
1879static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1880                            TCGReg arg0, TCGReg arg1, TCGArg arg2,
1881                            bool const_arg2, bool neg)
1882{
1883    int sh;
1884    bool inv;
1885
1886    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1887
1888    /* Ignore high bits of a potential constant arg2.  */
1889    if (type == TCG_TYPE_I32) {
1890        arg2 = (uint32_t)arg2;
1891    }
1892
1893    /* With SETBC/SETBCR, we can always implement with 2 insns. */
1894    if (have_isa_3_10) {
1895        tcg_insn_unit bi, opc;
1896
1897        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
1898
1899        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
1900        bi = tcg_to_bc[cond] & (0x1f << 16);
1901        if (tcg_to_bc[cond] & BO(8)) {
1902            opc = neg ? SETNBC : SETBC;
1903        } else {
1904            opc = neg ? SETNBCR : SETBCR;
1905        }
1906        tcg_out32(s, opc | RT(arg0) | bi);
1907        return;
1908    }
1909
1910    /* Handle common and trivial cases before handling anything else.  */
1911    if (arg2 == 0) {
1912        switch (cond) {
1913        case TCG_COND_EQ:
1914            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1915            return;
1916        case TCG_COND_NE:
1917            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1918            return;
1919        case TCG_COND_GE:
1920            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1921            arg1 = arg0;
1922            /* FALLTHRU */
1923        case TCG_COND_LT:
1924            /* Extract the sign bit.  */
1925            if (type == TCG_TYPE_I32) {
1926                if (neg) {
1927                    tcg_out_sari32(s, arg0, arg1, 31);
1928                } else {
1929                    tcg_out_shri32(s, arg0, arg1, 31);
1930                }
1931            } else {
1932                if (neg) {
1933                    tcg_out_sari64(s, arg0, arg1, 63);
1934                } else {
1935                    tcg_out_shri64(s, arg0, arg1, 63);
1936                }
1937            }
1938            return;
1939        default:
1940            break;
1941        }
1942    }
1943
1944    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1945       All other cases below are also at least 3 insns, so speed up the
1946       code generator by not considering them and always using ISEL.  */
1947    if (have_isel) {
1948        int isel, tab;
1949
1950        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
1951
1952        isel = tcg_to_isel[cond];
1953
1954        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
1955        if (isel & 1) {
1956            /* arg0 = (bc ? 0 : 1) */
1957            tab = TAB(arg0, 0, arg0);
1958            isel &= ~1;
1959        } else {
1960            /* arg0 = (bc ? 1 : 0) */
1961            tcg_out_movi(s, type, TCG_REG_R0, 0);
1962            tab = TAB(arg0, arg0, TCG_REG_R0);
1963        }
1964        tcg_out32(s, isel | tab);
1965        return;
1966    }
1967
1968    inv = false;
1969    switch (cond) {
1970    case TCG_COND_EQ:
1971        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1972        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1973        break;
1974
1975    case TCG_COND_NE:
1976        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1977        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1978        break;
1979
1980    case TCG_COND_TSTEQ:
1981        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
1982        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
1983        break;
1984
1985    case TCG_COND_TSTNE:
1986        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
1987        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
1988        break;
1989
1990    case TCG_COND_LE:
1991    case TCG_COND_LEU:
1992        inv = true;
1993        /* fall through */
1994    case TCG_COND_GT:
1995    case TCG_COND_GTU:
1996        sh = 30; /* CR7 CR_GT */
1997        goto crtest;
1998
1999    case TCG_COND_GE:
2000    case TCG_COND_GEU:
2001        inv = true;
2002        /* fall through */
2003    case TCG_COND_LT:
2004    case TCG_COND_LTU:
2005        sh = 29; /* CR7 CR_LT */
2006        goto crtest;
2007
2008    crtest:
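        /*
         * mfocrf copies CR field 7 into bits 28..31 of R0; rlwinm rotates
         * the selected bit into bit 31, so sh == 29 extracts cr7.lt and
         * sh == 30 extracts cr7.gt.
         */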
2009        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
2010        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
2011        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
2012        if (neg && inv) {
2013            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
2014        } else if (neg) {
2015            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
2016        } else if (inv) {
2017            tcg_out_xori32(s, arg0, arg0, 1);
2018        }
2019        break;
2020
2021    default:
2022        g_assert_not_reached();
2023    }
2024}
2025
2026static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
2027                         TCGReg dest, TCGReg arg1, TCGReg arg2)
2028{
2029    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false);
2030}
2031
2032static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
2033                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
2034{
2035    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false);
2036}
2037
2038static const TCGOutOpSetcond outop_setcond = {
2039    .base.static_constraint = C_O1_I2(r, r, rC),
2040    .out_rrr = tgen_setcond,
2041    .out_rri = tgen_setcondi,
2042};
2043
2044static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
2045                            TCGReg dest, TCGReg arg1, TCGReg arg2)
2046{
2047    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true);
2048}
2049
2050static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
2051                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
2052{
2053    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true);
2054}
2055
2056static const TCGOutOpSetcond outop_negsetcond = {
2057    .base.static_constraint = C_O1_I2(r, r, rC),
2058    .out_rrr = tgen_negsetcond,
2059    .out_rri = tgen_negsetcondi,
2060};
2061
2062static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
2063{
2064    tcg_out32(s, tcg_to_bc[cond] | bd);
2065}
2066
2067static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
2068{
2069    int bd = 0;
2070    if (l->has_value) {
2071        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
2072    } else {
2073        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
2074    }
2075    tcg_out_bc(s, cond, bd);
2076}
2077
2078static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
2079                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
2080{
2081    tcg_out_cmp(s, cond, arg1, arg2, false, 0, type);
2082    tcg_out_bc_lab(s, cond, l);
2083}
2084
2085static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond,
2086                         TCGReg arg1, tcg_target_long arg2, TCGLabel *l)
2087{
2088    tcg_out_cmp(s, cond, arg1, arg2, true, 0, type);
2089    tcg_out_bc_lab(s, cond, l);
2090}
2091
2092static const TCGOutOpBrcond outop_brcond = {
2093    .base.static_constraint = C_O0_I2(r, rC),
2094    .out_rr = tgen_brcond,
2095    .out_ri = tgen_brcondi,
2096};
2097
2098static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
2099                         TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2,
2100                         TCGArg v1, bool const_v1, TCGArg v2, bool const_v2)
2101{
2102    /* If for some reason both inputs are zero, don't produce bad code.  */
2103    if (v1 == 0 && v2 == 0) {
2104        tcg_out_movi(s, type, dest, 0);
2105        return;
2106    }
2107
2108    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);
2109
2110    if (have_isel) {
2111        int isel = tcg_to_isel[cond];
2112
2113        /* Swap the V operands if the operation indicates inversion.  */
2114        if (isel & 1) {
2115            int t = v1;
2116            v1 = v2;
2117            v2 = t;
2118            isel &= ~1;
2119        }
2120        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
2121        if (v2 == 0) {
2122            tcg_out_movi(s, type, TCG_REG_R0, 0);
2123        }
2124        tcg_out32(s, isel | TAB(dest, v1, v2));
2125    } else {
2126        if (dest == v2) {
2127            cond = tcg_invert_cond(cond);
2128            v2 = v1;
2129        } else if (dest != v1) {
2130            if (v1 == 0) {
2131                tcg_out_movi(s, type, dest, 0);
2132            } else {
2133                tcg_out_mov(s, type, dest, v1);
2134            }
2135        }
2136        /* Branch forward over the single insn below (bd == 8).  */
2137        tcg_out_bc(s, cond, 8);
2138        if (v2 == 0) {
2139            tcg_out_movi(s, type, dest, 0);
2140        } else {
2141            tcg_out_mov(s, type, dest, v2);
2142        }
2143    }
2144}
2145
2146static const TCGOutOpMovcond outop_movcond = {
2147    .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ),
2148    .out = tgen_movcond,
2149};
2150
2151static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
2152                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
2153{
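    /*
     * cntlz/cnttz already return the operand width for zero input, so
     * when a2 requests exactly that value the bare insn suffices.
     */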
2154    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2155        tcg_out32(s, opc | RA(a0) | RS(a1));
2156    } else {
2157        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
2158        /* Note that the only other valid constant for a2 is 0.  */
2159        if (have_isel) {
2160            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
2161            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
2162        } else if (!const_a2 && a0 == a2) {
2163            tcg_out_bc(s, TCG_COND_EQ, 8);
2164            tcg_out32(s, opc | RA(a0) | RS(a1));
2165        } else {
2166            tcg_out32(s, opc | RA(a0) | RS(a1));
2167            tcg_out_bc(s, TCG_COND_NE, 8);
2168            if (const_a2) {
2169                tcg_out_movi(s, type, a0, 0);
2170            } else {
2171                tcg_out_mov(s, type, a0, a2);
2172            }
2173        }
2174    }
2175}
2176
2177static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
2178                         TCGArg bl, bool blconst, TCGArg bh, bool bhconst)
2179{
2180    static const struct { uint8_t bit1, bit2; } bits[] = {
2181        [TCG_COND_LT ] = { CR_LT, CR_LT },
2182        [TCG_COND_LE ] = { CR_LT, CR_GT },
2183        [TCG_COND_GT ] = { CR_GT, CR_GT },
2184        [TCG_COND_GE ] = { CR_GT, CR_LT },
2185        [TCG_COND_LTU] = { CR_LT, CR_LT },
2186        [TCG_COND_LEU] = { CR_LT, CR_GT },
2187        [TCG_COND_GTU] = { CR_GT, CR_GT },
2188        [TCG_COND_GEU] = { CR_GT, CR_LT },
2189    };
2190
2191    TCGCond cond2;
2192    int op, bit1, bit2;
2193
2194    switch (cond) {
2195    case TCG_COND_EQ:
2196        op = CRAND;
2197        goto do_equality;
2198    case TCG_COND_NE:
2199        op = CRNAND;
2200    do_equality:
2201        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
2202        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
2203        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2204        break;
2205
2206    case TCG_COND_TSTEQ:
2207    case TCG_COND_TSTNE:
2208        if (blconst) {
2209            tcg_out_andi32(s, TCG_REG_R0, al, bl);
2210        } else {
2211            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
2212        }
2213        if (bhconst) {
2214            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
2215        } else {
2216            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
2217        }
2218        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
2219        break;
2220
2221    case TCG_COND_LT:
2222    case TCG_COND_LE:
2223    case TCG_COND_GT:
2224    case TCG_COND_GE:
2225    case TCG_COND_LTU:
2226    case TCG_COND_LEU:
2227    case TCG_COND_GTU:
2228    case TCG_COND_GEU:
2229        bit1 = bits[cond].bit1;
2230        bit2 = bits[cond].bit2;
2231        op = (bit1 != bit2 ? CRANDC : CRAND);
2232        cond2 = tcg_unsigned_cond(cond);
2233
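        /*
         * Double-word compare: true if the high parts compare true, or if
         * the high parts are equal and the low parts compare true as
         * unsigned.  The CR ops compute
         *   cr0.eq = (cr6.eq AND[C] cr7.bit2) OR cr6.bit1
         * with crandc supplying the complement needed for LE/GE.
         */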
2234        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
2235        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
2236        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
2237        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
2238        break;
2239
2240    default:
2241        g_assert_not_reached();
2242    }
2243}
2244
2245static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
2246                          TCGReg al, TCGReg ah,
2247                          TCGArg bl, bool const_bl,
2248                          TCGArg bh, bool const_bh)
2249{
2250    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
2251    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
2252    tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
2253}
2254
2255#if TCG_TARGET_REG_BITS != 32
2256__attribute__((unused))
2257#endif
2258static const TCGOutOpSetcond2 outop_setcond2 = {
2259    .base.static_constraint = C_O1_I4(r, r, r, rU, rC),
2260    .out = tgen_setcond2,
2261};
2262
2263static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
2264                         TCGArg bl, bool const_bl,
2265                         TCGArg bh, bool const_bh, TCGLabel *l)
2266{
2267    assert(TCG_TARGET_REG_BITS == 32);
2268    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
2269    tcg_out_bc_lab(s, TCG_COND_EQ, l);
2270}
2271
2272#if TCG_TARGET_REG_BITS != 32
2273__attribute__((unused))
2274#endif
2275static const TCGOutOpBrcond2 outop_brcond2 = {
2276    .base.static_constraint = C_O0_I4(r, r, rU, rC),
2277    .out = tgen_brcond2,
2278};
2279
2280static void tcg_out_mb(TCGContext *s, TCGArg a0)
2281{
2282    uint32_t insn;
2283
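    /*
     * lwsync orders all access pairs except store-followed-by-load;
     * only that case requires the full (and slower) hwsync.
     */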
2284    if (a0 & TCG_MO_ST_LD) {
2285        insn = HWSYNC;
2286    } else {
2287        insn = LWSYNC;
2288    }
2289
2290    tcg_out32(s, insn);
2291}
2292
2293static void tcg_out_call_int(TCGContext *s, int lk,
2294                             const tcg_insn_unit *target)
2295{
2296#ifdef _CALL_AIX
2297    /* Look through the descriptor.  Branch directly if the target is in
2298       range and we don't have to spend too much effort building the toc.  */
2299    const void *tgt = ((const void * const *)target)[0];
2300    uintptr_t toc = ((const uintptr_t *)target)[1];
2301    intptr_t diff = tcg_pcrel_diff(s, tgt);
2302
2303    if (in_range_b(diff) && toc == (uint32_t)toc) {
2304        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
2305        tcg_out_b(s, lk, tgt);
2306    } else {
2307        /* Fold the low bits of the constant into the addresses below.  */
2308        intptr_t arg = (intptr_t)target;
2309        int ofs = (int16_t)arg;
2310
2311        if (ofs + 8 < 0x8000) {
2312            arg -= ofs;
2313        } else {
2314            ofs = 0;
2315        }
2316        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
2317        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
2318        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
2319        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
2320        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2321    }
2322#elif defined(_CALL_ELF) && _CALL_ELF == 2
2323    intptr_t diff;
2324
2325    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
2326       address, which the callee uses to compute its TOC address.  */
2327    /* FIXME: when the branch is in range, we could avoid r12 load if we
2328       knew that the destination uses the same TOC, and what its local
2329       entry point offset is.  */
2330    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
2331
2332    diff = tcg_pcrel_diff(s, target);
2333    if (in_range_b(diff)) {
2334        tcg_out_b(s, lk, target);
2335    } else {
2336        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
2337        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2338    }
2339#else
2340    tcg_out_b(s, lk, target);
2341#endif
2342}
2343
2344static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
2345                         const TCGHelperInfo *info)
2346{
2347    tcg_out_call_int(s, LK, target);
2348}
2349
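/*
 * Note there is no MO_SB entry: ppc has no sign-extending byte load, and
 * the byte-swapped signed cases also have no direct insn.  These are
 * handled in tcg_out_qemu_ld by a zero-extending load plus tcg_out_movext.
 */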
2350static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
2351    [MO_UB] = LBZX,
2352    [MO_UW] = LHZX,
2353    [MO_UL] = LWZX,
2354    [MO_UQ] = LDX,
2355    [MO_SW] = LHAX,
2356    [MO_SL] = LWAX,
2357    [MO_BSWAP | MO_UB] = LBZX,
2358    [MO_BSWAP | MO_UW] = LHBRX,
2359    [MO_BSWAP | MO_UL] = LWBRX,
2360    [MO_BSWAP | MO_UQ] = LDBRX,
2361};
2362
2363static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
2364    [MO_UB] = STBX,
2365    [MO_UW] = STHX,
2366    [MO_UL] = STWX,
2367    [MO_UQ] = STDX,
2368    [MO_BSWAP | MO_UB] = STBX,
2369    [MO_BSWAP | MO_UW] = STHBRX,
2370    [MO_BSWAP | MO_UL] = STWBRX,
2371    [MO_BSWAP | MO_UQ] = STDBRX,
2372};
2373
2374static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2375{
2376    if (arg < 0) {
2377        arg = TCG_REG_TMP1;
2378    }
2379    tcg_out32(s, MFSPR | RT(arg) | LR);
2380    return arg;
2381}
2382
2383/*
2384 * When ppc32 sorts 4 input registers into 4 argument registers, there
2385 * is an outside chance we would require 3 temps.
2386 */
2387static const TCGLdstHelperParam ldst_helper_param = {
2388    .ra_gen = ldst_ra_gen,
2389    .ntmp = 3,
2390    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
2391};
2392
2393static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2394{
2395    MemOp opc = get_memop(lb->oi);
2396
2397    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2398        return false;
2399    }
2400
2401    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
2402    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
2403    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
2404
2405    tcg_out_b(s, 0, lb->raddr);
2406    return true;
2407}
2408
2409static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2410{
2411    MemOp opc = get_memop(lb->oi);
2412
2413    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2414        return false;
2415    }
2416
2417    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
2418    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
2419
2420    tcg_out_b(s, 0, lb->raddr);
2421    return true;
2422}
2423
2424typedef struct {
2425    TCGReg base;
2426    TCGReg index;
2427    TCGAtomAlign aa;
2428} HostAddress;
2429
2430bool tcg_target_has_memory_bswap(MemOp memop)
2431{
2432    TCGAtomAlign aa;
2433
2434    if ((memop & MO_SIZE) <= MO_64) {
2435        return true;
2436    }
2437
2438    /*
2439     * Reject 16-byte memop with 16-byte atomicity,
2440     * but do allow a pair of 64-bit operations.
2441     */
2442    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2443    return aa.atom <= MO_64;
2444}
2445
2446/* We expect to use a 16-bit negative offset from ENV.  */
2447#define MIN_TLB_MASK_TABLE_OFS  -32768
2448
2449/*
2450 * For system-mode, perform the TLB load and compare.
2451 * For user-mode, perform any required alignment tests.
2452 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2453 * is required and fill in @h with the host address for the fast path.
2454 */
2455static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2456                                           TCGReg addr, MemOpIdx oi, bool is_ld)
2457{
2458    TCGType addr_type = s->addr_type;
2459    TCGLabelQemuLdst *ldst = NULL;
2460    MemOp opc = get_memop(oi);
2461    MemOp a_bits, s_bits;
2462
2463    /*
2464     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2465     *
2466     * Before 3.0, "An access that is not atomic is performed as a set of
2467     * smaller disjoint atomic accesses. In general, the number and alignment
2468     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
2469     *
2470     * As of 3.0, "the non-atomic access is performed as described in
2471     * the corresponding list", which matches MO_ATOM_SUBALIGN.
2472     */
2473    s_bits = opc & MO_SIZE;
2474    h->aa = atom_and_align_for_opc(s, opc,
2475                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
2476                                                 : MO_ATOM_IFALIGN,
2477                                   s_bits == MO_128);
2478    a_bits = h->aa.align;
2479
2480    if (tcg_use_softmmu) {
2481        int mem_index = get_mmuidx(oi);
2482        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2483                            : offsetof(CPUTLBEntry, addr_write);
2484        int fast_off = tlb_mask_table_ofs(s, mem_index);
2485        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2486        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2487
2488        ldst = new_ldst_label(s);
2489        ldst->is_ld = is_ld;
2490        ldst->oi = oi;
2491        ldst->addr_reg = addr;
2492
2493        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2494        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2495        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2496
2497        /* Extract the page index, shifted into place for tlb index.  */
2498        if (TCG_TARGET_REG_BITS == 32) {
2499            tcg_out_shri32(s, TCG_REG_R0, addr,
2500                           s->page_bits - CPU_TLB_ENTRY_BITS);
2501        } else {
2502            tcg_out_shri64(s, TCG_REG_R0, addr,
2503                           s->page_bits - CPU_TLB_ENTRY_BITS);
2504        }
2505        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2506
2507        /*
2508         * Load the TLB comparator into TMP2.
2509         * For 64-bit host, always load the entire 64-bit slot for simplicity.
2510         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2511         */
2512        if (cmp_off == 0) {
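            /*
             * The update-form load adds TMP1 + TMP2 back into TMP1 as a
             * side effect, yielding the CPUTLBEntry address and the
             * comparator in one insn.
             */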
2513            tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX)
2514                      | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
2515        } else {
2516            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2517            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
2518        }
2519
2520        /*
2521         * Load the TLB addend for use on the fast path.
2522         * Do this asap to minimize any load use delay.
2523         */
2524        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2525            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2526                       offsetof(CPUTLBEntry, addend));
2527        }
2528
2529        /* Clear the non-page, non-alignment bits from the address in R0. */
2530        if (TCG_TARGET_REG_BITS == 32) {
2531            /*
2532             * We don't support unaligned accesses on 32-bit hosts.
2533             * Preserve the bottom bits and thus trigger a comparison
2534             * failure on unaligned accesses.
2535             */
2536            if (a_bits < s_bits) {
2537                a_bits = s_bits;
2538            }
2539            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
2540                        (32 - a_bits) & 31, 31 - s->page_bits);
2541        } else {
2542            TCGReg t = addr;
2543
2544            /*
2545             * If the access is unaligned, we need to make sure we fail if we
2546             * cross a page boundary.  The trick is to add the access size-1
2547             * to the address before masking the low bits.  That will make the
2548             * address overflow to the next page if we cross a page boundary,
2549             * which will then force a mismatch of the TLB compare.
2550             */
2551            if (a_bits < s_bits) {
2552                unsigned a_mask = (1 << a_bits) - 1;
2553                unsigned s_mask = (1 << s_bits) - 1;
2554                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2555                t = TCG_REG_R0;
2556            }
2557
2558            /* Mask the address for the requested alignment.  */
2559            if (addr_type == TCG_TYPE_I32) {
2560                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2561                            (32 - a_bits) & 31, 31 - s->page_bits);
2562            } else if (a_bits == 0) {
2563                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2564            } else {
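                /*
                 * Rotate right by page_bits, clearing the high
                 * (page_bits - a_bits) bits, then rotate back: this clears
                 * only bits [a_bits, page_bits), keeping the low alignment
                 * bits so that misalignment fails the TLB compare.
                 */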
2565                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2566                            64 - s->page_bits, s->page_bits - a_bits);
2567                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
2568            }
2569        }
2570
2571        /* Full comparison into cr0. */
2572        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type);
2573
2574        /* Record the bc insn for patching, branching w/link to the slow path. */
2575        ldst->label_ptr[0] = s->code_ptr;
2576        tcg_out_bc(s, TCG_COND_NE, LK);
2577
2578        h->base = TCG_REG_TMP1;
2579    } else {
2580        if (a_bits) {
2581            ldst = new_ldst_label(s);
2582            ldst->is_ld = is_ld;
2583            ldst->oi = oi;
2584            ldst->addr_reg = addr;
2585
2586            /* We expect a_bits to max out at 7, well within ANDI's 16-bit uimm. */
2587            tcg_debug_assert(a_bits < 16);
2588            tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1));
2589
2590            ldst->label_ptr[0] = s->code_ptr;
2591            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2592        }
2593
2594        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2595    }
2596
2597    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2598        /* Zero-extend the guest address for use in the host address. */
2599        tcg_out_ext32u(s, TCG_REG_TMP2, addr);
2600        h->index = TCG_REG_TMP2;
2601    } else {
2602        h->index = addr;
2603    }
2604
2605    return ldst;
2606}
2607
2608static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2609                            TCGReg addr, MemOpIdx oi, TCGType data_type)
2610{
2611    MemOp opc = get_memop(oi);
2612    TCGLabelQemuLdst *ldst;
2613    HostAddress h;
2614
2615    ldst = prepare_host_addr(s, &h, addr, oi, true);
2616
2617    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2618        if (opc & MO_BSWAP) {
2619            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2620            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2621            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
2622        } else if (h.base != 0) {
2623            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2624            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
2625            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
2626        } else if (h.index == datahi) {
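            /* Index overlaps datahi; load datalo first to keep the address. */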
2627            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2628            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2629        } else {
2630            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2631            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2632        }
2633    } else {
2634        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2635        if (!have_isa_2_06 && insn == LDBRX) {
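            /*
             * Before ISA 2.06 there is no ldbrx: synthesize it from two
             * lwbrx loads, merging the second into the high 32 bits.
             */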
2636            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2637            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2638            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
2639            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2640        } else if (insn) {
2641            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2642        } else {
2643            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2644            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2645            tcg_out_movext(s, TCG_TYPE_REG, datalo,
2646                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
2647        }
2648    }
2649
2650    if (ldst) {
2651        ldst->type = data_type;
2652        ldst->datalo_reg = datalo;
2653        ldst->datahi_reg = datahi;
2654        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2655    }
2656}
2657
2658static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2659                            TCGReg addr, MemOpIdx oi, TCGType data_type)
2660{
2661    MemOp opc = get_memop(oi);
2662    TCGLabelQemuLdst *ldst;
2663    HostAddress h;
2664
2665    ldst = prepare_host_addr(s, &h, addr, oi, false);
2666
2667    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2668        if (opc & MO_BSWAP) {
2669            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2670            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2671            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
2672        } else if (h.base != 0) {
2673            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2674            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
2675            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
2676        } else {
2677            tcg_out32(s, STW | TAI(datahi, h.index, 0));
2678            tcg_out32(s, STW | TAI(datalo, h.index, 4));
2679        }
2680    } else {
2681        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2682        if (!have_isa_2_06 && insn == STDBRX) {
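            /*
             * Likewise no stdbrx before ISA 2.06: store the two halves
             * byte-reversed with stwbrx.
             */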
2683            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2684            tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4));
2685            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2686            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2));
2687        } else {
2688            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
2689        }
2690    }
2691
2692    if (ldst) {
2693        ldst->type = data_type;
2694        ldst->datalo_reg = datalo;
2695        ldst->datahi_reg = datahi;
2696        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2697    }
2698}
2699
2700static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
2701                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
2702{
2703    TCGLabelQemuLdst *ldst;
2704    HostAddress h;
2705    bool need_bswap;
2706    uint32_t insn;
2707    TCGReg index;
2708
2709    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
2710
2711    /* Compose the final address, as LQ/STQ have no indexing. */
2712    index = h.index;
2713    if (h.base != 0) {
2714        index = TCG_REG_TMP1;
2715        tcg_out32(s, ADD | TAB(index, h.base, h.index));
2716    }
2717    need_bswap = get_memop(oi) & MO_BSWAP;
2718
2719    if (h.aa.atom == MO_128) {
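        /*
         * lq/stq operate on an even/odd register pair, named by the even
         * register; the asserts match the register constraints used for
         * the 16-byte atomic case.
         */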
2720        tcg_debug_assert(!need_bswap);
2721        tcg_debug_assert(datalo & 1);
2722        tcg_debug_assert(datahi == datalo - 1);
2723        tcg_debug_assert(!is_ld || datahi != index);
2724        insn = is_ld ? LQ : STQ;
2725        tcg_out32(s, insn | TAI(datahi, index, 0));
2726    } else {
2727        TCGReg d1, d2;
2728
2729        if (HOST_BIG_ENDIAN ^ need_bswap) {
2730            d1 = datahi, d2 = datalo;
2731        } else {
2732            d1 = datalo, d2 = datahi;
2733        }
2734
2735        if (need_bswap) {
2736            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
2737            insn = is_ld ? LDBRX : STDBRX;
2738            tcg_out32(s, insn | TAB(d1, 0, index));
2739            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
2740        } else {
2741            insn = is_ld ? LD : STD;
2742            tcg_out32(s, insn | TAI(d1, index, 0));
2743            tcg_out32(s, insn | TAI(d2, index, 8));
2744        }
2745    }
2746
2747    if (ldst) {
2748        ldst->type = TCG_TYPE_I128;
2749        ldst->datalo_reg = datalo;
2750        ldst->datahi_reg = datahi;
2751        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2752    }
2753}
2754
2755static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2756{
2757    int i;
2758    for (i = 0; i < count; ++i) {
2759        p[i] = NOP;
2760    }
2761}
2762
2763/* Parameters for function call generation, used in tcg.c.  */
2764#define TCG_TARGET_STACK_ALIGN       16
2765
2766#ifdef _CALL_AIX
2767# define LINK_AREA_SIZE                (6 * SZR)
2768# define LR_OFFSET                     (1 * SZR)
2769# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2770#elif defined(_CALL_DARWIN)
2771# define LINK_AREA_SIZE                (6 * SZR)
2772# define LR_OFFSET                     (2 * SZR)
2773#elif TCG_TARGET_REG_BITS == 64
2774# if defined(_CALL_ELF) && _CALL_ELF == 2
2775#  define LINK_AREA_SIZE               (4 * SZR)
2776#  define LR_OFFSET                    (1 * SZR)
2777# endif
2778#else /* TCG_TARGET_REG_BITS == 32 */
2779# if defined(_CALL_SYSV)
2780#  define LINK_AREA_SIZE               (2 * SZR)
2781#  define LR_OFFSET                    (1 * SZR)
2782# endif
2783#endif
2784#ifndef LR_OFFSET
2785# error "Unhandled abi"
2786#endif
2787#ifndef TCG_TARGET_CALL_STACK_OFFSET
2788# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2789#endif
2790
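/*
 * Frame layout, from the stack pointer upward: ABI linkage area,
 * outgoing call arguments, TCG temporaries, then the callee-saved
 * registers, with the total rounded up to TCG_TARGET_STACK_ALIGN.
 */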
2791#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2792#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2793
2794#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2795                     + TCG_STATIC_CALL_ARGS_SIZE    \
2796                     + CPU_TEMP_BUF_SIZE            \
2797                     + REG_SAVE_SIZE                \
2798                     + TCG_TARGET_STACK_ALIGN - 1)  \
2799                    & -TCG_TARGET_STACK_ALIGN)
2800
2801#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2802
2803static void tcg_target_qemu_prologue(TCGContext *s)
2804{
2805    int i;
2806
2807#ifdef _CALL_AIX
2808    const void **desc = (const void **)s->code_ptr;
2809    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2810    desc[1] = 0;                            /* environment pointer */
2811    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2812#endif
2813
2814    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2815                  CPU_TEMP_BUF_SIZE);
2816
2817    /* Prologue */
2818    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2819    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2820              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2821
2822    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2823        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2824                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2825    }
2826    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2827
2828    if (!tcg_use_softmmu && guest_base) {
2829        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2830        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2831    }
2832
2833    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2834    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2835    tcg_out32(s, BCCTR | BO_ALWAYS);
2836
2837    /* Epilogue */
2838    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2839
2840    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2841    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2842        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2843                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2844    }
2845    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2846    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2847    tcg_out32(s, BCLR | BO_ALWAYS);
2848}
2849
2850static void tcg_out_tb_start(TCGContext *s)
2851{
2852    /* Load TCG_REG_TB. */
2853    if (USE_REG_TB) {
2854        if (have_isa_3_00) {
2855            /* lnia REG_TB */
2856            tcg_out_addpcis(s, TCG_REG_TB, 0);
2857        } else {
2858            /* bcl 20,31,$+4 (preferred form for getting nia) */
2859            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
2860            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
2861        }
2862    }
2863}
2864
2865static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2866{
2867    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2868    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2869}
2870
2871static void tcg_out_goto_tb(TCGContext *s, int which)
2872{
2873    uintptr_t ptr = get_jmp_target_addr(s, which);
2874    int16_t lo;
2875
2876    /* Direct branch will be patched by tb_target_set_jmp_target. */
2877    set_jmp_insn_offset(s, which);
2878    tcg_out32(s, NOP);
2879
2880    /* When the direct branch is out of range, fall through to indirect. */
2881    if (USE_REG_TB) {
2882        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
2883        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
2884    } else if (have_isa_3_10) {
2885        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
2886        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
2887    } else if (have_isa_3_00) {
2888        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
2889        lo = offset;
2890        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
2891        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2892    } else {
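        /*
         * Split the absolute address so its low 16 bits fold into the
         * load displacement; only the high part needs a movi.
         */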
2893        lo = ptr;
2894        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
2895        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2896    }
2897
2898    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2899    tcg_out32(s, BCCTR | BO_ALWAYS);
2900    set_jmp_reset_offset(s, which);
2901}
2902
2903void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2904                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2905{
2906    uintptr_t addr = tb->jmp_target_addr[n];
2907    intptr_t diff = addr - jmp_rx;
2908    tcg_insn_unit insn;
2909
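    /*
     * Patch a direct branch when the target is reachable; otherwise patch
     * a nop so that execution falls through to the indirect branch
     * sequence emitted by tcg_out_goto_tb.
     */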
2910    if (in_range_b(diff)) {
2911        insn = B | (diff & 0x3fffffc);
2912    } else {
2913        insn = NOP;
2914    }
2915
2916    qatomic_set((uint32_t *)jmp_rw, insn);
2917    flush_idcache_range(jmp_rx, jmp_rw, 4);
2918}
2919
2920
2921static void tgen_add(TCGContext *s, TCGType type,
2922                     TCGReg a0, TCGReg a1, TCGReg a2)
2923{
2924    tcg_out32(s, ADD | TAB(a0, a1, a2));
2925}
2926
2927static void tgen_addi(TCGContext *s, TCGType type,
2928                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2929{
2930    tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2931}
2932
2933static const TCGOutOpBinary outop_add = {
2934    .base.static_constraint = C_O1_I2(r, r, rT),
2935    .out_rrr = tgen_add,
2936    .out_rri = tgen_addi,
2937};
2938
2939static void tgen_and(TCGContext *s, TCGType type,
2940                     TCGReg a0, TCGReg a1, TCGReg a2)
2941{
2942    tcg_out32(s, AND | SAB(a1, a0, a2));
2943}
2944
2945static void tgen_andi(TCGContext *s, TCGType type,
2946                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2947{
2948    if (type == TCG_TYPE_I32) {
2949        tcg_out_andi32(s, a0, a1, a2);
2950    } else {
2951        tcg_out_andi64(s, a0, a1, a2);
2952    }
2953}
2954
2955static const TCGOutOpBinary outop_and = {
2956    .base.static_constraint = C_O1_I2(r, r, ri),
2957    .out_rrr = tgen_and,
2958    .out_rri = tgen_andi,
2959};
2960
2961static void tgen_andc(TCGContext *s, TCGType type,
2962                      TCGReg a0, TCGReg a1, TCGReg a2)
2963{
2964    tcg_out32(s, ANDC | SAB(a1, a0, a2));
2965}
2966
2967static const TCGOutOpBinary outop_andc = {
2968    .base.static_constraint = C_O1_I2(r, r, r),
2969    .out_rrr = tgen_andc,
2970};
2971
2972static void tgen_clz(TCGContext *s, TCGType type,
2973                     TCGReg a0, TCGReg a1, TCGReg a2)
2974{
2975    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
2976    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
2977}
2978
2979static void tgen_clzi(TCGContext *s, TCGType type,
2980                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2981{
2982    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
2983    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
2984}
2985
2986static const TCGOutOpBinary outop_clz = {
2987    .base.static_constraint = C_O1_I2(r, r, rZW),
2988    .out_rrr = tgen_clz,
2989    .out_rri = tgen_clzi,
2990};
2991
2992static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2993{
2994    uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD;
2995    tcg_out32(s, insn | SAB(a1, a0, 0));
2996}
2997
2998static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
2999{
3000    return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented;
3001}
3002
3003static const TCGOutOpUnary outop_ctpop = {
3004    .base.static_constraint = C_Dynamic,
3005    .base.dynamic_constraint = cset_ctpop,
3006    .out_rr = tgen_ctpop,
3007};
3008
3009static void tgen_ctz(TCGContext *s, TCGType type,
3010                     TCGReg a0, TCGReg a1, TCGReg a2)
3011{
3012    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
3013    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
3014}
3015
3016static void tgen_ctzi(TCGContext *s, TCGType type,
3017                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3018{
3019    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
3020    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
3021}
3022
3023static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags)
3024{
3025    return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented;
3026}
3027
3028static const TCGOutOpBinary outop_ctz = {
3029    .base.static_constraint = C_Dynamic,
3030    .base.dynamic_constraint = cset_ctz,
3031    .out_rrr = tgen_ctz,
3032    .out_rri = tgen_ctzi,
3033};
3034
3035static void tgen_eqv(TCGContext *s, TCGType type,
3036                     TCGReg a0, TCGReg a1, TCGReg a2)
3037{
3038    tcg_out32(s, EQV | SAB(a1, a0, a2));
3039}
3040
3041static void tgen_divs(TCGContext *s, TCGType type,
3042                      TCGReg a0, TCGReg a1, TCGReg a2)
3043{
3044    uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD;
3045    tcg_out32(s, insn | TAB(a0, a1, a2));
3046}
3047
3048static const TCGOutOpBinary outop_divs = {
3049    .base.static_constraint = C_O1_I2(r, r, r),
3050    .out_rrr = tgen_divs,
3051};
3052
3053static const TCGOutOpDivRem outop_divs2 = {
3054    .base.static_constraint = C_NotImplemented,
3055};
3056
3057static void tgen_divu(TCGContext *s, TCGType type,
3058                      TCGReg a0, TCGReg a1, TCGReg a2)
3059{
3060    uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU;
3061    tcg_out32(s, insn | TAB(a0, a1, a2));
3062}
3063
3064static const TCGOutOpBinary outop_divu = {
3065    .base.static_constraint = C_O1_I2(r, r, r),
3066    .out_rrr = tgen_divu,
3067};
3068
3069static const TCGOutOpDivRem outop_divu2 = {
3070    .base.static_constraint = C_NotImplemented,
3071};
3072
3073static const TCGOutOpBinary outop_eqv = {
3074    .base.static_constraint = C_O1_I2(r, r, r),
3075    .out_rrr = tgen_eqv,
3076};
3077
3078static void tgen_mul(TCGContext *s, TCGType type,
3079                    TCGReg a0, TCGReg a1, TCGReg a2)
3080{
3081    uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD;
3082    tcg_out32(s, insn | TAB(a0, a1, a2));
3083}
3084
3085static void tgen_muli(TCGContext *s, TCGType type,
3086                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3087{
3088    tcg_out32(s, MULLI | TAI(a0, a1, a2));
3089}
3090
3091static const TCGOutOpBinary outop_mul = {
3092    .base.static_constraint = C_O1_I2(r, r, rI),
3093    .out_rrr = tgen_mul,
3094    .out_rri = tgen_muli,
3095};
3096
3097static const TCGOutOpMul2 outop_muls2 = {
3098    .base.static_constraint = C_NotImplemented,
3099};
3100
3101static void tgen_mulsh(TCGContext *s, TCGType type,
3102                       TCGReg a0, TCGReg a1, TCGReg a2)
3103{
3104    uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD;
3105    tcg_out32(s, insn | TAB(a0, a1, a2));
3106}
3107
3108static const TCGOutOpBinary outop_mulsh = {
3109    .base.static_constraint = C_O1_I2(r, r, r),
3110    .out_rrr = tgen_mulsh,
3111};
3112
3113static const TCGOutOpMul2 outop_mulu2 = {
3114    .base.static_constraint = C_NotImplemented,
3115};
3116
3117static void tgen_muluh(TCGContext *s, TCGType type,
3118                       TCGReg a0, TCGReg a1, TCGReg a2)
3119{
3120    uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU;
3121    tcg_out32(s, insn | TAB(a0, a1, a2));
3122}
3123
3124static const TCGOutOpBinary outop_muluh = {
3125    .base.static_constraint = C_O1_I2(r, r, r),
3126    .out_rrr = tgen_muluh,
3127};
3128
3129static void tgen_nand(TCGContext *s, TCGType type,
3130                     TCGReg a0, TCGReg a1, TCGReg a2)
3131{
3132    tcg_out32(s, NAND | SAB(a1, a0, a2));
3133}
3134
3135static const TCGOutOpBinary outop_nand = {
3136    .base.static_constraint = C_O1_I2(r, r, r),
3137    .out_rrr = tgen_nand,
3138};
3139
3140static void tgen_nor(TCGContext *s, TCGType type,
3141                     TCGReg a0, TCGReg a1, TCGReg a2)
3142{
3143    tcg_out32(s, NOR | SAB(a1, a0, a2));
3144}
3145
3146static const TCGOutOpBinary outop_nor = {
3147    .base.static_constraint = C_O1_I2(r, r, r),
3148    .out_rrr = tgen_nor,
3149};
3150
3151static void tgen_or(TCGContext *s, TCGType type,
3152                    TCGReg a0, TCGReg a1, TCGReg a2)
3153{
3154    tcg_out32(s, OR | SAB(a1, a0, a2));
3155}
3156
3157static void tgen_ori(TCGContext *s, TCGType type,
3158                     TCGReg a0, TCGReg a1, tcg_target_long a2)
3159{
3160    tcg_out_ori32(s, a0, a1, a2);
3161}
3162
3163static const TCGOutOpBinary outop_or = {
3164    .base.static_constraint = C_O1_I2(r, r, rU),
3165    .out_rrr = tgen_or,
3166    .out_rri = tgen_ori,
3167};
3168
3169static void tgen_orc(TCGContext *s, TCGType type,
3170                     TCGReg a0, TCGReg a1, TCGReg a2)
3171{
3172    tcg_out32(s, ORC | SAB(a1, a0, a2));
3173}
3174
3175static const TCGOutOpBinary outop_orc = {
3176    .base.static_constraint = C_O1_I2(r, r, r),
3177    .out_rrr = tgen_orc,
3178};
3179
3180static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags)
3181{
3182    return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented;
3183}
3184
3185static void tgen_rems(TCGContext *s, TCGType type,
3186                      TCGReg a0, TCGReg a1, TCGReg a2)
3187{
3188    uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD;
3189    tcg_out32(s, insn | TAB(a0, a1, a2));
3190}
3191
3192static const TCGOutOpBinary outop_rems = {
3193    .base.static_constraint = C_Dynamic,
3194    .base.dynamic_constraint = cset_mod,
3195    .out_rrr = tgen_rems,
3196};
3197
3198static void tgen_remu(TCGContext *s, TCGType type,
3199                      TCGReg a0, TCGReg a1, TCGReg a2)
3200{
3201    uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD;
3202    tcg_out32(s, insn | TAB(a0, a1, a2));
3203}
3204
3205static const TCGOutOpBinary outop_remu = {
3206    .base.static_constraint = C_Dynamic,
3207    .base.dynamic_constraint = cset_mod,
3208    .out_rrr = tgen_remu,
3209};
3210
3211static void tgen_rotl(TCGContext *s, TCGType type,
3212                     TCGReg a0, TCGReg a1, TCGReg a2)
3213{
3214    if (type == TCG_TYPE_I32) {
3215        tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31));
3216    } else {
3217        tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0));
3218    }
3219}
3220
3221static void tgen_rotli(TCGContext *s, TCGType type,
3222                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3223{
3224    if (type == TCG_TYPE_I32) {
3225        tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
3226    } else {
3227        tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
3228    }
3229}
3230
3231static const TCGOutOpBinary outop_rotl = {
3232    .base.static_constraint = C_O1_I2(r, r, ri),
3233    .out_rrr = tgen_rotl,
3234    .out_rri = tgen_rotli,
3235};
3236
3237static const TCGOutOpBinary outop_rotr = {
3238    .base.static_constraint = C_NotImplemented,
3239};
3240
3241static void tgen_sar(TCGContext *s, TCGType type,
3242                     TCGReg a0, TCGReg a1, TCGReg a2)
3243{
3244    uint32_t insn = type == TCG_TYPE_I32 ? SRAW : SRAD;
3245    tcg_out32(s, insn | SAB(a1, a0, a2));
3246}
3247
3248static void tgen_sari(TCGContext *s, TCGType type,
3249                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3250{
3251    /* Limit immediate shift count lest we create an illegal insn.  */
3252    if (type == TCG_TYPE_I32) {
3253        tcg_out_sari32(s, a0, a1, a2 & 31);
3254    } else {
3255        tcg_out_sari64(s, a0, a1, a2 & 63);
3256    }
3257}
3258
3259static const TCGOutOpBinary outop_sar = {
3260    .base.static_constraint = C_O1_I2(r, r, ri),
3261    .out_rrr = tgen_sar,
3262    .out_rri = tgen_sari,
3263};
3264
3265static void tgen_shl(TCGContext *s, TCGType type,
3266                     TCGReg a0, TCGReg a1, TCGReg a2)
3267{
3268    uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD;
3269    tcg_out32(s, insn | SAB(a1, a0, a2));
3270}
3271
3272static void tgen_shli(TCGContext *s, TCGType type,
3273                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3274{
3275    /* Limit immediate shift count lest we create an illegal insn.  */
3276    if (type == TCG_TYPE_I32) {
3277        tcg_out_shli32(s, a0, a1, a2 & 31);
3278    } else {
3279        tcg_out_shli64(s, a0, a1, a2 & 63);
3280    }
3281}
3282
3283static const TCGOutOpBinary outop_shl = {
3284    .base.static_constraint = C_O1_I2(r, r, ri),
3285    .out_rrr = tgen_shl,
3286    .out_rri = tgen_shli,
3287};
3288
3289static void tgen_shr(TCGContext *s, TCGType type,
3290                     TCGReg a0, TCGReg a1, TCGReg a2)
3291{
3292    uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD;
3293    tcg_out32(s, insn | SAB(a1, a0, a2));
3294}
3295
3296static void tgen_shri(TCGContext *s, TCGType type,
3297                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3298{
3299    /* Limit immediate shift count lest we create an illegal insn.  */
3300    if (type == TCG_TYPE_I32) {
3301        tcg_out_shri32(s, a0, a1, a2 & 31);
3302    } else {
3303        tcg_out_shri64(s, a0, a1, a2 & 63);
3304    }
3305}
3306
3307static const TCGOutOpBinary outop_shr = {
3308    .base.static_constraint = C_O1_I2(r, r, ri),
3309    .out_rrr = tgen_shr,
3310    .out_rri = tgen_shri,
3311};
3312
3313static void tgen_sub(TCGContext *s, TCGType type,
3314                     TCGReg a0, TCGReg a1, TCGReg a2)
3315{
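    /* subf computes RB - RA, hence the swapped operand order below. */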
3316    tcg_out32(s, SUBF | TAB(a0, a2, a1));
3317}
3318
3319static void tgen_subfi(TCGContext *s, TCGType type,
3320                       TCGReg a0, tcg_target_long a1, TCGReg a2)
3321{
3322    tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3323}
3324
3325static const TCGOutOpSubtract outop_sub = {
3326    .base.static_constraint = C_O1_I2(r, rI, r),
3327    .out_rrr = tgen_sub,
3328    .out_rir = tgen_subfi,
3329};
3330
3331static void tgen_xor(TCGContext *s, TCGType type,
3332                     TCGReg a0, TCGReg a1, TCGReg a2)
3333{
3334    tcg_out32(s, XOR | SAB(a1, a0, a2));
3335}
3336
3337static void tgen_xori(TCGContext *s, TCGType type,
3338                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3339{
3340    tcg_out_xori32(s, a0, a1, a2);
3341}
3342
3343static const TCGOutOpBinary outop_xor = {
3344    .base.static_constraint = C_O1_I2(r, r, rU),
3345    .out_rrr = tgen_xor,
3346    .out_rri = tgen_xori,
3347};
3348
3349static void tgen_bswap16(TCGContext *s, TCGType type,
3350                         TCGReg dst, TCGReg src, unsigned flags)
3351{
3352    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
3353
3354    if (have_isa_3_10) {
3355        tcg_out32(s, BRH | RA(dst) | RS(src));
3356        if (flags & TCG_BSWAP_OS) {
3357            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
3358        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
3359            tcg_out_ext16u(s, dst, dst);
3360        }
3361        return;
3362    }
3363
3364    /*
3365     * In the following,
3366     *   dep(a, b, m) -> (a & ~m) | (b & m)
3367     *
3368     * Begin with:                              src = xxxxabcd
3369     */
3370    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
3371    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
3372    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
3373    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
3374
3375    if (flags & TCG_BSWAP_OS) {
3376        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
3377    } else {
3378        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
3379    }
3380}
3381
3382static const TCGOutOpBswap outop_bswap16 = {
3383    .base.static_constraint = C_O1_I1(r, r),
3384    .out_rr = tgen_bswap16,
3385};
3386
3387static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3388{
3389    tcg_out32(s, NEG | RT(a0) | RA(a1));
3390}
3391
3392static const TCGOutOpUnary outop_neg = {
3393    .base.static_constraint = C_O1_I1(r, r),
3394    .out_rr = tgen_neg,
3395};
3396
3397static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3398{
3399    tgen_nor(s, type, a0, a1, a1);
3400}
3401
3402static const TCGOutOpUnary outop_not = {
3403    .base.static_constraint = C_O1_I1(r, r),
3404    .out_rr = tgen_not,
3405};
3406
3407
3408static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
3409                       const TCGArg args[TCG_MAX_OP_ARGS],
3410                       const int const_args[TCG_MAX_OP_ARGS])
3411{
3412    TCGArg a0, a1;
3413
3414    switch (opc) {
3415    case INDEX_op_goto_ptr:
        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
        tcg_out32(s, BCCTR | BO_ALWAYS);
        break;
    case INDEX_op_br:
        {
            TCGLabel *l = arg_label(args[0]);
            uint32_t insn = B;

            if (l->has_value) {
                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
                                       l->u.value_ptr);
            } else {
                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
            }
            tcg_out32(s, insn);
        }
        break;
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_i64:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_ld(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_ld_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
        break;

    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_i64:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_st(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_st_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
        break;

    case INDEX_op_bswap32_i32:
        tcg_out_bswap32(s, args[0], args[1], 0);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_bswap32(s, args[0], args[1], args[2]);
        break;
    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0], args[1]);
        break;

    case INDEX_op_deposit_i32:
        if (const_args[2]) {
            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
            tcg_out_andi32(s, args[0], args[0], ~mask);
        } else {
            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
                        32 - args[3] - args[4], 31 - args[3]);
        }
        break;
    case INDEX_op_deposit_i64:
        if (const_args[2]) {
            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
            tcg_out_andi64(s, args[0], args[0], ~mask);
        } else {
            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
                        64 - args[3] - args[4]);
        }
        break;

    case INDEX_op_extract_i32:
        if (args[2] == 0 && args[3] <= 16) {
            tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1));
            break;
        }
        tcg_out_rlw(s, RLWINM, args[0], args[1],
                    32 - args[2], 32 - args[3], 31);
        break;
    case INDEX_op_extract_i64:
        if (args[2] == 0 && args[3] <= 16) {
            tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1));
            break;
        }
        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
        break;

    case INDEX_op_sextract_i64:
        if (args[2] + args[3] == 32) {
            if (args[2] == 0) {
                tcg_out_ext32s(s, args[0], args[1]);
            } else {
                tcg_out_sari32(s, args[0], args[1], args[2]);
            }
            break;
        }
        /* FALLTHRU */
    case INDEX_op_sextract_i32:
        if (args[2] == 0 && args[3] == 8) {
            tcg_out_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
        } else if (args[2] == 0 && args[3] == 16) {
            tcg_out_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
        } else {
            g_assert_not_reached();
        }
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_add2_i64:
#else
    case INDEX_op_add2_i32:
#endif
        /* Note that the CA bit is defined based on the word size of the
           environment.  So in 64-bit mode it's always carry-out of bit 63.
           The fallback code using deposit works just as well for 32-bit.  */
        a0 = args[0], a1 = args[1];
        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[4]) {
            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
        } else {
            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
        }
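        /*
         * The high-part addend is constrained to 0 or -1 (constraint ZM),
         * so it folds into add-to-zero-extended (addze) or
         * add-to-minus-one-extended (addme), both of which consume CA.
         */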
        if (const_args[5]) {
            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
        } else {
            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_sub2_i64:
#else
    case INDEX_op_sub2_i32:
#endif
        a0 = args[0], a1 = args[1];
        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[2]) {
            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
        } else {
            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
        }
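        /* Likewise, args[3] is constrained to 0 or -1 (ZM):
           subtract-from-minus-one/zero-extended, consuming CA.  */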
        if (const_args[3]) {
            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
        } else {
            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext_i32_i64:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }
}

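/*
 * Report whether a vector opcode is supported for a given element size:
 * 1 means directly supported, 0 unsupported, and -1 that it can be
 * synthesized from supported opcodes via tcg_expand_vec_op().
 */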
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_not_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
        return 1;
    case INDEX_op_orc_vec:
        return have_isa_2_07;
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
        return vece <= MO_32 || have_isa_2_07;
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
        return vece <= MO_32;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_rotli_vec:
        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
    case INDEX_op_cmp_vec:
    case INDEX_op_cmpsel_vec:
        return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
    case INDEX_op_neg_vec:
        return vece >= MO_32 && have_isa_3_00;
    case INDEX_op_mul_vec:
        switch (vece) {
        case MO_8:
        case MO_16:
            return -1;
        case MO_32:
            return have_isa_2_07 ? 1 : -1;
        case MO_64:
            return have_isa_3_10;
        }
        return 0;
    case INDEX_op_bitsel_vec:
        return have_vsx;
    case INDEX_op_rotrv_vec:
        return -1;
    default:
        return 0;
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    tcg_debug_assert(dst >= TCG_REG_V0);

    /* Splat from integer reg allowed via constraints for v3.00.  */
    if (src < TCG_REG_V0) {
        tcg_debug_assert(have_isa_3_00);
        switch (vece) {
        case MO_64:
            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
            return true;
        case MO_32:
            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
            return true;
        default:
            /* Fail, so that we fall back on either dupm or mov+dup.  */
            return false;
        }
    }

    /*
     * Recall we use (or emulate) VSX integer loads, so the integer is
     * right justified within the left (zero-index) double-word.
     */
    switch (vece) {
    case MO_8:
        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
        break;
    case MO_16:
        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
        break;
    case MO_32:
        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
        break;
    case MO_64:
        if (have_vsx) {
            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
            break;
        }
        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg out, TCGReg base, intptr_t offset)
{
    int elt;

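    /*
     * Altivec numbers vector elements big-endian first; on a little-endian
     * host the loaded element lands at the mirrored index, hence the
     * "elt ^= N" adjustments below.
     */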
    tcg_debug_assert(out >= TCG_REG_V0);
    switch (vece) {
    case MO_8:
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
        }
        elt = extract32(offset, 0, 4);
#if !HOST_BIG_ENDIAN
        elt ^= 15;
#endif
        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_16:
        tcg_debug_assert((offset & 1) == 0);
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
        }
        elt = extract32(offset, 1, 3);
#if !HOST_BIG_ENDIAN
        elt ^= 7;
#endif
        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_32:
        if (have_isa_3_00) {
            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
        elt = extract32(offset, 2, 2);
#if !HOST_BIG_ENDIAN
        elt ^= 3;
#endif
        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_64:
        if (have_vsx) {
            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
        elt = extract32(offset, 3, 1);
#if !HOST_BIG_ENDIAN
        elt = !elt;
#endif
        if (elt) {
            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
        } else {
            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
{
    tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
}

static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
}

static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
}

static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
}

static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
}

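/*
 * Bitwise select: d = (t & c) | (f & ~c).  With VSX this is a single
 * xxsel; otherwise compose it, staging t & c in TMP2 since d may
 * overlap any of the inputs.
 */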
static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
                               TCGReg c, TCGReg t, TCGReg f)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
    } else {
        tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
        tcg_out_andc_vec(s, d, f, c);
        tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
    }
}

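/*
 * Emit a vector compare without the final inversion.  The hardware only
 * provides EQ, GT and GTU directly (plus NE for small elements on ISA 3.0);
 * the remaining conditions are formed by swapping the operands and/or
 * inverting the sense.  Returns true if the caller must invert the result.
 */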
static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
                                  TCGReg a1, TCGReg a2, TCGCond cond)
{
    static const uint32_t
        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
    uint32_t insn;
    bool need_swap = false, need_inv = false;

    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_GT:
    case TCG_COND_GTU:
        break;
    case TCG_COND_NE:
        if (have_isa_3_00 && vece <= MO_32) {
            break;
        }
        /* fall through */
    case TCG_COND_LE:
    case TCG_COND_LEU:
        need_inv = true;
        break;
    case TCG_COND_LT:
    case TCG_COND_LTU:
        need_swap = true;
        break;
    case TCG_COND_GE:
    case TCG_COND_GEU:
        need_swap = need_inv = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (need_inv) {
        cond = tcg_invert_cond(cond);
    }
    if (need_swap) {
        TCGReg swap = a1;
        a1 = a2;
        a2 = swap;
        cond = tcg_swap_cond(cond);
    }

    switch (cond) {
    case TCG_COND_EQ:
        insn = eq_op[vece];
        break;
    case TCG_COND_NE:
        insn = ne_op[vece];
        break;
    case TCG_COND_GT:
        insn = gts_op[vece];
        break;
    case TCG_COND_GTU:
        insn = gtu_op[vece];
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));

    return need_inv;
}

static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
                            TCGReg a1, TCGReg a2, TCGCond cond)
{
    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
        tcg_out_not_vec(s, a0, a0);
    }
}

static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
                               TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
                               TCGReg v4, TCGCond cond)
{
    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);

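    /*
     * A constant v3 is restricted to 0 or -1 (constraint vZM), so the
     * select collapses into a single mask operation against v4.
     */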
    if (!const_v3) {
        if (inv) {
            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
        } else {
            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
        }
    } else if (v3) {
        if (inv) {
            tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
        } else {
            tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
        }
    } else {
        if (inv) {
            tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
        } else {
            tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
        }
    }
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const uint32_t
        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
        neg_op[4] = { 0, 0, VNEGW, VNEGD },
        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };

    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
    uint32_t insn;

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        return;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        return;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        return;

    case INDEX_op_add_vec:
        insn = add_op[vece];
        break;
    case INDEX_op_sub_vec:
        insn = sub_op[vece];
        break;
    case INDEX_op_neg_vec:
        insn = neg_op[vece];
        a2 = a1;
        a1 = 0;
        break;
    case INDEX_op_mul_vec:
        insn = mul_op[vece];
        break;
    case INDEX_op_ssadd_vec:
        insn = ssadd_op[vece];
        break;
    case INDEX_op_sssub_vec:
        insn = sssub_op[vece];
        break;
    case INDEX_op_usadd_vec:
        insn = usadd_op[vece];
        break;
    case INDEX_op_ussub_vec:
        insn = ussub_op[vece];
        break;
    case INDEX_op_smin_vec:
        insn = smin_op[vece];
        break;
    case INDEX_op_umin_vec:
        insn = umin_op[vece];
        break;
    case INDEX_op_smax_vec:
        insn = smax_op[vece];
        break;
    case INDEX_op_umax_vec:
        insn = umax_op[vece];
        break;
    case INDEX_op_shlv_vec:
        insn = shlv_op[vece];
        break;
    case INDEX_op_shrv_vec:
        insn = shrv_op[vece];
        break;
    case INDEX_op_sarv_vec:
        insn = sarv_op[vece];
        break;
    case INDEX_op_and_vec:
        tcg_out_and_vec(s, a0, a1, a2);
        return;
    case INDEX_op_or_vec:
        tcg_out_or_vec(s, a0, a1, a2);
        return;
    case INDEX_op_xor_vec:
        insn = VXOR;
        break;
    case INDEX_op_andc_vec:
        tcg_out_andc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_not_vec:
        tcg_out_not_vec(s, a0, a1);
        return;
    case INDEX_op_orc_vec:
        tcg_out_orc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_nand_vec:
        insn = VNAND;
        break;
    case INDEX_op_nor_vec:
        insn = VNOR;
        break;
    case INDEX_op_eqv_vec:
        insn = VEQV;
        break;

    case INDEX_op_cmp_vec:
        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
        return;
    case INDEX_op_cmpsel_vec:
        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
                           args[3], const_args[3], args[4], args[5]);
        return;
    case INDEX_op_bitsel_vec:
        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
        return;

    case INDEX_op_dup2_vec:
        assert(TCG_TARGET_REG_BITS == 32);
        /* With inputs a1 = xLxx, a2 = xHxx  */
        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
        return;

    case INDEX_op_ppc_mrgh_vec:
        insn = mrgh_op[vece];
        break;
    case INDEX_op_ppc_mrgl_vec:
        insn = mrgl_op[vece];
        break;
    case INDEX_op_ppc_muleu_vec:
        insn = muleu_op[vece];
        break;
    case INDEX_op_ppc_mulou_vec:
        insn = mulou_op[vece];
        break;
    case INDEX_op_ppc_pkum_vec:
        insn = pkum_op[vece];
        break;
    case INDEX_op_rotlv_vec:
        insn = rotl_op[vece];
        break;
    case INDEX_op_ppc_msum_vec:
        tcg_debug_assert(vece == MO_16);
        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
        return;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(insn != 0);
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}

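/*
 * The variable vector shifts use only the low log2(element-bits) bits of
 * each element's shift count, so splatting the count into every byte
 * provides a valid count for every element size.
 */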
static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
{
    TCGv_vec t1;

    if (vece == MO_32) {
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using negative numbers gets us bit 4 easily.
         */
        imm = sextract32(imm, 0, 5);
    } else {
        imm &= (8 << vece) - 1;
    }

    /* Splat as bytes, so that xxspltib can be used when 2.07 allows MO_64. */
    t1 = tcg_constant_vec(type, MO_8, imm);
    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
}

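/*
 * Expand a 32-bit multiply from 16-bit partial products when vmuluwm is
 * not available:  a * b mod 2^32
 *     = lo(a)*lo(b) + ((hi(a)*lo(b) + lo(a)*hi(b)) << 16),
 * where vmsumuhm accumulates the cross products after rotating b by 16.
 */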
static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2)
{
    TCGv_vec t1 = tcg_temp_new_vec(type);
    TCGv_vec t2 = tcg_temp_new_vec(type);
    TCGv_vec c0, c16;

    switch (vece) {
    case MO_8:
    case MO_16:
        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
        break;

    case MO_32:
        tcg_debug_assert(!have_isa_2_07);
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using -16 is a quick way to represent 16.
         */
        c16 = tcg_constant_vec(type, MO_8, -16);
        c0 = tcg_constant_vec(type, MO_8, 0);

        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
        tcg_gen_add_vec(MO_32, v0, t1, t2);
        break;

    default:
        g_assert_not_reached();
    }
    tcg_temp_free_vec(t1);
    tcg_temp_free_vec(t2);
}

void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t0;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);

    switch (opc) {
    case INDEX_op_shli_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
        break;
    case INDEX_op_shri_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
        break;
    case INDEX_op_sari_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
        break;
    case INDEX_op_rotli_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
        break;
    case INDEX_op_mul_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        expand_vec_mul(type, vece, v0, v1, v2);
        break;
    case INDEX_op_rotrv_vec:
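        /* Rotate right by x is rotate left by -x, modulo the element size. */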
        v2 = temp_tcgv_vec(arg_temp(a2));
        t0 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t0, v2);
        tcg_gen_rotlv_vec(vece, v0, v1, t0);
        tcg_temp_free_vec(t0);
        break;
    default:
        g_assert_not_reached();
    }
    va_end(va);
}

static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(r, r);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);
    case INDEX_op_add2_i64:
    case INDEX_op_add2_i32:
        return C_O2_I4(r, r, r, r, rI, rZM);
    case INDEX_op_sub2_i64:
    case INDEX_op_sub2_i32:
        return C_O2_I4(r, r, rI, rZM, r, r);

    case INDEX_op_qemu_ld_i32:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);

    case INDEX_op_qemu_st_i32:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);

    case INDEX_op_qemu_ld_i128:
        return C_N1O1_I1(o, m, r);
    case INDEX_op_qemu_st_i128:
        return C_O0_I3(o, m, r);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    case INDEX_op_dup_vec:
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);
    case INDEX_op_cmpsel_vec:
        return C_O1_I4(v, v, v, vZM, v);

    default:
        return C_NotImplemented;
    }
}

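/*
 * Register set layout: bits 0..31 of a regset are the GPRs, bits 32..63
 * the Altivec/VSX registers, hence the high-half masks when Altivec is
 * available.
 */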
static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
    if (have_altivec) {
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
    }
}

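/*
 * Describe the generated code with DWARF CFI so that host debuggers can
 * unwind through it; the structure is handed to tcg_register_jit_int()
 * below.
 */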
#ifdef __ELF__
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif

static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    uint8_t *p = &debug_frame.fde_reg_ofs[3];
    int i;

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
        p[0] = 0x80 + tcg_target_callee_save_regs[i];
        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
    }

    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif /* __ELF__ */