xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision ee48fef06c034ff245db9e553dcf0f1262f97bd2)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26#include "../tcg-pool.c.inc"
27#include "../tcg-ldst.c.inc"
28
29/*
30 * Standardize on the _CALL_FOO symbols used by GCC:
31 * Apple XCode does not define _CALL_DARWIN.
32 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
33 */
34#if TCG_TARGET_REG_BITS == 64
35# ifdef _CALL_AIX
36    /* ok */
37# elif defined(_CALL_ELF) && _CALL_ELF == 1
38#  define _CALL_AIX
39# elif defined(_CALL_ELF) && _CALL_ELF == 2
40    /* ok */
41# else
42#  error "Unknown ABI"
43# endif
44#else
45# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
46    /* ok */
47# elif defined(__APPLE__)
48#  define _CALL_DARWIN
49# elif defined(__ELF__)
50#  define _CALL_SYSV
51# else
52#  error "Unknown ABI"
53# endif
54#endif
55
/*
 * Calling-convention parameters.  The treatment of 32-bit arguments and
 * of 128-bit return values is selected by the host register width; the
 * treatment of 64-bit and 128-bit arguments is selected by whether the
 * SysV ABI (_CALL_SYSV) is in use.
 */
#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif
70
71/* For some memory operations, we need a scratch that isn't R0.  For the AIX
72   calling convention, we can re-use the TOC register since we'll be reloading
73   it at every call.  Otherwise R12 will do nicely as neither a call-saved
74   register nor a parameter register.  */
75#ifdef _CALL_AIX
76# define TCG_REG_TMP1   TCG_REG_R2
77#else
78# define TCG_REG_TMP1   TCG_REG_R12
79#endif
80#define TCG_REG_TMP2    TCG_REG_R11
81
82#define TCG_VEC_TMP1    TCG_REG_V0
83#define TCG_VEC_TMP2    TCG_REG_V1
84
85#define TCG_REG_TB     TCG_REG_R31
86#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
87
88/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
89#define SZP  ((int)sizeof(void *))
90
91/* Shorthand for size of a register.  */
92#define SZR  (TCG_TARGET_REG_BITS / 8)
93
94#define TCG_CT_CONST_S16  0x100
95#define TCG_CT_CONST_U16  0x200
96#define TCG_CT_CONST_S32  0x400
97#define TCG_CT_CONST_U32  0x800
98#define TCG_CT_CONST_ZERO 0x1000
99#define TCG_CT_CONST_MONE 0x2000
100#define TCG_CT_CONST_WSZ  0x4000
101#define TCG_CT_CONST_CMP  0x8000
102
103#define ALL_GENERAL_REGS  0xffffffffu
104#define ALL_VECTOR_REGS   0xffffffff00000000ull
105
106#ifndef R_PPC64_PCREL34
107#define R_PPC64_PCREL34  132
108#endif
109
110#define have_isel  (cpuinfo & CPUINFO_ISEL)
111
112#define TCG_GUEST_BASE_REG  TCG_REG_R30
113
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names: the 32 general registers ("r0".."r31")
 * followed by the 32 vector registers ("v0".."v31").  Each entry is at
 * most three characters plus the terminating NUL.  Only compiled in when
 * CONFIG_DEBUG_TCG is defined.
 */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif
126
/*
 * Register allocation preference list.  General registers come first:
 * call-saved registers, then call-clobbered non-argument registers, then
 * the argument registers in descending order (R10 down to R3).  The
 * vector registers V2..V19 follow; V0/V1 are kept out of the list because
 * they serve as TCG_VEC_TMP1/TCG_VEC_TMP2.
 * NOTE(review): presumably consumed by the common TCG register allocator,
 * which is not visible in this file -- confirm.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};
179
/*
 * Registers used for integer call arguments, in argument order:
 * R3 through R10 (eight registers).
 */
static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};
190
191static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
192{
193    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
194    tcg_debug_assert(slot >= 0 && slot <= 1);
195    return TCG_REG_R3 + slot;
196}
197
/*
 * Registers listed as callee-saved: R14 through R31, plus R11 when
 * building for the Darwin ABI (_CALL_DARWIN).
 * NOTE(review): presumably the set saved/restored by the prologue and
 * epilogue, which are outside this view -- confirm.
 */
static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
221
222/* For PPC, we use TB+4 instead of TB as the base. */
223static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
224{
225    return tcg_tbrel_diff(s, target) - 4;
226}
227
228static inline bool in_range_b(tcg_target_long target)
229{
230    return target == sextract64(target, 0, 26);
231}
232
233static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
234                               const tcg_insn_unit *target)
235{
236    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
237    tcg_debug_assert(in_range_b(disp));
238    return disp & 0x3fffffc;
239}
240
241static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
242{
243    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
244    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
245
246    if (in_range_b(disp)) {
247        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
248        return true;
249    }
250    return false;
251}
252
253static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
254                               const tcg_insn_unit *target)
255{
256    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
257    tcg_debug_assert(disp == (int16_t) disp);
258    return disp & 0xfffc;
259}
260
261static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
262{
263    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
264    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
265
266    if (disp == (int16_t) disp) {
267        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
268        return true;
269    }
270    return false;
271}
272
273static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
274{
275    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
276    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
277
278    if (disp == sextract64(disp, 0, 34)) {
279        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
280        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
281        return true;
282    }
283    return false;
284}
285
286static bool mask_operand(uint32_t c, int *mb, int *me);
287static bool mask64_operand(uint64_t c, int *mb, int *me);
288
289/* test if a constant matches the constraint */
290static bool tcg_target_const_match(int64_t sval, int ct,
291                                   TCGType type, TCGCond cond, int vece)
292{
293    uint64_t uval = sval;
294    int mb, me;
295
296    if (ct & TCG_CT_CONST) {
297        return 1;
298    }
299
300    if (type == TCG_TYPE_I32) {
301        uval = (uint32_t)sval;
302        sval = (int32_t)sval;
303    }
304
305    if (ct & TCG_CT_CONST_CMP) {
306        switch (cond) {
307        case TCG_COND_EQ:
308        case TCG_COND_NE:
309            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
310            break;
311        case TCG_COND_LT:
312        case TCG_COND_GE:
313        case TCG_COND_LE:
314        case TCG_COND_GT:
315            ct |= TCG_CT_CONST_S16;
316            break;
317        case TCG_COND_LTU:
318        case TCG_COND_GEU:
319        case TCG_COND_LEU:
320        case TCG_COND_GTU:
321            ct |= TCG_CT_CONST_U16;
322            break;
323        case TCG_COND_TSTEQ:
324        case TCG_COND_TSTNE:
325            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
326                return 1;
327            }
328            if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32
329                ? mask_operand(uval, &mb, &me)
330                : mask64_operand(uval << clz64(uval), &mb, &me)) {
331                return 1;
332            }
333            return 0;
334        default:
335            g_assert_not_reached();
336        }
337    }
338
339    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
340        return 1;
341    }
342    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
343        return 1;
344    }
345    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
346        return 1;
347    }
348    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
349        return 1;
350    }
351    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
352        return 1;
353    }
354    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
355        return 1;
356    }
357    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
358        return 1;
359    }
360    return 0;
361}
362
/*
 * Opcode construction helpers.  OPCD shifts a primary opcode to bit 26.
 * The remaining forms combine a fixed primary opcode (the number in the
 * macro name) with an extended opcode: shifted left by 1 for XO19, XO31
 * and MDS30, by 2 for MD30, and unshifted for XO58, XO62 and VX4.
 */
#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))
371
372#define B      OPCD( 18)
373#define BC     OPCD( 16)
374
375#define LBZ    OPCD( 34)
376#define LHZ    OPCD( 40)
377#define LHA    OPCD( 42)
378#define LWZ    OPCD( 32)
379#define LWZUX  XO31( 55)
380#define LD     XO58(  0)
381#define LDX    XO31( 21)
382#define LDU    XO58(  1)
383#define LDUX   XO31( 53)
384#define LWA    XO58(  2)
385#define LWAX   XO31(341)
386#define LQ     OPCD( 56)
387
388#define STB    OPCD( 38)
389#define STH    OPCD( 44)
390#define STW    OPCD( 36)
391#define STD    XO62(  0)
392#define STDU   XO62(  1)
393#define STDX   XO31(149)
394#define STQ    XO62(  2)
395
396#define PLWA   OPCD( 41)
397#define PLD    OPCD( 57)
398#define PLXSD  OPCD( 42)
399#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */
400
401#define PSTD   OPCD( 61)
402#define PSTXSD OPCD( 46)
403#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */
404
405#define ADDIC  OPCD( 12)
406#define ADDI   OPCD( 14)
407#define ADDIS  OPCD( 15)
408#define ORI    OPCD( 24)
409#define ORIS   OPCD( 25)
410#define XORI   OPCD( 26)
411#define XORIS  OPCD( 27)
412#define ANDI   OPCD( 28)
413#define ANDIS  OPCD( 29)
414#define MULLI  OPCD(  7)
415#define CMPLI  OPCD( 10)
416#define CMPI   OPCD( 11)
417#define SUBFIC OPCD( 8)
418
419#define LWZU   OPCD( 33)
420#define STWU   OPCD( 37)
421
422#define RLWIMI OPCD( 20)
423#define RLWINM OPCD( 21)
424#define RLWNM  OPCD( 23)
425
426#define RLDICL MD30(  0)
427#define RLDICR MD30(  1)
428#define RLDIMI MD30(  3)
429#define RLDCL  MDS30( 8)
430
431#define BCLR   XO19( 16)
432#define BCCTR  XO19(528)
433#define CRAND  XO19(257)
434#define CRANDC XO19(129)
435#define CRNAND XO19(225)
436#define CROR   XO19(449)
437#define CRNOR  XO19( 33)
438#define ADDPCIS XO19( 2)
439
440#define EXTSB  XO31(954)
441#define EXTSH  XO31(922)
442#define EXTSW  XO31(986)
443#define ADD    XO31(266)
444#define ADDE   XO31(138)
445#define ADDME  XO31(234)
446#define ADDZE  XO31(202)
447#define ADDC   XO31( 10)
448#define AND    XO31( 28)
449#define SUBF   XO31( 40)
450#define SUBFC  XO31(  8)
451#define SUBFE  XO31(136)
452#define SUBFME XO31(232)
453#define SUBFZE XO31(200)
454#define OR     XO31(444)
455#define XOR    XO31(316)
456#define MULLW  XO31(235)
457#define MULHW  XO31( 75)
458#define MULHWU XO31( 11)
459#define DIVW   XO31(491)
460#define DIVWU  XO31(459)
461#define MODSW  XO31(779)
462#define MODUW  XO31(267)
463#define CMP    XO31(  0)
464#define CMPL   XO31( 32)
465#define LHBRX  XO31(790)
466#define LWBRX  XO31(534)
467#define LDBRX  XO31(532)
468#define STHBRX XO31(918)
469#define STWBRX XO31(662)
470#define STDBRX XO31(660)
471#define MFSPR  XO31(339)
472#define MTSPR  XO31(467)
473#define SRAWI  XO31(824)
474#define NEG    XO31(104)
475#define MFCR   XO31( 19)
476#define MFOCRF (MFCR | (1u << 20))
477#define NOR    XO31(124)
478#define CNTLZW XO31( 26)
479#define CNTLZD XO31( 58)
480#define CNTTZW XO31(538)
481#define CNTTZD XO31(570)
482#define CNTPOPW XO31(378)
483#define CNTPOPD XO31(506)
484#define ANDC   XO31( 60)
485#define ORC    XO31(412)
486#define EQV    XO31(284)
487#define NAND   XO31(476)
488#define ISEL   XO31( 15)
489
490#define MULLD  XO31(233)
491#define MULHD  XO31( 73)
492#define MULHDU XO31(  9)
493#define DIVD   XO31(489)
494#define DIVDU  XO31(457)
495#define MODSD  XO31(777)
496#define MODUD  XO31(265)
497
498#define LBZX   XO31( 87)
499#define LHZX   XO31(279)
500#define LHAX   XO31(343)
501#define LWZX   XO31( 23)
502#define STBX   XO31(215)
503#define STHX   XO31(407)
504#define STWX   XO31(151)
505
506#define EIEIO  XO31(854)
507#define HWSYNC XO31(598)
508#define LWSYNC (HWSYNC | (1u << 21))
509
510#define SPR(a, b) ((((a)<<5)|(b))<<11)
511#define LR     SPR(8, 0)
512#define CTR    SPR(9, 0)
513
514#define SLW    XO31( 24)
515#define SRW    XO31(536)
516#define SRAW   XO31(792)
517
518#define SLD    XO31( 27)
519#define SRD    XO31(539)
520#define SRAD   XO31(794)
521#define SRADI  XO31(413<<1)
522
523#define BRH    XO31(219)
524#define BRW    XO31(155)
525#define BRD    XO31(187)
526
527#define TW     XO31( 4)
528#define TRAP   (TW | TO(31))
529
530#define SETBC    XO31(384)  /* v3.10 */
531#define SETBCR   XO31(416)  /* v3.10 */
532#define SETNBC   XO31(448)  /* v3.10 */
533#define SETNBCR  XO31(480)  /* v3.10 */
534
535#define NOP    ORI  /* ori 0,0,0 */
536
537#define LVX        XO31(103)
538#define LVEBX      XO31(7)
539#define LVEHX      XO31(39)
540#define LVEWX      XO31(71)
541#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
542#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
543#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
544#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
545#define LXSD       (OPCD(57) | 2)   /* v3.00 */
546#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
547
548#define STVX       XO31(231)
549#define STVEWX     XO31(199)
550#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
551#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
552#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
553#define STXSD      (OPCD(61) | 2)   /* v3.00 */
554
555#define VADDSBS    VX4(768)
556#define VADDUBS    VX4(512)
557#define VADDUBM    VX4(0)
558#define VADDSHS    VX4(832)
559#define VADDUHS    VX4(576)
560#define VADDUHM    VX4(64)
561#define VADDSWS    VX4(896)
562#define VADDUWS    VX4(640)
563#define VADDUWM    VX4(128)
564#define VADDUDM    VX4(192)       /* v2.07 */
565
566#define VSUBSBS    VX4(1792)
567#define VSUBUBS    VX4(1536)
568#define VSUBUBM    VX4(1024)
569#define VSUBSHS    VX4(1856)
570#define VSUBUHS    VX4(1600)
571#define VSUBUHM    VX4(1088)
572#define VSUBSWS    VX4(1920)
573#define VSUBUWS    VX4(1664)
574#define VSUBUWM    VX4(1152)
575#define VSUBUDM    VX4(1216)      /* v2.07 */
576
577#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
578#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
579
580#define VMAXSB     VX4(258)
581#define VMAXSH     VX4(322)
582#define VMAXSW     VX4(386)
583#define VMAXSD     VX4(450)       /* v2.07 */
584#define VMAXUB     VX4(2)
585#define VMAXUH     VX4(66)
586#define VMAXUW     VX4(130)
587#define VMAXUD     VX4(194)       /* v2.07 */
588#define VMINSB     VX4(770)
589#define VMINSH     VX4(834)
590#define VMINSW     VX4(898)
591#define VMINSD     VX4(962)       /* v2.07 */
592#define VMINUB     VX4(514)
593#define VMINUH     VX4(578)
594#define VMINUW     VX4(642)
595#define VMINUD     VX4(706)       /* v2.07 */
596
597#define VCMPEQUB   VX4(6)
598#define VCMPEQUH   VX4(70)
599#define VCMPEQUW   VX4(134)
600#define VCMPEQUD   VX4(199)       /* v2.07 */
601#define VCMPGTSB   VX4(774)
602#define VCMPGTSH   VX4(838)
603#define VCMPGTSW   VX4(902)
604#define VCMPGTSD   VX4(967)       /* v2.07 */
605#define VCMPGTUB   VX4(518)
606#define VCMPGTUH   VX4(582)
607#define VCMPGTUW   VX4(646)
608#define VCMPGTUD   VX4(711)       /* v2.07 */
609#define VCMPNEB    VX4(7)         /* v3.00 */
610#define VCMPNEH    VX4(71)        /* v3.00 */
611#define VCMPNEW    VX4(135)       /* v3.00 */
612
613#define VSLB       VX4(260)
614#define VSLH       VX4(324)
615#define VSLW       VX4(388)
616#define VSLD       VX4(1476)      /* v2.07 */
617#define VSRB       VX4(516)
618#define VSRH       VX4(580)
619#define VSRW       VX4(644)
620#define VSRD       VX4(1732)      /* v2.07 */
621#define VSRAB      VX4(772)
622#define VSRAH      VX4(836)
623#define VSRAW      VX4(900)
624#define VSRAD      VX4(964)       /* v2.07 */
625#define VRLB       VX4(4)
626#define VRLH       VX4(68)
627#define VRLW       VX4(132)
628#define VRLD       VX4(196)       /* v2.07 */
629
630#define VMULEUB    VX4(520)
631#define VMULEUH    VX4(584)
632#define VMULEUW    VX4(648)       /* v2.07 */
633#define VMULOUB    VX4(8)
634#define VMULOUH    VX4(72)
635#define VMULOUW    VX4(136)       /* v2.07 */
636#define VMULUWM    VX4(137)       /* v2.07 */
637#define VMULLD     VX4(457)       /* v3.10 */
638#define VMSUMUHM   VX4(38)
639
640#define VMRGHB     VX4(12)
641#define VMRGHH     VX4(76)
642#define VMRGHW     VX4(140)
643#define VMRGLB     VX4(268)
644#define VMRGLH     VX4(332)
645#define VMRGLW     VX4(396)
646
647#define VPKUHUM    VX4(14)
648#define VPKUWUM    VX4(78)
649
650#define VAND       VX4(1028)
651#define VANDC      VX4(1092)
652#define VNOR       VX4(1284)
653#define VOR        VX4(1156)
654#define VXOR       VX4(1220)
655#define VEQV       VX4(1668)      /* v2.07 */
656#define VNAND      VX4(1412)      /* v2.07 */
657#define VORC       VX4(1348)      /* v2.07 */
658
659#define VSPLTB     VX4(524)
660#define VSPLTH     VX4(588)
661#define VSPLTW     VX4(652)
662#define VSPLTISB   VX4(780)
663#define VSPLTISH   VX4(844)
664#define VSPLTISW   VX4(908)
665
666#define VSLDOI     VX4(44)
667
668#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
669#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
670#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
671
672#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
673#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
674#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
675#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
676#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
677#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
678
/*
 * Operand field encoders: each shifts its argument into the bit position
 * the field occupies within a 32-bit instruction word.  RT, RS, TO and BO
 * share bit 21; RA sits at bit 16; RB and SH share bit 11.  None of these
 * mask their argument, so callers must pass in-range values.
 */
#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))
690
691#define VRT(r)  (((r) & 31) << 21)
692#define VRA(r)  (((r) & 31) << 16)
693#define VRB(r)  (((r) & 31) << 11)
694#define VRC(r)  (((r) & 31) <<  6)
695
696#define LK    1
697
698#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
699#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
700#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
701#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
702
/*
 * Condition-register operand encoders.  BF shifts a CR field number to
 * bit 23.  BI, BT, BA, BB and BC_ take a CR field number N and a bit C
 * within that field, form the CR bit index C + 4*N, and shift it to bit
 * 16, 21, 16, 11 and 6 respectively.
 */
#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

/* BO field values used for conditional and unconditional branches. */
#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)
713
/*
 * Bit numbers (0..3) within a 4-bit condition register field, used as
 * the C argument of the BI/BT/BA/BB/BC_ encoders.
 */
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
720
/*
 * Conditional-branch (BC) instruction template for each TCGCond, testing
 * one bit of CR field 0 for true or false.  Signed and unsigned orderings
 * use identical entries, and TSTEQ/TSTNE use the same entries as EQ/NE.
 * NOTE(review): presumably the signed/unsigned distinction is made by the
 * compare instruction that sets CR0, which is outside this view.
 */
static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
};
735
/*
 * ISEL instruction template for each TCGCond, selecting on one bit of
 * CR field 0.  The low bit here is set if the RA and RB fields must be
 * inverted; this is the case for the conditions that tcg_to_bc encodes
 * with BO_COND_FALSE (NE, TSTNE, GE, LE, GEU, LEU).
 */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
};
751
752static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
753                        intptr_t value, intptr_t addend)
754{
755    const tcg_insn_unit *target;
756    int16_t lo;
757    int32_t hi;
758
759    value += addend;
760    target = (const tcg_insn_unit *)value;
761
762    switch (type) {
763    case R_PPC_REL14:
764        return reloc_pc14(code_ptr, target);
765    case R_PPC_REL24:
766        return reloc_pc24(code_ptr, target);
767    case R_PPC64_PCREL34:
768        return reloc_pc34(code_ptr, target);
769    case R_PPC_ADDR16:
770        /*
771         * We are (slightly) abusing this relocation type.  In particular,
772         * assert that the low 2 bits are zero, and do not modify them.
773         * That way we can use this with LD et al that have opcode bits
774         * in the low 2 bits of the insn.
775         */
776        if ((value & 3) || value != (int16_t)value) {
777            return false;
778        }
779        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
780        break;
781    case R_PPC_ADDR32:
782        /*
783         * We are abusing this relocation type.  Again, this points to
784         * a pair of insns, lis + load.  This is an absolute address
785         * relocation for PPC32 so the lis cannot be removed.
786         */
787        lo = value;
788        hi = value - lo;
789        if (hi + lo != value) {
790            return false;
791        }
792        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
793        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
794        break;
795    default:
796        g_assert_not_reached();
797    }
798    return true;
799}
800
801/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
802static bool tcg_out_need_prefix_align(TCGContext *s)
803{
804    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
805}
806
807static void tcg_out_prefix_align(TCGContext *s)
808{
809    if (tcg_out_need_prefix_align(s)) {
810        tcg_out32(s, NOP);
811    }
812}
813
814static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
815{
816    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
817}
818
819/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
820static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
821                          unsigned ra, tcg_target_long imm, bool r)
822{
823    tcg_insn_unit p, i;
824
825    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
826    i = opc | TAI(rt, ra, imm);
827
828    tcg_out_prefix_align(s);
829    tcg_out32(s, p);
830    tcg_out32(s, i);
831}
832
833/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
834static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
835                          unsigned ra, tcg_target_long imm, bool r)
836{
837    tcg_insn_unit p, i;
838
839    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
840    i = opc | TAI(rt, ra, imm);
841
842    tcg_out_prefix_align(s);
843    tcg_out32(s, p);
844    tcg_out32(s, i);
845}
846
847static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
848                             TCGReg base, tcg_target_long offset);
849
850static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
851{
852    if (ret == arg) {
853        return true;
854    }
855    switch (type) {
856    case TCG_TYPE_I64:
857        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
858        /* fallthru */
859    case TCG_TYPE_I32:
860        if (ret < TCG_REG_V0) {
861            if (arg < TCG_REG_V0) {
862                tcg_out32(s, OR | SAB(arg, ret, arg));
863                break;
864            } else if (have_isa_2_07) {
865                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
866                          | VRT(arg) | RA(ret));
867                break;
868            } else {
869                /* Altivec does not support vector->integer moves.  */
870                return false;
871            }
872        } else if (arg < TCG_REG_V0) {
873            if (have_isa_2_07) {
874                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
875                          | VRT(ret) | RA(arg));
876                break;
877            } else {
878                /* Altivec does not support integer->vector moves.  */
879                return false;
880            }
881        }
882        /* fallthru */
883    case TCG_TYPE_V64:
884    case TCG_TYPE_V128:
885        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
886        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
887        break;
888    default:
889        g_assert_not_reached();
890    }
891    return true;
892}
893
894static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
895                           int sh, int mb, bool rc)
896{
897    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
898    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
899    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
900    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
901}
902
903static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
904                        int sh, int mb)
905{
906    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
907}
908
909static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
910                           int sh, int mb, int me, bool rc)
911{
912    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me) | rc);
913}
914
915static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
916                        int sh, int mb, int me)
917{
918    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
919}
920
921static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
922{
923    tcg_out32(s, EXTSB | RA(dst) | RS(src));
924}
925
926static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
927{
928    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
929}
930
931static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
932{
933    tcg_out32(s, EXTSH | RA(dst) | RS(src));
934}
935
936static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
937{
938    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
939}
940
941static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
942{
943    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
944    tcg_out32(s, EXTSW | RA(dst) | RS(src));
945}
946
947static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
948{
949    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
950    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
951}
952
953static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
954{
955    tcg_out_ext32s(s, dst, src);
956}
957
958static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
959{
960    tcg_out_ext32u(s, dst, src);
961}
962
963static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
964{
965    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
966    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
967}
968
969static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
970{
971    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
972}
973
974static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
975{
976    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
977}
978
979static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
980{
981    /* Limit immediate shift count lest we create an illegal insn.  */
982    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
983}
984
985static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
986{
987    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
988}
989
990static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
991{
992    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
993}
994
995static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
996{
997    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
998}
999
1000static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
1001{
1002    uint32_t d0, d1, d2;
1003
1004    tcg_debug_assert((imm & 0xffff) == 0);
1005    tcg_debug_assert(imm == (int32_t)imm);
1006
1007    d2 = extract32(imm, 16, 1);
1008    d1 = extract32(imm, 17, 5);
1009    d0 = extract32(imm, 22, 10);
1010    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
1011}
1012
1013static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
1014{
1015    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
1016
1017    if (have_isa_3_10) {
1018        tcg_out32(s, BRH | RA(dst) | RS(src));
1019        if (flags & TCG_BSWAP_OS) {
1020            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
1021        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
1022            tcg_out_ext16u(s, dst, dst);
1023        }
1024        return;
1025    }
1026
1027    /*
1028     * In the following,
1029     *   dep(a, b, m) -> (a & ~m) | (b & m)
1030     *
1031     * Begin with:                              src = xxxxabcd
1032     */
1033    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
1034    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
1035    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
1036    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
1037
1038    if (flags & TCG_BSWAP_OS) {
1039        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
1040    } else {
1041        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
1042    }
1043}
1044
1045static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
1046{
1047    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
1048
1049    if (have_isa_3_10) {
1050        tcg_out32(s, BRW | RA(dst) | RS(src));
1051        if (flags & TCG_BSWAP_OS) {
1052            tcg_out_ext32s(s, dst, dst);
1053        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
1054            tcg_out_ext32u(s, dst, dst);
1055        }
1056        return;
1057    }
1058
1059    /*
1060     * Stolen from gcc's builtin_bswap32.
1061     * In the following,
1062     *   dep(a, b, m) -> (a & ~m) | (b & m)
1063     *
1064     * Begin with:                              src = xxxxabcd
1065     */
1066    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
1067    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
1068    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
1069    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
1070    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
1071    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
1072
1073    if (flags & TCG_BSWAP_OS) {
1074        tcg_out_ext32s(s, dst, tmp);
1075    } else {
1076        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
1077    }
1078}
1079
1080static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
1081{
1082    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
1083    TCGReg t1 = dst == src ? dst : TCG_REG_R0;
1084
1085    if (have_isa_3_10) {
1086        tcg_out32(s, BRD | RA(dst) | RS(src));
1087        return;
1088    }
1089
1090    /*
1091     * In the following,
1092     *   dep(a, b, m) -> (a & ~m) | (b & m)
1093     *
1094     * Begin with:                              src = abcdefgh
1095     */
1096    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
1097    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
1098    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
1099    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
1100    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
1101    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);
1102
1103    /* t0 = rol64(t0, 32)                           = hgfe0000 */
1104    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
1105    /* t1 = rol64(src, 32)                          = efghabcd */
1106    tcg_out_rld(s, RLDICL, t1, src, 32, 0);
1107
1108    /* t0 = dep(t0, rol32(t1, 24), 0xffffffff)      = hgfebcda */
1109    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
1110    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
1111    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
1112    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
1113    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);
1114
1115    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
1116}
1117
1118/* Emit a move into ret of arg, if it can be done in one insn.  */
1119static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
1120{
1121    if (arg == (int16_t)arg) {
1122        tcg_out32(s, ADDI | TAI(ret, 0, arg));
1123        return true;
1124    }
1125    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
1126        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
1127        return true;
1128    }
1129    return false;
1130}
1131
1132static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
1133                             tcg_target_long arg, bool in_prologue)
1134{
1135    intptr_t tb_diff;
1136    tcg_target_long tmp;
1137    int shift;
1138
1139    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1140
1141    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1142        arg = (int32_t)arg;
1143    }
1144
1145    /* Load 16-bit immediates with one insn.  */
1146    if (tcg_out_movi_one(s, ret, arg)) {
1147        return;
1148    }
1149
1150    /* Load addresses within the TB with one insn.  */
1151    tb_diff = ppc_tbrel_diff(s, (void *)arg);
1152    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
1153        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
1154        return;
1155    }
1156
1157    /*
1158     * Load values up to 34 bits, and pc-relative addresses,
1159     * with one prefixed insn.
1160     */
1161    if (have_isa_3_10) {
1162        if (arg == sextract64(arg, 0, 34)) {
1163            /* pli ret,value = paddi ret,0,value,0 */
1164            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
1165            return;
1166        }
1167
1168        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
1169        if (tmp == sextract64(tmp, 0, 34)) {
1170            /* pla ret,value = paddi ret,0,value,1 */
1171            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
1172            return;
1173        }
1174    }
1175
1176    /* Load 32-bit immediates with two insns.  Note that we've already
1177       eliminated bare ADDIS, so we know both insns are required.  */
1178    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
1179        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
1180        tcg_out32(s, ORI | SAI(ret, ret, arg));
1181        return;
1182    }
1183    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
1184        tcg_out32(s, ADDI | TAI(ret, 0, arg));
1185        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
1186        return;
1187    }
1188
1189    /* Load masked 16-bit value.  */
1190    if (arg > 0 && (arg & 0x8000)) {
1191        tmp = arg | 0x7fff;
1192        if ((tmp & (tmp + 1)) == 0) {
1193            int mb = clz64(tmp + 1) + 1;
1194            tcg_out32(s, ADDI | TAI(ret, 0, arg));
1195            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
1196            return;
1197        }
1198    }
1199
1200    /* Load common masks with 2 insns.  */
1201    shift = ctz64(arg);
1202    tmp = arg >> shift;
1203    if (tmp == (int16_t)tmp) {
1204        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
1205        tcg_out_shli64(s, ret, ret, shift);
1206        return;
1207    }
1208    shift = clz64(arg);
1209    if (tcg_out_movi_one(s, ret, arg << shift)) {
1210        tcg_out_shri64(s, ret, ret, shift);
1211        return;
1212    }
1213
1214    /* Load addresses within 2GB with 2 insns. */
1215    if (have_isa_3_00) {
1216        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
1217        int16_t lo = hi;
1218
1219        hi -= lo;
1220        if (hi == (int32_t)hi) {
1221            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
1222            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
1223            return;
1224        }
1225    }
1226
1227    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
1228    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
1229        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
1230        return;
1231    }
1232
1233    /* Use the constant pool, if possible.  */
1234    if (!in_prologue && USE_REG_TB) {
1235        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
1236                       ppc_tbrel_diff(s, NULL));
1237        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
1238        return;
1239    }
1240    if (have_isa_3_10) {
1241        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
1242        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
1243        return;
1244    }
1245    if (have_isa_3_00) {
1246        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
1247        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
1248        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
1249        return;
1250    }
1251
1252    tmp = arg >> 31 >> 1;
1253    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
1254    if (tmp) {
1255        tcg_out_shli64(s, ret, ret, 32);
1256    }
1257    if (arg & 0xffff0000) {
1258        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
1259    }
1260    if (arg & 0xffff) {
1261        tcg_out32(s, ORI | SAI(ret, ret, arg));
1262    }
1263}
1264
1265static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
1266                             TCGReg ret, int64_t val)
1267{
1268    uint32_t load_insn;
1269    int rel, low;
1270    intptr_t add;
1271
1272    switch (vece) {
1273    case MO_8:
1274        low = (int8_t)val;
1275        if (low >= -16 && low < 16) {
1276            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
1277            return;
1278        }
1279        if (have_isa_3_00) {
1280            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
1281            return;
1282        }
1283        break;
1284
1285    case MO_16:
1286        low = (int16_t)val;
1287        if (low >= -16 && low < 16) {
1288            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
1289            return;
1290        }
1291        break;
1292
1293    case MO_32:
1294        low = (int32_t)val;
1295        if (low >= -16 && low < 16) {
1296            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
1297            return;
1298        }
1299        break;
1300    }
1301
1302    /*
1303     * Otherwise we must load the value from the constant pool.
1304     */
1305    if (USE_REG_TB) {
1306        rel = R_PPC_ADDR16;
1307        add = ppc_tbrel_diff(s, NULL);
1308    } else if (have_isa_3_10) {
1309        if (type == TCG_TYPE_V64) {
1310            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
1311            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
1312        } else {
1313            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
1314            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
1315        }
1316        return;
1317    } else if (have_isa_3_00) {
1318        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
1319        rel = R_PPC_REL14;
1320        add = 0;
1321    } else {
1322        rel = R_PPC_ADDR32;
1323        add = 0;
1324    }
1325
1326    if (have_vsx) {
1327        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
1328        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
1329        if (TCG_TARGET_REG_BITS == 64) {
1330            new_pool_label(s, val, rel, s->code_ptr, add);
1331        } else {
1332            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
1333        }
1334    } else {
1335        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
1336        if (TCG_TARGET_REG_BITS == 64) {
1337            new_pool_l2(s, rel, s->code_ptr, add, val, val);
1338        } else {
1339            new_pool_l4(s, rel, s->code_ptr, add,
1340                        val >> 32, val, val >> 32, val);
1341        }
1342    }
1343
1344    if (USE_REG_TB) {
1345        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
1346        load_insn |= RA(TCG_REG_TB);
1347    } else if (have_isa_3_00) {
1348        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
1349    } else {
1350        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
1351        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
1352    }
1353    tcg_out32(s, load_insn);
1354}
1355
1356static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1357                         tcg_target_long arg)
1358{
1359    switch (type) {
1360    case TCG_TYPE_I32:
1361    case TCG_TYPE_I64:
1362        tcg_debug_assert(ret < TCG_REG_V0);
1363        tcg_out_movi_int(s, type, ret, arg, false);
1364        break;
1365
1366    default:
1367        g_assert_not_reached();
1368    }
1369}
1370
1371static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1372{
1373    return false;
1374}
1375
1376static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1377                             tcg_target_long imm)
1378{
1379    /* This function is only used for passing structs by reference. */
1380    g_assert_not_reached();
1381}
1382
/*
 * Decide whether the 32-bit constant c is a single contiguous run of
 * one bits, i.e. has one of the shapes
 *     0....01....1
 *     1....10....0
 *     0..01..10..0
 * All-zeros and all-ones are rejected.
 *
 * On success, *mb and *me receive the begin and end positions of the
 * run, counted from the most significant bit, and true is returned.
 * On failure the outputs are left untouched and false is returned.
 */
static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lowest, carried;

    if (c == 0 || c == UINT32_MAX) {
        return false;
    }

    /*
     * Adding the lowest set bit carries through the bottom run of ones.
     * If that was the only run, at most one bit remains set afterwards.
     */
    lowest = c & -c;
    carried = c + lowest;
    if ((carried & (carried - 1)) != 0) {
        return false;
    }

    *me = clz32(lowest);
    if (carried) {
        *mb = clz32(carried & -carried) + 1;
    } else {
        /* The carry fell off the top: the run starts at bit 0. */
        *mb = 0;
    }
    return true;
}
1406
/*
 * Decide whether the 64-bit constant c is a run of ones anchored at
 * either end of the register: 1..10..0 or 0..01..1.  Zero is rejected.
 *
 * On success, *mb and *me receive the begin and end positions of the
 * run, counted from the most significant bit, and true is returned.
 * On failure the outputs are left untouched and false is returned.
 */
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lowest;

    if (c == 0) {
        return false;
    }
    lowest = c & -c;

    if (c == -lowest) {
        /* Ones from the top down to the lowest set bit: 1..10..0 */
        *mb = 0;
        *me = clz64(lowest);
        return true;
    }

    if (lowest == 1 && (c & (c + 1)) == 0) {
        /* Ones from bit 0 upward with nothing above: 0..01..1 */
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }

    return false;
}
1430
1431static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1432{
1433    int mb, me;
1434
1435    if (mask_operand(c, &mb, &me)) {
1436        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1437    } else if ((c & 0xffff) == c) {
1438        tcg_out32(s, ANDI | SAI(src, dst, c));
1439        return;
1440    } else if ((c & 0xffff0000) == c) {
1441        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1442        return;
1443    } else {
1444        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1445        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1446    }
1447}
1448
1449static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1450{
1451    int mb, me;
1452
1453    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1454    if (mask64_operand(c, &mb, &me)) {
1455        if (mb == 0) {
1456            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1457        } else {
1458            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1459        }
1460    } else if ((c & 0xffff) == c) {
1461        tcg_out32(s, ANDI | SAI(src, dst, c));
1462        return;
1463    } else if ((c & 0xffff0000) == c) {
1464        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1465        return;
1466    } else {
1467        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1468        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1469    }
1470}
1471
1472static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1473                           int op_lo, int op_hi)
1474{
1475    if (c >> 16) {
1476        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1477        src = dst;
1478    }
1479    if (c & 0xffff) {
1480        tcg_out32(s, op_lo | SAI(src, dst, c));
1481        src = dst;
1482    }
1483}
1484
1485static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1486{
1487    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
1488}
1489
1490static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1491{
1492    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
1493}
1494
1495static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
1496{
1497    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1498    if (in_range_b(disp)) {
1499        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1500    } else {
1501        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1502        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1503        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1504    }
1505}
1506
1507static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
1508                             TCGReg base, tcg_target_long offset)
1509{
1510    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
1511    bool is_int_store = false;
1512    TCGReg rs = TCG_REG_TMP1;
1513
1514    switch (opi) {
1515    case LD: case LWA:
1516        align = 3;
1517        /* FALLTHRU */
1518    default:
1519        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
1520            rs = rt;
1521            break;
1522        }
1523        break;
1524    case LXSD:
1525    case STXSD:
1526        align = 3;
1527        break;
1528    case LXV:
1529    case STXV:
1530        align = 15;
1531        break;
1532    case STD:
1533        align = 3;
1534        /* FALLTHRU */
1535    case STB: case STH: case STW:
1536        is_int_store = true;
1537        break;
1538    }
1539
1540    /* For unaligned or large offsets, use the prefixed form. */
1541    if (have_isa_3_10
1542        && (offset != (int16_t)offset || (offset & align))
1543        && offset == sextract64(offset, 0, 34)) {
1544        /*
1545         * Note that the MLS:D insns retain their un-prefixed opcode,
1546         * while the 8LS:D insns use a different opcode space.
1547         */
1548        switch (opi) {
1549        case LBZ:
1550        case LHZ:
1551        case LHA:
1552        case LWZ:
1553        case STB:
1554        case STH:
1555        case STW:
1556        case ADDI:
1557            tcg_out_mls_d(s, opi, rt, base, offset, 0);
1558            return;
1559        case LWA:
1560            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
1561            return;
1562        case LD:
1563            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
1564            return;
1565        case STD:
1566            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
1567            return;
1568        case LXSD:
1569            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
1570            return;
1571        case STXSD:
1572            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
1573            return;
1574        case LXV:
1575            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
1576            return;
1577        case STXV:
1578            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
1579            return;
1580        }
1581    }
1582
1583    /* For unaligned, or very large offsets, use the indexed form.  */
1584    if (offset & align || offset != (int32_t)offset || opi == 0) {
1585        if (rs == base) {
1586            rs = TCG_REG_R0;
1587        }
1588        tcg_debug_assert(!is_int_store || rs != rt);
1589        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
1590        tcg_out32(s, opx | TAB(rt & 31, base, rs));
1591        return;
1592    }
1593
1594    l0 = (int16_t)offset;
1595    offset = (offset - l0) >> 16;
1596    l1 = (int16_t)offset;
1597
1598    if (l1 < 0 && orig >= 0) {
1599        extra = 0x4000;
1600        l1 = (int16_t)(offset - 0x4000);
1601    }
1602    if (l1) {
1603        tcg_out32(s, ADDIS | TAI(rs, base, l1));
1604        base = rs;
1605    }
1606    if (extra) {
1607        tcg_out32(s, ADDIS | TAI(rs, base, extra));
1608        base = rs;
1609    }
1610    if (opi != ADDI || base != rt || l0 != 0) {
1611        tcg_out32(s, opi | TAI(rt & 31, base, l0));
1612    }
1613}
1614
1615static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
1616                           TCGReg va, TCGReg vb, int shb)
1617{
1618    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
1619}
1620
1621static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1622                       TCGReg base, intptr_t offset)
1623{
1624    int shift;
1625
1626    switch (type) {
1627    case TCG_TYPE_I32:
1628        if (ret < TCG_REG_V0) {
1629            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
1630            break;
1631        }
1632        if (have_isa_2_07 && have_vsx) {
1633            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
1634            break;
1635        }
1636        tcg_debug_assert((offset & 3) == 0);
1637        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
1638        shift = (offset - 4) & 0xc;
1639        if (shift) {
1640            tcg_out_vsldoi(s, ret, ret, ret, shift);
1641        }
1642        break;
1643    case TCG_TYPE_I64:
1644        if (ret < TCG_REG_V0) {
1645            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1646            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
1647            break;
1648        }
1649        /* fallthru */
1650    case TCG_TYPE_V64:
1651        tcg_debug_assert(ret >= TCG_REG_V0);
1652        if (have_vsx) {
1653            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
1654                             ret, base, offset);
1655            break;
1656        }
1657        tcg_debug_assert((offset & 7) == 0);
1658        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
1659        if (offset & 8) {
1660            tcg_out_vsldoi(s, ret, ret, ret, 8);
1661        }
1662        break;
1663    case TCG_TYPE_V128:
1664        tcg_debug_assert(ret >= TCG_REG_V0);
1665        tcg_debug_assert((offset & 15) == 0);
1666        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
1667                         LVX, ret, base, offset);
1668        break;
1669    default:
1670        g_assert_not_reached();
1671    }
1672}
1673
1674static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1675                              TCGReg base, intptr_t offset)
1676{
1677    int shift;
1678
1679    switch (type) {
1680    case TCG_TYPE_I32:
1681        if (arg < TCG_REG_V0) {
1682            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1683            break;
1684        }
1685        if (have_isa_2_07 && have_vsx) {
1686            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1687            break;
1688        }
1689        assert((offset & 3) == 0);
1690        tcg_debug_assert((offset & 3) == 0);
1691        shift = (offset - 4) & 0xc;
1692        if (shift) {
1693            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1694            arg = TCG_VEC_TMP1;
1695        }
1696        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1697        break;
1698    case TCG_TYPE_I64:
1699        if (arg < TCG_REG_V0) {
1700            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1701            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1702            break;
1703        }
1704        /* fallthru */
1705    case TCG_TYPE_V64:
1706        tcg_debug_assert(arg >= TCG_REG_V0);
1707        if (have_vsx) {
1708            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1709                             STXSDX, arg, base, offset);
1710            break;
1711        }
1712        tcg_debug_assert((offset & 7) == 0);
1713        if (offset & 8) {
1714            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1715            arg = TCG_VEC_TMP1;
1716        }
1717        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1718        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1719        break;
1720    case TCG_TYPE_V128:
1721        tcg_debug_assert(arg >= TCG_REG_V0);
1722        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1723                         STVX, arg, base, offset);
1724        break;
1725    default:
1726        g_assert_not_reached();
1727    }
1728}
1729
1730static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1731                               TCGReg base, intptr_t ofs)
1732{
1733    return false;
1734}
1735
1736/*
1737 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
1738 * If RC, then also set RC0.
1739 */
1740static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
1741                         bool const_arg2, TCGType type, bool rc)
1742{
1743    int mb, me;
1744
1745    if (!const_arg2) {
1746        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
1747        return;
1748    }
1749
1750    if (type == TCG_TYPE_I32) {
1751        arg2 = (uint32_t)arg2;
1752    } else if (arg2 == (uint32_t)arg2) {
1753        type = TCG_TYPE_I32;
1754    }
1755
1756    if ((arg2 & ~0xffff) == 0) {
1757        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
1758        return;
1759    }
1760    if ((arg2 & ~0xffff0000ull) == 0) {
1761        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
1762        return;
1763    }
1764    if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
1765        if (mask_operand(arg2, &mb, &me)) {
1766            tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
1767            return;
1768        }
1769    } else {
1770        int sh = clz64(arg2);
1771        if (mask64_operand(arg2 << sh, &mb, &me)) {
1772            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
1773            return;
1774        }
1775    }
1776    /* Constraints should satisfy this. */
1777    g_assert_not_reached();
1778}
1779
1780static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1781                        int const_arg2, int cr, TCGType type)
1782{
1783    int imm;
1784    uint32_t op;
1785
1786    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1787
1788    /*
1789     * Simplify the comparisons below wrt CMPI.
1790     * All of the tests are 16-bit, so a 32-bit sign extend always works.
1791     */
1792    if (type == TCG_TYPE_I32) {
1793        arg2 = (int32_t)arg2;
1794    }
1795
1796    switch (cond) {
1797    case TCG_COND_EQ:
1798    case TCG_COND_NE:
1799        if (const_arg2) {
1800            if ((int16_t) arg2 == arg2) {
1801                op = CMPI;
1802                imm = 1;
1803                break;
1804            } else if ((uint16_t) arg2 == arg2) {
1805                op = CMPLI;
1806                imm = 1;
1807                break;
1808            }
1809        }
1810        op = CMPL;
1811        imm = 0;
1812        break;
1813
1814    case TCG_COND_TSTEQ:
1815    case TCG_COND_TSTNE:
1816        tcg_debug_assert(cr == 0);
1817        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
1818        return;
1819
1820    case TCG_COND_LT:
1821    case TCG_COND_GE:
1822    case TCG_COND_LE:
1823    case TCG_COND_GT:
1824        if (const_arg2) {
1825            if ((int16_t) arg2 == arg2) {
1826                op = CMPI;
1827                imm = 1;
1828                break;
1829            }
1830        }
1831        op = CMP;
1832        imm = 0;
1833        break;
1834
1835    case TCG_COND_LTU:
1836    case TCG_COND_GEU:
1837    case TCG_COND_LEU:
1838    case TCG_COND_GTU:
1839        if (const_arg2) {
1840            if ((uint16_t) arg2 == arg2) {
1841                op = CMPLI;
1842                imm = 1;
1843                break;
1844            }
1845        }
1846        op = CMPL;
1847        imm = 0;
1848        break;
1849
1850    default:
1851        g_assert_not_reached();
1852    }
1853    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1854
1855    if (imm) {
1856        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1857    } else {
1858        if (const_arg2) {
1859            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1860            arg2 = TCG_REG_R0;
1861        }
1862        tcg_out32(s, op | RA(arg1) | RB(arg2));
1863    }
1864}
1865
1866static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1867                                TCGReg dst, TCGReg src, bool neg)
1868{
1869    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
1870        /*
1871         * X != 0 implies X + -1 generates a carry.
1872         * RT = (~X + X) + CA
1873         *    = -1 + CA
1874         *    = CA ? 0 : -1
1875         */
1876        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1877        tcg_out32(s, SUBFE | TAB(dst, src, src));
1878        return;
1879    }
1880
1881    if (type == TCG_TYPE_I32) {
1882        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1883        tcg_out_shri32(s, dst, dst, 5);
1884    } else {
1885        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1886        tcg_out_shri64(s, dst, dst, 6);
1887    }
1888    if (neg) {
1889        tcg_out32(s, NEG | RT(dst) | RA(dst));
1890    }
1891}
1892
1893static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
1894                                TCGReg dst, TCGReg src, bool neg)
1895{
1896    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
1897        /*
1898         * X != 0 implies X + -1 generates a carry.  Extra addition
1899         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
1900         */
1901        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1902        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1903        return;
1904    }
1905    tcg_out_setcond_eq0(s, type, dst, src, false);
1906    if (neg) {
1907        tcg_out32(s, ADDI | TAI(dst, dst, -1));
1908    } else {
1909        tcg_out_xori32(s, dst, dst, 1);
1910    }
1911}
1912
1913static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1914                                  bool const_arg2)
1915{
1916    if (const_arg2) {
1917        if ((uint32_t)arg2 == arg2) {
1918            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1919        } else {
1920            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1921            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1922        }
1923    } else {
1924        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1925    }
1926    return TCG_REG_R0;
1927}
1928
1929static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1930                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
1931                            int const_arg2, bool neg)
1932{
1933    int sh;
1934    bool inv;
1935
1936    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1937
1938    /* Ignore high bits of a potential constant arg2.  */
1939    if (type == TCG_TYPE_I32) {
1940        arg2 = (uint32_t)arg2;
1941    }
1942
1943    /* With SETBC/SETBCR, we can always implement with 2 insns. */
1944    if (have_isa_3_10) {
1945        tcg_insn_unit bi, opc;
1946
1947        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
1948
1949        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
1950        bi = tcg_to_bc[cond] & (0x1f << 16);
1951        if (tcg_to_bc[cond] & BO(8)) {
1952            opc = neg ? SETNBC : SETBC;
1953        } else {
1954            opc = neg ? SETNBCR : SETBCR;
1955        }
1956        tcg_out32(s, opc | RT(arg0) | bi);
1957        return;
1958    }
1959
1960    /* Handle common and trivial cases before handling anything else.  */
1961    if (arg2 == 0) {
1962        switch (cond) {
1963        case TCG_COND_EQ:
1964            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1965            return;
1966        case TCG_COND_NE:
1967            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1968            return;
1969        case TCG_COND_GE:
1970            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1971            arg1 = arg0;
1972            /* FALLTHRU */
1973        case TCG_COND_LT:
1974            /* Extract the sign bit.  */
1975            if (type == TCG_TYPE_I32) {
1976                if (neg) {
1977                    tcg_out_sari32(s, arg0, arg1, 31);
1978                } else {
1979                    tcg_out_shri32(s, arg0, arg1, 31);
1980                }
1981            } else {
1982                if (neg) {
1983                    tcg_out_sari64(s, arg0, arg1, 63);
1984                } else {
1985                    tcg_out_shri64(s, arg0, arg1, 63);
1986                }
1987            }
1988            return;
1989        default:
1990            break;
1991        }
1992    }
1993
1994    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1995       All other cases below are also at least 3 insns, so speed up the
1996       code generator by not considering them and always using ISEL.  */
1997    if (have_isel) {
1998        int isel, tab;
1999
2000        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
2001
2002        isel = tcg_to_isel[cond];
2003
2004        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
2005        if (isel & 1) {
2006            /* arg0 = (bc ? 0 : 1) */
2007            tab = TAB(arg0, 0, arg0);
2008            isel &= ~1;
2009        } else {
2010            /* arg0 = (bc ? 1 : 0) */
2011            tcg_out_movi(s, type, TCG_REG_R0, 0);
2012            tab = TAB(arg0, arg0, TCG_REG_R0);
2013        }
2014        tcg_out32(s, isel | tab);
2015        return;
2016    }
2017
2018    inv = false;
2019    switch (cond) {
2020    case TCG_COND_EQ:
2021        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
2022        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
2023        break;
2024
2025    case TCG_COND_NE:
2026        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
2027        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
2028        break;
2029
2030    case TCG_COND_TSTEQ:
2031        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
2032        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
2033        break;
2034
2035    case TCG_COND_TSTNE:
2036        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
2037        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
2038        break;
2039
2040    case TCG_COND_LE:
2041    case TCG_COND_LEU:
2042        inv = true;
2043        /* fall through */
2044    case TCG_COND_GT:
2045    case TCG_COND_GTU:
2046        sh = 30; /* CR7 CR_GT */
2047        goto crtest;
2048
2049    case TCG_COND_GE:
2050    case TCG_COND_GEU:
2051        inv = true;
2052        /* fall through */
2053    case TCG_COND_LT:
2054    case TCG_COND_LTU:
2055        sh = 29; /* CR7 CR_LT */
2056        goto crtest;
2057
2058    crtest:
2059        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
2060        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
2061        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
2062        if (neg && inv) {
2063            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
2064        } else if (neg) {
2065            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
2066        } else if (inv) {
2067            tcg_out_xori32(s, arg0, arg0, 1);
2068        }
2069        break;
2070
2071    default:
2072        g_assert_not_reached();
2073    }
2074}
2075
2076static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
2077{
2078    tcg_out32(s, tcg_to_bc[cond] | bd);
2079}
2080
2081static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
2082{
2083    int bd = 0;
2084    if (l->has_value) {
2085        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
2086    } else {
2087        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
2088    }
2089    tcg_out_bc(s, cond, bd);
2090}
2091
2092static void tcg_out_brcond(TCGContext *s, TCGCond cond,
2093                           TCGArg arg1, TCGArg arg2, int const_arg2,
2094                           TCGLabel *l, TCGType type)
2095{
2096    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
2097    tcg_out_bc_lab(s, cond, l);
2098}
2099
2100static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
2101                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
2102                            TCGArg v2, bool const_c2)
2103{
2104    /* If for some reason both inputs are zero, don't produce bad code.  */
2105    if (v1 == 0 && v2 == 0) {
2106        tcg_out_movi(s, type, dest, 0);
2107        return;
2108    }
2109
2110    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);
2111
2112    if (have_isel) {
2113        int isel = tcg_to_isel[cond];
2114
2115        /* Swap the V operands if the operation indicates inversion.  */
2116        if (isel & 1) {
2117            int t = v1;
2118            v1 = v2;
2119            v2 = t;
2120            isel &= ~1;
2121        }
2122        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
2123        if (v2 == 0) {
2124            tcg_out_movi(s, type, TCG_REG_R0, 0);
2125        }
2126        tcg_out32(s, isel | TAB(dest, v1, v2));
2127    } else {
2128        if (dest == v2) {
2129            cond = tcg_invert_cond(cond);
2130            v2 = v1;
2131        } else if (dest != v1) {
2132            if (v1 == 0) {
2133                tcg_out_movi(s, type, dest, 0);
2134            } else {
2135                tcg_out_mov(s, type, dest, v1);
2136            }
2137        }
2138        /* Branch forward over one insn */
2139        tcg_out_bc(s, cond, 8);
2140        if (v2 == 0) {
2141            tcg_out_movi(s, type, dest, 0);
2142        } else {
2143            tcg_out_mov(s, type, dest, v2);
2144        }
2145    }
2146}
2147
2148static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
2149                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
2150{
2151    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2152        tcg_out32(s, opc | RA(a0) | RS(a1));
2153    } else {
2154        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
2155        /* Note that the only other valid constant for a2 is 0.  */
2156        if (have_isel) {
2157            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
2158            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
2159        } else if (!const_a2 && a0 == a2) {
2160            tcg_out_bc(s, TCG_COND_EQ, 8);
2161            tcg_out32(s, opc | RA(a0) | RS(a1));
2162        } else {
2163            tcg_out32(s, opc | RA(a0) | RS(a1));
2164            tcg_out_bc(s, TCG_COND_NE, 8);
2165            if (const_a2) {
2166                tcg_out_movi(s, type, a0, 0);
2167            } else {
2168                tcg_out_mov(s, type, a0, a2);
2169            }
2170        }
2171    }
2172}
2173
2174static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
2175                         const int *const_args)
2176{
2177    static const struct { uint8_t bit1, bit2; } bits[] = {
2178        [TCG_COND_LT ] = { CR_LT, CR_LT },
2179        [TCG_COND_LE ] = { CR_LT, CR_GT },
2180        [TCG_COND_GT ] = { CR_GT, CR_GT },
2181        [TCG_COND_GE ] = { CR_GT, CR_LT },
2182        [TCG_COND_LTU] = { CR_LT, CR_LT },
2183        [TCG_COND_LEU] = { CR_LT, CR_GT },
2184        [TCG_COND_GTU] = { CR_GT, CR_GT },
2185        [TCG_COND_GEU] = { CR_GT, CR_LT },
2186    };
2187
2188    TCGCond cond = args[4], cond2;
2189    TCGArg al, ah, bl, bh;
2190    int blconst, bhconst;
2191    int op, bit1, bit2;
2192
2193    al = args[0];
2194    ah = args[1];
2195    bl = args[2];
2196    bh = args[3];
2197    blconst = const_args[2];
2198    bhconst = const_args[3];
2199
2200    switch (cond) {
2201    case TCG_COND_EQ:
2202        op = CRAND;
2203        goto do_equality;
2204    case TCG_COND_NE:
2205        op = CRNAND;
2206    do_equality:
2207        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
2208        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
2209        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2210        break;
2211
2212    case TCG_COND_TSTEQ:
2213    case TCG_COND_TSTNE:
2214        if (blconst) {
2215            tcg_out_andi32(s, TCG_REG_R0, al, bl);
2216        } else {
2217            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
2218        }
2219        if (bhconst) {
2220            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
2221        } else {
2222            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
2223        }
2224        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
2225        break;
2226
2227    case TCG_COND_LT:
2228    case TCG_COND_LE:
2229    case TCG_COND_GT:
2230    case TCG_COND_GE:
2231    case TCG_COND_LTU:
2232    case TCG_COND_LEU:
2233    case TCG_COND_GTU:
2234    case TCG_COND_GEU:
2235        bit1 = bits[cond].bit1;
2236        bit2 = bits[cond].bit2;
2237        op = (bit1 != bit2 ? CRANDC : CRAND);
2238        cond2 = tcg_unsigned_cond(cond);
2239
2240        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
2241        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
2242        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
2243        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
2244        break;
2245
2246    default:
2247        g_assert_not_reached();
2248    }
2249}
2250
2251static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
2252                             const int *const_args)
2253{
2254    tcg_out_cmp2(s, args + 1, const_args + 1);
2255    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
2256    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
2257}
2258
2259static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
2260                            const int *const_args)
2261{
2262    tcg_out_cmp2(s, args, const_args);
2263    tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5]));
2264}
2265
2266static void tcg_out_mb(TCGContext *s, TCGArg a0)
2267{
2268    uint32_t insn;
2269
2270    if (a0 & TCG_MO_ST_LD) {
2271        insn = HWSYNC;
2272    } else {
2273        insn = LWSYNC;
2274    }
2275
2276    tcg_out32(s, insn);
2277}
2278
2279static void tcg_out_call_int(TCGContext *s, int lk,
2280                             const tcg_insn_unit *target)
2281{
2282#ifdef _CALL_AIX
2283    /* Look through the descriptor.  If the branch is in range, and we
2284       don't have to spend too much effort on building the toc.  */
2285    const void *tgt = ((const void * const *)target)[0];
2286    uintptr_t toc = ((const uintptr_t *)target)[1];
2287    intptr_t diff = tcg_pcrel_diff(s, tgt);
2288
2289    if (in_range_b(diff) && toc == (uint32_t)toc) {
2290        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
2291        tcg_out_b(s, lk, tgt);
2292    } else {
2293        /* Fold the low bits of the constant into the addresses below.  */
2294        intptr_t arg = (intptr_t)target;
2295        int ofs = (int16_t)arg;
2296
2297        if (ofs + 8 < 0x8000) {
2298            arg -= ofs;
2299        } else {
2300            ofs = 0;
2301        }
2302        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
2303        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
2304        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
2305        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
2306        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2307    }
2308#elif defined(_CALL_ELF) && _CALL_ELF == 2
2309    intptr_t diff;
2310
2311    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
2312       address, which the callee uses to compute its TOC address.  */
2313    /* FIXME: when the branch is in range, we could avoid r12 load if we
2314       knew that the destination uses the same TOC, and what its local
2315       entry point offset is.  */
2316    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
2317
2318    diff = tcg_pcrel_diff(s, target);
2319    if (in_range_b(diff)) {
2320        tcg_out_b(s, lk, target);
2321    } else {
2322        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
2323        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2324    }
2325#else
2326    tcg_out_b(s, lk, target);
2327#endif
2328}
2329
2330static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
2331                         const TCGHelperInfo *info)
2332{
2333    tcg_out_call_int(s, LK, target);
2334}
2335
2336static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
2337    [MO_UB] = LBZX,
2338    [MO_UW] = LHZX,
2339    [MO_UL] = LWZX,
2340    [MO_UQ] = LDX,
2341    [MO_SW] = LHAX,
2342    [MO_SL] = LWAX,
2343    [MO_BSWAP | MO_UB] = LBZX,
2344    [MO_BSWAP | MO_UW] = LHBRX,
2345    [MO_BSWAP | MO_UL] = LWBRX,
2346    [MO_BSWAP | MO_UQ] = LDBRX,
2347};
2348
2349static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
2350    [MO_UB] = STBX,
2351    [MO_UW] = STHX,
2352    [MO_UL] = STWX,
2353    [MO_UQ] = STDX,
2354    [MO_BSWAP | MO_UB] = STBX,
2355    [MO_BSWAP | MO_UW] = STHBRX,
2356    [MO_BSWAP | MO_UL] = STWBRX,
2357    [MO_BSWAP | MO_UQ] = STDBRX,
2358};
2359
2360static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2361{
2362    if (arg < 0) {
2363        arg = TCG_REG_TMP1;
2364    }
2365    tcg_out32(s, MFSPR | RT(arg) | LR);
2366    return arg;
2367}
2368
2369/*
2370 * For the purposes of ppc32 sorting 4 input registers into 4 argument
2371 * registers, there is an outside chance we would require 3 temps.
2372 */
2373static const TCGLdstHelperParam ldst_helper_param = {
2374    .ra_gen = ldst_ra_gen,
2375    .ntmp = 3,
2376    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
2377};
2378
2379static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2380{
2381    MemOp opc = get_memop(lb->oi);
2382
2383    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2384        return false;
2385    }
2386
2387    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
2388    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
2389    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
2390
2391    tcg_out_b(s, 0, lb->raddr);
2392    return true;
2393}
2394
2395static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2396{
2397    MemOp opc = get_memop(lb->oi);
2398
2399    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2400        return false;
2401    }
2402
2403    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
2404    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
2405
2406    tcg_out_b(s, 0, lb->raddr);
2407    return true;
2408}
2409
/*
 * Host address of a guest memory access, as produced by
 * prepare_host_addr() and consumed by the qemu_ld/st emitters.
 */
typedef struct {
    /* Base register for indexed addressing; 0 means "no base register". */
    TCGReg base;
    /* Register holding the guest address to add to the base. */
    TCGReg index;
    /* Atomicity and alignment computed by atom_and_align_for_opc(). */
    TCGAtomAlign aa;
} HostAddress;
2415
2416bool tcg_target_has_memory_bswap(MemOp memop)
2417{
2418    TCGAtomAlign aa;
2419
2420    if ((memop & MO_SIZE) <= MO_64) {
2421        return true;
2422    }
2423
2424    /*
2425     * Reject 16-byte memop with 16-byte atomicity,
2426     * but do allow a pair of 64-bit operations.
2427     */
2428    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2429    return aa.atom <= MO_64;
2430}
2431
/*
 * We expect to use a 16-bit negative offset from ENV.
 * -32768 is the most negative value a signed 16-bit displacement can hold.
 */
#define MIN_TLB_MASK_TABLE_OFS  -32768
2434
2435/*
2436 * For system-mode, perform the TLB load and compare.
2437 * For user-mode, perform any required alignment tests.
2438 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2439 * is required and fill in @h with the host address for the fast path.
2440 */
2441static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2442                                           TCGReg addrlo, TCGReg addrhi,
2443                                           MemOpIdx oi, bool is_ld)
2444{
2445    TCGType addr_type = s->addr_type;
2446    TCGLabelQemuLdst *ldst = NULL;
2447    MemOp opc = get_memop(oi);
2448    MemOp a_bits, s_bits;
2449
2450    /*
2451     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2452     *
2453     * Before 3.0, "An access that is not atomic is performed as a set of
2454     * smaller disjoint atomic accesses. In general, the number and alignment
2455     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
2456     *
2457     * As of 3.0, "the non-atomic access is performed as described in
2458     * the corresponding list", which matches MO_ATOM_SUBALIGN.
2459     */
2460    s_bits = opc & MO_SIZE;
2461    h->aa = atom_and_align_for_opc(s, opc,
2462                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
2463                                                 : MO_ATOM_IFALIGN,
2464                                   s_bits == MO_128);
2465    a_bits = h->aa.align;
2466
2467    if (tcg_use_softmmu) {
2468        int mem_index = get_mmuidx(oi);
2469        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2470                            : offsetof(CPUTLBEntry, addr_write);
2471        int fast_off = tlb_mask_table_ofs(s, mem_index);
2472        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2473        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2474
2475        ldst = new_ldst_label(s);
2476        ldst->is_ld = is_ld;
2477        ldst->oi = oi;
2478        ldst->addrlo_reg = addrlo;
2479        ldst->addrhi_reg = addrhi;
2480
2481        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2482        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2483        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2484
2485        /* Extract the page index, shifted into place for tlb index.  */
2486        if (TCG_TARGET_REG_BITS == 32) {
2487            tcg_out_shri32(s, TCG_REG_R0, addrlo,
2488                           s->page_bits - CPU_TLB_ENTRY_BITS);
2489        } else {
2490            tcg_out_shri64(s, TCG_REG_R0, addrlo,
2491                           s->page_bits - CPU_TLB_ENTRY_BITS);
2492        }
2493        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2494
2495        /*
2496         * Load the (low part) TLB comparator into TMP2.
2497         * For 64-bit host, always load the entire 64-bit slot for simplicity.
2498         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2499         */
2500        if (TCG_TARGET_REG_BITS == 64) {
2501            if (cmp_off == 0) {
2502                tcg_out32(s, LDUX | TAB(TCG_REG_TMP2,
2503                                        TCG_REG_TMP1, TCG_REG_TMP2));
2504            } else {
2505                tcg_out32(s, ADD | TAB(TCG_REG_TMP1,
2506                                       TCG_REG_TMP1, TCG_REG_TMP2));
2507                tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2,
2508                           TCG_REG_TMP1, cmp_off);
2509            }
2510        } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
2511            tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2,
2512                                     TCG_REG_TMP1, TCG_REG_TMP2));
2513        } else {
2514            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2515            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2516                       cmp_off + 4 * HOST_BIG_ENDIAN);
2517        }
2518
2519        /*
2520         * Load the TLB addend for use on the fast path.
2521         * Do this asap to minimize any load use delay.
2522         */
2523        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2524            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2525                       offsetof(CPUTLBEntry, addend));
2526        }
2527
2528        /* Clear the non-page, non-alignment bits from the address in R0. */
2529        if (TCG_TARGET_REG_BITS == 32) {
2530            /*
2531             * We don't support unaligned accesses on 32-bits.
2532             * Preserve the bottom bits and thus trigger a comparison
2533             * failure on unaligned accesses.
2534             */
2535            if (a_bits < s_bits) {
2536                a_bits = s_bits;
2537            }
2538            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2539                        (32 - a_bits) & 31, 31 - s->page_bits);
2540        } else {
2541            TCGReg t = addrlo;
2542
2543            /*
2544             * If the access is unaligned, we need to make sure we fail if we
2545             * cross a page boundary.  The trick is to add the access size-1
2546             * to the address before masking the low bits.  That will make the
2547             * address overflow to the next page if we cross a page boundary,
2548             * which will then force a mismatch of the TLB compare.
2549             */
2550            if (a_bits < s_bits) {
2551                unsigned a_mask = (1 << a_bits) - 1;
2552                unsigned s_mask = (1 << s_bits) - 1;
2553                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2554                t = TCG_REG_R0;
2555            }
2556
2557            /* Mask the address for the requested alignment.  */
2558            if (addr_type == TCG_TYPE_I32) {
2559                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2560                            (32 - a_bits) & 31, 31 - s->page_bits);
2561            } else if (a_bits == 0) {
2562                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2563            } else {
2564                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2565                            64 - s->page_bits, s->page_bits - a_bits);
2566                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
2567            }
2568        }
2569
2570        if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
2571            /* Low part comparison into cr7. */
2572            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2573                        0, 7, TCG_TYPE_I32);
2574
2575            /* Load the high part TLB comparator into TMP2.  */
2576            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2577                       cmp_off + 4 * !HOST_BIG_ENDIAN);
2578
2579            /* Load addend, deferred for this case. */
2580            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2581                       offsetof(CPUTLBEntry, addend));
2582
2583            /* High part comparison into cr6. */
2584            tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2,
2585                        0, 6, TCG_TYPE_I32);
2586
2587            /* Combine comparisons into cr0. */
2588            tcg_out32(s, CRAND | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2589        } else {
2590            /* Full comparison into cr0. */
2591            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2592                        0, 0, addr_type);
2593        }
2594
2595        /* Load a pointer into the current opcode w/conditional branch-link. */
2596        ldst->label_ptr[0] = s->code_ptr;
2597        tcg_out_bc(s, TCG_COND_NE, LK);
2598
2599        h->base = TCG_REG_TMP1;
2600    } else {
2601        if (a_bits) {
2602            ldst = new_ldst_label(s);
2603            ldst->is_ld = is_ld;
2604            ldst->oi = oi;
2605            ldst->addrlo_reg = addrlo;
2606            ldst->addrhi_reg = addrhi;
2607
2608            /* We are expecting a_bits to max out at 7, much lower than ANDI. */
2609            tcg_debug_assert(a_bits < 16);
2610            tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
2611
2612            ldst->label_ptr[0] = s->code_ptr;
2613            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2614        }
2615
2616        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2617    }
2618
2619    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2620        /* Zero-extend the guest address for use in the host address. */
2621        tcg_out_ext32u(s, TCG_REG_R0, addrlo);
2622        h->index = TCG_REG_R0;
2623    } else {
2624        h->index = addrlo;
2625    }
2626
2627    return ldst;
2628}
2629
2630static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2631                            TCGReg addrlo, TCGReg addrhi,
2632                            MemOpIdx oi, TCGType data_type)
2633{
2634    MemOp opc = get_memop(oi);
2635    TCGLabelQemuLdst *ldst;
2636    HostAddress h;
2637
2638    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
2639
2640    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2641        if (opc & MO_BSWAP) {
2642            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2643            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2644            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
2645        } else if (h.base != 0) {
2646            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2647            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
2648            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
2649        } else if (h.index == datahi) {
2650            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2651            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2652        } else {
2653            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2654            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2655        }
2656    } else {
2657        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2658        if (!have_isa_2_06 && insn == LDBRX) {
2659            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2660            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2661            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
2662            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2663        } else if (insn) {
2664            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2665        } else {
2666            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2667            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2668            tcg_out_movext(s, TCG_TYPE_REG, datalo,
2669                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
2670        }
2671    }
2672
2673    if (ldst) {
2674        ldst->type = data_type;
2675        ldst->datalo_reg = datalo;
2676        ldst->datahi_reg = datahi;
2677        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2678    }
2679}
2680
2681static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2682                            TCGReg addrlo, TCGReg addrhi,
2683                            MemOpIdx oi, TCGType data_type)
2684{
2685    MemOp opc = get_memop(oi);
2686    TCGLabelQemuLdst *ldst;
2687    HostAddress h;
2688
2689    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
2690
2691    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2692        if (opc & MO_BSWAP) {
2693            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2694            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2695            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
2696        } else if (h.base != 0) {
2697            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2698            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
2699            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
2700        } else {
2701            tcg_out32(s, STW | TAI(datahi, h.index, 0));
2702            tcg_out32(s, STW | TAI(datalo, h.index, 4));
2703        }
2704    } else {
2705        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2706        if (!have_isa_2_06 && insn == STDBRX) {
2707            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2708            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4));
2709            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2710            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1));
2711        } else {
2712            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
2713        }
2714    }
2715
2716    if (ldst) {
2717        ldst->type = data_type;
2718        ldst->datalo_reg = datalo;
2719        ldst->datahi_reg = datahi;
2720        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2721    }
2722}
2723
2724static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
2725                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
2726{
2727    TCGLabelQemuLdst *ldst;
2728    HostAddress h;
2729    bool need_bswap;
2730    uint32_t insn;
2731    TCGReg index;
2732
2733    ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
2734
2735    /* Compose the final address, as LQ/STQ have no indexing. */
2736    index = h.index;
2737    if (h.base != 0) {
2738        index = TCG_REG_TMP1;
2739        tcg_out32(s, ADD | TAB(index, h.base, h.index));
2740    }
2741    need_bswap = get_memop(oi) & MO_BSWAP;
2742
2743    if (h.aa.atom == MO_128) {
2744        tcg_debug_assert(!need_bswap);
2745        tcg_debug_assert(datalo & 1);
2746        tcg_debug_assert(datahi == datalo - 1);
2747        tcg_debug_assert(!is_ld || datahi != index);
2748        insn = is_ld ? LQ : STQ;
2749        tcg_out32(s, insn | TAI(datahi, index, 0));
2750    } else {
2751        TCGReg d1, d2;
2752
2753        if (HOST_BIG_ENDIAN ^ need_bswap) {
2754            d1 = datahi, d2 = datalo;
2755        } else {
2756            d1 = datalo, d2 = datahi;
2757        }
2758
2759        if (need_bswap) {
2760            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
2761            insn = is_ld ? LDBRX : STDBRX;
2762            tcg_out32(s, insn | TAB(d1, 0, index));
2763            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
2764        } else {
2765            insn = is_ld ? LD : STD;
2766            tcg_out32(s, insn | TAI(d1, index, 0));
2767            tcg_out32(s, insn | TAI(d2, index, 8));
2768        }
2769    }
2770
2771    if (ldst) {
2772        ldst->type = TCG_TYPE_I128;
2773        ldst->datalo_reg = datalo;
2774        ldst->datahi_reg = datahi;
2775        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2776    }
2777}
2778
2779static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2780{
2781    int i;
2782    for (i = 0; i < count; ++i) {
2783        p[i] = NOP;
2784    }
2785}
2786
/* Parameters for function call generation, used in tcg.c.  */
#define TCG_TARGET_STACK_ALIGN       16

/*
 * Per-ABI stack layout, in units of SZR:
 *   LINK_AREA_SIZE  size of the linkage area at the bottom of a frame;
 *   LR_OFFSET       offset of the saved-LR slot from the stack pointer
 *                   value on entry (see its use in the prologue).
 * _CALL_AIX additionally reserves 8 register-sized slots after the
 * linkage area before outgoing call arguments start.
 */
#ifdef _CALL_AIX
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
#elif defined(_CALL_DARWIN)
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
#  define LINK_AREA_SIZE               (4 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
# if defined(_CALL_SYSV)
#  define LINK_AREA_SIZE               (2 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#endif
/* Any ABI not matched above leaves LR_OFFSET undefined. */
#ifndef LR_OFFSET
# error "Unhandled abi"
#endif
/* By default outgoing call arguments start right after the linkage area. */
#ifndef TCG_TARGET_CALL_STACK_OFFSET
# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
#endif

/* Byte sizes of the TCG temporary buffer and the callee-saved register area. */
#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)

/* Total frame size, rounded up to a multiple of TCG_TARGET_STACK_ALIGN. */
#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
                     + TCG_STATIC_CALL_ARGS_SIZE    \
                     + CPU_TEMP_BUF_SIZE            \
                     + REG_SAVE_SIZE                \
                     + TCG_TARGET_STACK_ALIGN - 1)  \
                    & -TCG_TARGET_STACK_ALIGN)

/* Frame offset of the first callee-saved register slot (top of the frame). */
#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2826
2827static void tcg_target_qemu_prologue(TCGContext *s)
2828{
2829    int i;
2830
2831#ifdef _CALL_AIX
2832    const void **desc = (const void **)s->code_ptr;
2833    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2834    desc[1] = 0;                            /* environment pointer */
2835    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2836#endif
2837
2838    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2839                  CPU_TEMP_BUF_SIZE);
2840
2841    /* Prologue */
2842    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2843    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2844              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2845
2846    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2847        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2848                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2849    }
2850    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2851
2852    if (!tcg_use_softmmu && guest_base) {
2853        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2854        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2855    }
2856
2857    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2858    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2859    tcg_out32(s, BCCTR | BO_ALWAYS);
2860
2861    /* Epilogue */
2862    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2863
2864    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2865    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2866        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2867                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2868    }
2869    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2870    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2871    tcg_out32(s, BCLR | BO_ALWAYS);
2872}
2873
2874static void tcg_out_tb_start(TCGContext *s)
2875{
2876    /* Load TCG_REG_TB. */
2877    if (USE_REG_TB) {
2878        if (have_isa_3_00) {
2879            /* lnia REG_TB */
2880            tcg_out_addpcis(s, TCG_REG_TB, 0);
2881        } else {
2882            /* bcl 20,31,$+4 (preferred form for getting nia) */
2883            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
2884            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
2885        }
2886    }
2887}
2888
2889static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2890{
2891    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2892    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2893}
2894
2895static void tcg_out_goto_tb(TCGContext *s, int which)
2896{
2897    uintptr_t ptr = get_jmp_target_addr(s, which);
2898    int16_t lo;
2899
2900    /* Direct branch will be patched by tb_target_set_jmp_target. */
2901    set_jmp_insn_offset(s, which);
2902    tcg_out32(s, NOP);
2903
2904    /* When branch is out of range, fall through to indirect. */
2905    if (USE_REG_TB) {
2906        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
2907        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
2908    } else if (have_isa_3_10) {
2909        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
2910        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
2911    } else if (have_isa_3_00) {
2912        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
2913        lo = offset;
2914        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
2915        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2916    } else {
2917        lo = ptr;
2918        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
2919        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2920    }
2921
2922    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2923    tcg_out32(s, BCCTR | BO_ALWAYS);
2924    set_jmp_reset_offset(s, which);
2925}
2926
2927void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2928                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2929{
2930    uintptr_t addr = tb->jmp_target_addr[n];
2931    intptr_t diff = addr - jmp_rx;
2932    tcg_insn_unit insn;
2933
2934    if (in_range_b(diff)) {
2935        insn = B | (diff & 0x3fffffc);
2936    } else {
2937        insn = NOP;
2938    }
2939
2940    qatomic_set((uint32_t *)jmp_rw, insn);
2941    flush_idcache_range(jmp_rx, jmp_rw, 4);
2942}
2943
2944static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2945                       const TCGArg args[TCG_MAX_OP_ARGS],
2946                       const int const_args[TCG_MAX_OP_ARGS])
2947{
2948    TCGArg a0, a1, a2;
2949
2950    switch (opc) {
2951    case INDEX_op_goto_ptr:
2952        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2953        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2954        tcg_out32(s, BCCTR | BO_ALWAYS);
2955        break;
2956    case INDEX_op_br:
2957        {
2958            TCGLabel *l = arg_label(args[0]);
2959            uint32_t insn = B;
2960
2961            if (l->has_value) {
2962                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2963                                       l->u.value_ptr);
2964            } else {
2965                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2966            }
2967            tcg_out32(s, insn);
2968        }
2969        break;
2970    case INDEX_op_ld8u_i32:
2971    case INDEX_op_ld8u_i64:
2972        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2973        break;
2974    case INDEX_op_ld8s_i32:
2975    case INDEX_op_ld8s_i64:
2976        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2977        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
2978        break;
2979    case INDEX_op_ld16u_i32:
2980    case INDEX_op_ld16u_i64:
2981        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2982        break;
2983    case INDEX_op_ld16s_i32:
2984    case INDEX_op_ld16s_i64:
2985        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2986        break;
2987    case INDEX_op_ld_i32:
2988    case INDEX_op_ld32u_i64:
2989        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2990        break;
2991    case INDEX_op_ld32s_i64:
2992        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2993        break;
2994    case INDEX_op_ld_i64:
2995        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2996        break;
2997    case INDEX_op_st8_i32:
2998    case INDEX_op_st8_i64:
2999        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
3000        break;
3001    case INDEX_op_st16_i32:
3002    case INDEX_op_st16_i64:
3003        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
3004        break;
3005    case INDEX_op_st_i32:
3006    case INDEX_op_st32_i64:
3007        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
3008        break;
3009    case INDEX_op_st_i64:
3010        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
3011        break;
3012
3013    case INDEX_op_add_i32:
3014        a0 = args[0], a1 = args[1], a2 = args[2];
3015        if (const_args[2]) {
3016        do_addi_32:
3017            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
3018        } else {
3019            tcg_out32(s, ADD | TAB(a0, a1, a2));
3020        }
3021        break;
3022    case INDEX_op_sub_i32:
3023        a0 = args[0], a1 = args[1], a2 = args[2];
3024        if (const_args[1]) {
3025            if (const_args[2]) {
3026                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
3027            } else {
3028                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3029            }
3030        } else if (const_args[2]) {
3031            a2 = -a2;
3032            goto do_addi_32;
3033        } else {
3034            tcg_out32(s, SUBF | TAB(a0, a2, a1));
3035        }
3036        break;
3037
3038    case INDEX_op_and_i32:
3039        a0 = args[0], a1 = args[1], a2 = args[2];
3040        if (const_args[2]) {
3041            tcg_out_andi32(s, a0, a1, a2);
3042        } else {
3043            tcg_out32(s, AND | SAB(a1, a0, a2));
3044        }
3045        break;
3046    case INDEX_op_and_i64:
3047        a0 = args[0], a1 = args[1], a2 = args[2];
3048        if (const_args[2]) {
3049            tcg_out_andi64(s, a0, a1, a2);
3050        } else {
3051            tcg_out32(s, AND | SAB(a1, a0, a2));
3052        }
3053        break;
3054    case INDEX_op_or_i64:
3055    case INDEX_op_or_i32:
3056        a0 = args[0], a1 = args[1], a2 = args[2];
3057        if (const_args[2]) {
3058            tcg_out_ori32(s, a0, a1, a2);
3059        } else {
3060            tcg_out32(s, OR | SAB(a1, a0, a2));
3061        }
3062        break;
3063    case INDEX_op_xor_i64:
3064    case INDEX_op_xor_i32:
3065        a0 = args[0], a1 = args[1], a2 = args[2];
3066        if (const_args[2]) {
3067            tcg_out_xori32(s, a0, a1, a2);
3068        } else {
3069            tcg_out32(s, XOR | SAB(a1, a0, a2));
3070        }
3071        break;
3072    case INDEX_op_andc_i32:
3073        a0 = args[0], a1 = args[1], a2 = args[2];
3074        if (const_args[2]) {
3075            tcg_out_andi32(s, a0, a1, ~a2);
3076        } else {
3077            tcg_out32(s, ANDC | SAB(a1, a0, a2));
3078        }
3079        break;
3080    case INDEX_op_andc_i64:
3081        a0 = args[0], a1 = args[1], a2 = args[2];
3082        if (const_args[2]) {
3083            tcg_out_andi64(s, a0, a1, ~a2);
3084        } else {
3085            tcg_out32(s, ANDC | SAB(a1, a0, a2));
3086        }
3087        break;
3088    case INDEX_op_orc_i32:
3089        if (const_args[2]) {
3090            tcg_out_ori32(s, args[0], args[1], ~args[2]);
3091            break;
3092        }
3093        /* FALLTHRU */
3094    case INDEX_op_orc_i64:
3095        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
3096        break;
3097    case INDEX_op_eqv_i32:
3098        if (const_args[2]) {
3099            tcg_out_xori32(s, args[0], args[1], ~args[2]);
3100            break;
3101        }
3102        /* FALLTHRU */
3103    case INDEX_op_eqv_i64:
3104        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
3105        break;
3106    case INDEX_op_nand_i32:
3107    case INDEX_op_nand_i64:
3108        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
3109        break;
3110    case INDEX_op_nor_i32:
3111    case INDEX_op_nor_i64:
3112        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
3113        break;
3114
3115    case INDEX_op_clz_i32:
3116        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
3117                      args[2], const_args[2]);
3118        break;
3119    case INDEX_op_ctz_i32:
3120        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
3121                      args[2], const_args[2]);
3122        break;
3123    case INDEX_op_ctpop_i32:
3124        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
3125        break;
3126
3127    case INDEX_op_clz_i64:
3128        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
3129                      args[2], const_args[2]);
3130        break;
3131    case INDEX_op_ctz_i64:
3132        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
3133                      args[2], const_args[2]);
3134        break;
3135    case INDEX_op_ctpop_i64:
3136        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
3137        break;
3138
3139    case INDEX_op_mul_i32:
3140        a0 = args[0], a1 = args[1], a2 = args[2];
3141        if (const_args[2]) {
3142            tcg_out32(s, MULLI | TAI(a0, a1, a2));
3143        } else {
3144            tcg_out32(s, MULLW | TAB(a0, a1, a2));
3145        }
3146        break;
3147
3148    case INDEX_op_div_i32:
3149        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
3150        break;
3151
3152    case INDEX_op_divu_i32:
3153        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
3154        break;
3155
3156    case INDEX_op_rem_i32:
3157        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
3158        break;
3159
3160    case INDEX_op_remu_i32:
3161        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
3162        break;
3163
3164    case INDEX_op_shl_i32:
3165        if (const_args[2]) {
3166            /* Limit immediate shift count lest we create an illegal insn.  */
3167            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
3168        } else {
3169            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
3170        }
3171        break;
3172    case INDEX_op_shr_i32:
3173        if (const_args[2]) {
3174            /* Limit immediate shift count lest we create an illegal insn.  */
3175            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
3176        } else {
3177            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
3178        }
3179        break;
3180    case INDEX_op_sar_i32:
3181        if (const_args[2]) {
3182            tcg_out_sari32(s, args[0], args[1], args[2]);
3183        } else {
3184            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
3185        }
3186        break;
3187    case INDEX_op_rotl_i32:
3188        if (const_args[2]) {
3189            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
3190        } else {
3191            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
3192                         | MB(0) | ME(31));
3193        }
3194        break;
3195    case INDEX_op_rotr_i32:
3196        if (const_args[2]) {
3197            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
3198        } else {
3199            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
3200            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
3201                         | MB(0) | ME(31));
3202        }
3203        break;
3204
3205    case INDEX_op_brcond_i32:
3206        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
3207                       arg_label(args[3]), TCG_TYPE_I32);
3208        break;
3209    case INDEX_op_brcond_i64:
3210        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
3211                       arg_label(args[3]), TCG_TYPE_I64);
3212        break;
3213    case INDEX_op_brcond2_i32:
3214        tcg_out_brcond2(s, args, const_args);
3215        break;
3216
3217    case INDEX_op_neg_i32:
3218    case INDEX_op_neg_i64:
3219        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
3220        break;
3221
3222    case INDEX_op_not_i32:
3223    case INDEX_op_not_i64:
3224        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
3225        break;
3226
3227    case INDEX_op_add_i64:
3228        a0 = args[0], a1 = args[1], a2 = args[2];
3229        if (const_args[2]) {
3230        do_addi_64:
3231            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
3232        } else {
3233            tcg_out32(s, ADD | TAB(a0, a1, a2));
3234        }
3235        break;
3236    case INDEX_op_sub_i64:
3237        a0 = args[0], a1 = args[1], a2 = args[2];
3238        if (const_args[1]) {
3239            if (const_args[2]) {
3240                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
3241            } else {
3242                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3243            }
3244        } else if (const_args[2]) {
3245            a2 = -a2;
3246            goto do_addi_64;
3247        } else {
3248            tcg_out32(s, SUBF | TAB(a0, a2, a1));
3249        }
3250        break;
3251
3252    case INDEX_op_shl_i64:
3253        if (const_args[2]) {
3254            /* Limit immediate shift count lest we create an illegal insn.  */
3255            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
3256        } else {
3257            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
3258        }
3259        break;
3260    case INDEX_op_shr_i64:
3261        if (const_args[2]) {
3262            /* Limit immediate shift count lest we create an illegal insn.  */
3263            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
3264        } else {
3265            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
3266        }
3267        break;
3268    case INDEX_op_sar_i64:
3269        if (const_args[2]) {
3270            tcg_out_sari64(s, args[0], args[1], args[2]);
3271        } else {
3272            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
3273        }
3274        break;
3275    case INDEX_op_rotl_i64:
3276        if (const_args[2]) {
3277            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
3278        } else {
3279            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
3280        }
3281        break;
3282    case INDEX_op_rotr_i64:
3283        if (const_args[2]) {
3284            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
3285        } else {
3286            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
3287            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
3288        }
3289        break;
3290
3291    case INDEX_op_mul_i64:
3292        a0 = args[0], a1 = args[1], a2 = args[2];
3293        if (const_args[2]) {
3294            tcg_out32(s, MULLI | TAI(a0, a1, a2));
3295        } else {
3296            tcg_out32(s, MULLD | TAB(a0, a1, a2));
3297        }
3298        break;
3299    case INDEX_op_div_i64:
3300        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
3301        break;
3302    case INDEX_op_divu_i64:
3303        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
3304        break;
3305    case INDEX_op_rem_i64:
3306        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
3307        break;
3308    case INDEX_op_remu_i64:
3309        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
3310        break;
3311
3312    case INDEX_op_qemu_ld_a64_i32:
3313        if (TCG_TARGET_REG_BITS == 32) {
3314            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
3315                            args[3], TCG_TYPE_I32);
3316            break;
3317        }
3318        /* fall through */
3319    case INDEX_op_qemu_ld_a32_i32:
3320        tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
3321        break;
3322    case INDEX_op_qemu_ld_a32_i64:
3323        if (TCG_TARGET_REG_BITS == 64) {
3324            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
3325                            args[2], TCG_TYPE_I64);
3326        } else {
3327            tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
3328                            args[3], TCG_TYPE_I64);
3329        }
3330        break;
3331    case INDEX_op_qemu_ld_a64_i64:
3332        if (TCG_TARGET_REG_BITS == 64) {
3333            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
3334                            args[2], TCG_TYPE_I64);
3335        } else {
3336            tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
3337                            args[4], TCG_TYPE_I64);
3338        }
3339        break;
3340    case INDEX_op_qemu_ld_a32_i128:
3341    case INDEX_op_qemu_ld_a64_i128:
3342        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3343        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
3344        break;
3345
3346    case INDEX_op_qemu_st_a64_i32:
3347        if (TCG_TARGET_REG_BITS == 32) {
3348            tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
3349                            args[3], TCG_TYPE_I32);
3350            break;
3351        }
3352        /* fall through */
3353    case INDEX_op_qemu_st_a32_i32:
3354        tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
3355        break;
3356    case INDEX_op_qemu_st_a32_i64:
3357        if (TCG_TARGET_REG_BITS == 64) {
3358            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
3359                            args[2], TCG_TYPE_I64);
3360        } else {
3361            tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
3362                            args[3], TCG_TYPE_I64);
3363        }
3364        break;
3365    case INDEX_op_qemu_st_a64_i64:
3366        if (TCG_TARGET_REG_BITS == 64) {
3367            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
3368                            args[2], TCG_TYPE_I64);
3369        } else {
3370            tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
3371                            args[4], TCG_TYPE_I64);
3372        }
3373        break;
3374    case INDEX_op_qemu_st_a32_i128:
3375    case INDEX_op_qemu_st_a64_i128:
3376        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3377        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
3378        break;
3379
3380    case INDEX_op_setcond_i32:
3381        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
3382                        const_args[2], false);
3383        break;
3384    case INDEX_op_setcond_i64:
3385        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
3386                        const_args[2], false);
3387        break;
3388    case INDEX_op_negsetcond_i32:
3389        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
3390                        const_args[2], true);
3391        break;
3392    case INDEX_op_negsetcond_i64:
3393        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
3394                        const_args[2], true);
3395        break;
3396    case INDEX_op_setcond2_i32:
3397        tcg_out_setcond2(s, args, const_args);
3398        break;
3399
3400    case INDEX_op_bswap16_i32:
3401    case INDEX_op_bswap16_i64:
3402        tcg_out_bswap16(s, args[0], args[1], args[2]);
3403        break;
3404    case INDEX_op_bswap32_i32:
3405        tcg_out_bswap32(s, args[0], args[1], 0);
3406        break;
3407    case INDEX_op_bswap32_i64:
3408        tcg_out_bswap32(s, args[0], args[1], args[2]);
3409        break;
3410    case INDEX_op_bswap64_i64:
3411        tcg_out_bswap64(s, args[0], args[1]);
3412        break;
3413
3414    case INDEX_op_deposit_i32:
3415        if (const_args[2]) {
3416            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3417            tcg_out_andi32(s, args[0], args[0], ~mask);
3418        } else {
3419            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3420                        32 - args[3] - args[4], 31 - args[3]);
3421        }
3422        break;
3423    case INDEX_op_deposit_i64:
3424        if (const_args[2]) {
3425            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3426            tcg_out_andi64(s, args[0], args[0], ~mask);
3427        } else {
3428            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3429                        64 - args[3] - args[4]);
3430        }
3431        break;
3432
3433    case INDEX_op_extract_i32:
3434        tcg_out_rlw(s, RLWINM, args[0], args[1],
3435                    32 - args[2], 32 - args[3], 31);
3436        break;
3437    case INDEX_op_extract_i64:
3438        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3439        break;
3440
3441    case INDEX_op_movcond_i32:
3442        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3443                        args[3], args[4], const_args[2]);
3444        break;
3445    case INDEX_op_movcond_i64:
3446        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3447                        args[3], args[4], const_args[2]);
3448        break;
3449
3450#if TCG_TARGET_REG_BITS == 64
3451    case INDEX_op_add2_i64:
3452#else
3453    case INDEX_op_add2_i32:
3454#endif
3455        /* Note that the CA bit is defined based on the word size of the
3456           environment.  So in 64-bit mode it's always carry-out of bit 63.
3457           The fallback code using deposit works just as well for 32-bit.  */
3458        a0 = args[0], a1 = args[1];
3459        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3460            a0 = TCG_REG_R0;
3461        }
3462        if (const_args[4]) {
3463            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3464        } else {
3465            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3466        }
3467        if (const_args[5]) {
3468            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3469        } else {
3470            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3471        }
3472        if (a0 != args[0]) {
3473            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3474        }
3475        break;
3476
3477#if TCG_TARGET_REG_BITS == 64
3478    case INDEX_op_sub2_i64:
3479#else
3480    case INDEX_op_sub2_i32:
3481#endif
3482        a0 = args[0], a1 = args[1];
3483        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3484            a0 = TCG_REG_R0;
3485        }
3486        if (const_args[2]) {
3487            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3488        } else {
3489            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3490        }
3491        if (const_args[3]) {
3492            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3493        } else {
3494            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3495        }
3496        if (a0 != args[0]) {
3497            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3498        }
3499        break;
3500
3501    case INDEX_op_muluh_i32:
3502        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3503        break;
3504    case INDEX_op_mulsh_i32:
3505        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3506        break;
3507    case INDEX_op_muluh_i64:
3508        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3509        break;
3510    case INDEX_op_mulsh_i64:
3511        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3512        break;
3513
3514    case INDEX_op_mb:
3515        tcg_out_mb(s, args[0]);
3516        break;
3517
3518    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3519    case INDEX_op_mov_i64:
3520    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3521    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3522    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3523    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
3524    case INDEX_op_ext8s_i64:
3525    case INDEX_op_ext8u_i32:
3526    case INDEX_op_ext8u_i64:
3527    case INDEX_op_ext16s_i32:
3528    case INDEX_op_ext16s_i64:
3529    case INDEX_op_ext16u_i32:
3530    case INDEX_op_ext16u_i64:
3531    case INDEX_op_ext32s_i64:
3532    case INDEX_op_ext32u_i64:
3533    case INDEX_op_ext_i32_i64:
3534    case INDEX_op_extu_i32_i64:
3535    case INDEX_op_extrl_i64_i32:
3536    default:
3537        g_assert_not_reached();
3538    }
3539}
3540
3541int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3542{
3543    switch (opc) {
3544    case INDEX_op_and_vec:
3545    case INDEX_op_or_vec:
3546    case INDEX_op_xor_vec:
3547    case INDEX_op_andc_vec:
3548    case INDEX_op_not_vec:
3549    case INDEX_op_nor_vec:
3550    case INDEX_op_eqv_vec:
3551    case INDEX_op_nand_vec:
3552        return 1;
3553    case INDEX_op_orc_vec:
3554        return have_isa_2_07;
3555    case INDEX_op_add_vec:
3556    case INDEX_op_sub_vec:
3557    case INDEX_op_smax_vec:
3558    case INDEX_op_smin_vec:
3559    case INDEX_op_umax_vec:
3560    case INDEX_op_umin_vec:
3561    case INDEX_op_shlv_vec:
3562    case INDEX_op_shrv_vec:
3563    case INDEX_op_sarv_vec:
3564    case INDEX_op_rotlv_vec:
3565        return vece <= MO_32 || have_isa_2_07;
3566    case INDEX_op_ssadd_vec:
3567    case INDEX_op_sssub_vec:
3568    case INDEX_op_usadd_vec:
3569    case INDEX_op_ussub_vec:
3570        return vece <= MO_32;
3571    case INDEX_op_cmp_vec:
3572    case INDEX_op_shli_vec:
3573    case INDEX_op_shri_vec:
3574    case INDEX_op_sari_vec:
3575    case INDEX_op_rotli_vec:
3576        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3577    case INDEX_op_neg_vec:
3578        return vece >= MO_32 && have_isa_3_00;
3579    case INDEX_op_mul_vec:
3580        switch (vece) {
3581        case MO_8:
3582        case MO_16:
3583            return -1;
3584        case MO_32:
3585            return have_isa_2_07 ? 1 : -1;
3586        case MO_64:
3587            return have_isa_3_10;
3588        }
3589        return 0;
3590    case INDEX_op_bitsel_vec:
3591        return have_vsx;
3592    case INDEX_op_rotrv_vec:
3593        return -1;
3594    default:
3595        return 0;
3596    }
3597}
3598
3599static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3600                            TCGReg dst, TCGReg src)
3601{
3602    tcg_debug_assert(dst >= TCG_REG_V0);
3603
3604    /* Splat from integer reg allowed via constraints for v3.00.  */
3605    if (src < TCG_REG_V0) {
3606        tcg_debug_assert(have_isa_3_00);
3607        switch (vece) {
3608        case MO_64:
3609            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3610            return true;
3611        case MO_32:
3612            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3613            return true;
3614        default:
3615            /* Fail, so that we fall back on either dupm or mov+dup.  */
3616            return false;
3617        }
3618    }
3619
3620    /*
3621     * Recall we use (or emulate) VSX integer loads, so the integer is
3622     * right justified within the left (zero-index) double-word.
3623     */
3624    switch (vece) {
3625    case MO_8:
3626        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3627        break;
3628    case MO_16:
3629        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3630        break;
3631    case MO_32:
3632        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3633        break;
3634    case MO_64:
3635        if (have_vsx) {
3636            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3637            break;
3638        }
3639        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3640        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3641        break;
3642    default:
3643        g_assert_not_reached();
3644    }
3645    return true;
3646}
3647
3648static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3649                             TCGReg out, TCGReg base, intptr_t offset)
3650{
3651    int elt;
3652
3653    tcg_debug_assert(out >= TCG_REG_V0);
3654    switch (vece) {
3655    case MO_8:
3656        if (have_isa_3_00) {
3657            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3658        } else {
3659            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3660        }
3661        elt = extract32(offset, 0, 4);
3662#if !HOST_BIG_ENDIAN
3663        elt ^= 15;
3664#endif
3665        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3666        break;
3667    case MO_16:
3668        tcg_debug_assert((offset & 1) == 0);
3669        if (have_isa_3_00) {
3670            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3671        } else {
3672            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3673        }
3674        elt = extract32(offset, 1, 3);
3675#if !HOST_BIG_ENDIAN
3676        elt ^= 7;
3677#endif
3678        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3679        break;
3680    case MO_32:
3681        if (have_isa_3_00) {
3682            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3683            break;
3684        }
3685        tcg_debug_assert((offset & 3) == 0);
3686        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3687        elt = extract32(offset, 2, 2);
3688#if !HOST_BIG_ENDIAN
3689        elt ^= 3;
3690#endif
3691        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3692        break;
3693    case MO_64:
3694        if (have_vsx) {
3695            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3696            break;
3697        }
3698        tcg_debug_assert((offset & 7) == 0);
3699        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3700        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3701        elt = extract32(offset, 3, 1);
3702#if !HOST_BIG_ENDIAN
3703        elt = !elt;
3704#endif
3705        if (elt) {
3706            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3707        } else {
3708            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3709        }
3710        break;
3711    default:
3712        g_assert_not_reached();
3713    }
3714    return true;
3715}
3716
/*
 * Emit host code for one vector opcode.
 *
 * @s:          code generation context the instructions are written to.
 * @opc:        the vector opcode to emit.
 * @vecl:       vector length selector; the operand type is
 *              vecl + TCG_TYPE_V64.
 * @vece:       element size, used to index the per-size instruction
 *              tables below.
 * @args:       opcode operands; args[0..2] are cached in a0..a2 and
 *              args[3] is read directly by the four-operand opcodes.
 * @const_args: not referenced by this function.
 *
 * ld/st/dupm, bitsel, dup2 and ppc_msum emit their code and return
 * directly.  Every other opcode only selects an instruction word, which
 * is then emitted by the common three-register tail at the bottom.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    /*
     * Instruction words indexed by vece.  A 0 entry marks an element size
     * for which the table provides no instruction; selecting one trips the
     * tcg_debug_assert() ahead of the final emit.
     */
    static const uint32_t
        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
        neg_op[4] = { 0, 0, VNEGW, VNEGD },
        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };

    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
    uint32_t insn;

    switch (opc) {
    /* Memory forms are delegated to dedicated emitters. */
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        return;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        return;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        return;

    case INDEX_op_add_vec:
        insn = add_op[vece];
        break;
    case INDEX_op_sub_vec:
        insn = sub_op[vece];
        break;
    case INDEX_op_neg_vec:
        /*
         * Single-input op: the source moves to the VRB slot and a1 is
         * zeroed for the VRA slot of the common emit below.
         */
        insn = neg_op[vece];
        a2 = a1;
        a1 = 0;
        break;
    case INDEX_op_mul_vec:
        insn = mul_op[vece];
        break;
    case INDEX_op_ssadd_vec:
        insn = ssadd_op[vece];
        break;
    case INDEX_op_sssub_vec:
        insn = sssub_op[vece];
        break;
    case INDEX_op_usadd_vec:
        insn = usadd_op[vece];
        break;
    case INDEX_op_ussub_vec:
        insn = ussub_op[vece];
        break;
    case INDEX_op_smin_vec:
        insn = smin_op[vece];
        break;
    case INDEX_op_umin_vec:
        insn = umin_op[vece];
        break;
    case INDEX_op_smax_vec:
        insn = smax_op[vece];
        break;
    case INDEX_op_umax_vec:
        insn = umax_op[vece];
        break;
    case INDEX_op_shlv_vec:
        insn = shlv_op[vece];
        break;
    case INDEX_op_shrv_vec:
        insn = shrv_op[vece];
        break;
    case INDEX_op_sarv_vec:
        insn = sarv_op[vece];
        break;
    case INDEX_op_and_vec:
        insn = VAND;
        break;
    case INDEX_op_or_vec:
        insn = VOR;
        break;
    case INDEX_op_xor_vec:
        insn = VXOR;
        break;
    case INDEX_op_andc_vec:
        insn = VANDC;
        break;
    case INDEX_op_not_vec:
        /* NOT x is emitted as NOR x, x. */
        insn = VNOR;
        a2 = a1;
        break;
    case INDEX_op_orc_vec:
        insn = VORC;
        break;
    case INDEX_op_nand_vec:
        insn = VNAND;
        break;
    case INDEX_op_nor_vec:
        insn = VNOR;
        break;
    case INDEX_op_eqv_vec:
        insn = VEQV;
        break;

    case INDEX_op_cmp_vec:
        /*
         * Only EQ, NE, GT and GTU are accepted here; any other condition
         * in args[3] asserts.
         */
        switch (args[3]) {
        case TCG_COND_EQ:
            insn = eq_op[vece];
            break;
        case TCG_COND_NE:
            insn = ne_op[vece];
            break;
        case TCG_COND_GT:
            insn = gts_op[vece];
            break;
        case TCG_COND_GTU:
            insn = gtu_op[vece];
            break;
        default:
            g_assert_not_reached();
        }
        break;

    case INDEX_op_bitsel_vec:
        /* Four-operand form: note a1 goes to VRC and args[3] to VRA. */
        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
        return;

    case INDEX_op_dup2_vec:
        /* Only valid on a 32-bit host; uses TCG_VEC_TMP1 as scratch. */
        assert(TCG_TARGET_REG_BITS == 32);
        /* With inputs a1 = xLxx, a2 = xHxx  */
        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
        return;

    case INDEX_op_ppc_mrgh_vec:
        insn = mrgh_op[vece];
        break;
    case INDEX_op_ppc_mrgl_vec:
        insn = mrgl_op[vece];
        break;
    case INDEX_op_ppc_muleu_vec:
        insn = muleu_op[vece];
        break;
    case INDEX_op_ppc_mulou_vec:
        insn = mulou_op[vece];
        break;
    case INDEX_op_ppc_pkum_vec:
        insn = pkum_op[vece];
        break;
    case INDEX_op_rotlv_vec:
        insn = rotl_op[vece];
        break;
    case INDEX_op_ppc_msum_vec:
        /* Four-operand form, only defined here for 16-bit elements. */
        tcg_debug_assert(vece == MO_16);
        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
        return;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }

    /* Common tail: three-register form with the selected instruction. */
    tcg_debug_assert(insn != 0);
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}
3903
3904static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3905                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3906{
3907    TCGv_vec t1;
3908
3909    if (vece == MO_32) {
3910        /*
3911         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3912         * So using negative numbers gets us the 4th bit easily.
3913         */
3914        imm = sextract32(imm, 0, 5);
3915    } else {
3916        imm &= (8 << vece) - 1;
3917    }
3918
3919    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
3920    t1 = tcg_constant_vec(type, MO_8, imm);
3921    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3922              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3923}
3924
3925static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3926                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3927{
3928    bool need_swap = false, need_inv = false;
3929
3930    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3931
3932    switch (cond) {
3933    case TCG_COND_EQ:
3934    case TCG_COND_GT:
3935    case TCG_COND_GTU:
3936        break;
3937    case TCG_COND_NE:
3938        if (have_isa_3_00 && vece <= MO_32) {
3939            break;
3940        }
3941        /* fall through */
3942    case TCG_COND_LE:
3943    case TCG_COND_LEU:
3944        need_inv = true;
3945        break;
3946    case TCG_COND_LT:
3947    case TCG_COND_LTU:
3948        need_swap = true;
3949        break;
3950    case TCG_COND_GE:
3951    case TCG_COND_GEU:
3952        need_swap = need_inv = true;
3953        break;
3954    default:
3955        g_assert_not_reached();
3956    }
3957
3958    if (need_inv) {
3959        cond = tcg_invert_cond(cond);
3960    }
3961    if (need_swap) {
3962        TCGv_vec t1;
3963        t1 = v1, v1 = v2, v2 = t1;
3964        cond = tcg_swap_cond(cond);
3965    }
3966
3967    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3968              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3969
3970    if (need_inv) {
3971        tcg_gen_not_vec(vece, v0, v0);
3972    }
3973}
3974
3975static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3976                           TCGv_vec v1, TCGv_vec v2)
3977{
3978    TCGv_vec t1 = tcg_temp_new_vec(type);
3979    TCGv_vec t2 = tcg_temp_new_vec(type);
3980    TCGv_vec c0, c16;
3981
3982    switch (vece) {
3983    case MO_8:
3984    case MO_16:
3985        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3986                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3987        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3988                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3989        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3990                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3991        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3992                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3993        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3994                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3995        break;
3996
3997    case MO_32:
3998        tcg_debug_assert(!have_isa_2_07);
3999        /*
4000         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
4001         * So using -16 is a quick way to represent 16.
4002         */
4003        c16 = tcg_constant_vec(type, MO_8, -16);
4004        c0 = tcg_constant_vec(type, MO_8, 0);
4005
4006        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
4007                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
4008        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
4009                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4010        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
4011                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
4012        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
4013                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
4014        tcg_gen_add_vec(MO_32, v0, t1, t2);
4015        break;
4016
4017    default:
4018        g_assert_not_reached();
4019    }
4020    tcg_temp_free_vec(t1);
4021    tcg_temp_free_vec(t2);
4022}
4023
4024void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4025                       TCGArg a0, ...)
4026{
4027    va_list va;
4028    TCGv_vec v0, v1, v2, t0;
4029    TCGArg a2;
4030
4031    va_start(va, a0);
4032    v0 = temp_tcgv_vec(arg_temp(a0));
4033    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
4034    a2 = va_arg(va, TCGArg);
4035
4036    switch (opc) {
4037    case INDEX_op_shli_vec:
4038        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
4039        break;
4040    case INDEX_op_shri_vec:
4041        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
4042        break;
4043    case INDEX_op_sari_vec:
4044        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
4045        break;
4046    case INDEX_op_rotli_vec:
4047        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
4048        break;
4049    case INDEX_op_cmp_vec:
4050        v2 = temp_tcgv_vec(arg_temp(a2));
4051        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
4052        break;
4053    case INDEX_op_mul_vec:
4054        v2 = temp_tcgv_vec(arg_temp(a2));
4055        expand_vec_mul(type, vece, v0, v1, v2);
4056        break;
4057    case INDEX_op_rotlv_vec:
4058        v2 = temp_tcgv_vec(arg_temp(a2));
4059        t0 = tcg_temp_new_vec(type);
4060        tcg_gen_neg_vec(vece, t0, v2);
4061        tcg_gen_rotlv_vec(vece, v0, v1, t0);
4062        tcg_temp_free_vec(t0);
4063        break;
4064    default:
4065        g_assert_not_reached();
4066    }
4067    va_end(va);
4068}
4069
/*
 * Return the operand constraint set for opcode @op.
 *
 * Opcodes that share a constraint set are grouped under a single return.
 * The C_On_Im(...) macro names appear to encode n outputs and m inputs
 * with one constraint string per operand; their definitions live outside
 * this function.  An opcode that is not listed hits
 * g_assert_not_reached().
 */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    /* Host loads and unary integer operations. */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ctpop_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_not_i32:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_ctpop_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i64:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_extract_i64:
        return C_O1_I1(r, r);

    /* Host stores. */
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(r, r);

    /* Binary operations whose second input uses the "ri" constraint. */
    case INDEX_op_add_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_andc_i32:
    case INDEX_op_orc_i32:
    case INDEX_op_eqv_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_and_i64:
    case INDEX_op_andc_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
        return C_O1_I2(r, r, rI);

    /* Binary operations that take both inputs with the "r" constraint. */
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
    case INDEX_op_nand_i32:
    case INDEX_op_nor_i32:
    case INDEX_op_muluh_i32:
    case INDEX_op_mulsh_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i64:
    case INDEX_op_nand_i64:
    case INDEX_op_nor_i64:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
    case INDEX_op_mulsh_i64:
    case INDEX_op_muluh_i64:
        return C_O1_I2(r, r, r);

    /* Binary operations with opcode-specific input constraints. */
    case INDEX_op_sub_i32:
        return C_O1_I2(r, rI, ri);
    case INDEX_op_add_i64:
        return C_O1_I2(r, r, rT);
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
        return C_O1_I2(r, r, rU);
    case INDEX_op_sub_i64:
        return C_O1_I2(r, rI, rT);
    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rZW);

    /* Compare-based operations. */
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rC);
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
        return C_O1_I2(r, r, rC);
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rC, rZ, rZ);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);
    case INDEX_op_brcond2_i32:
        return C_O0_I4(r, r, ri, ri);
    case INDEX_op_setcond2_i32:
        return C_O1_I4(r, r, r, ri, ri);
    case INDEX_op_add2_i64:
    case INDEX_op_add2_i32:
        return C_O2_I4(r, r, r, r, rI, rZM);
    case INDEX_op_sub2_i64:
    case INDEX_op_sub2_i32:
        return C_O2_I4(r, r, rI, rZM, r, r);

    /*
     * Guest memory accesses.  The operand count depends on
     * TCG_TARGET_REG_BITS: a 32-bit host takes extra register operands
     * (presumably register pairs for 64-bit addresses and data).
     */
    case INDEX_op_qemu_ld_a32_i32:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_a64_i32:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
    case INDEX_op_qemu_ld_a32_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
    case INDEX_op_qemu_ld_a64_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);

    case INDEX_op_qemu_st_a32_i32:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st_a64_i32:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
    case INDEX_op_qemu_st_a32_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
    case INDEX_op_qemu_st_a64_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);

    /* 128-bit guest memory accesses use the "o" and "m" constraints. */
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        return C_N1O1_I1(o, m, r);
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return C_O0_I3(o, m, r);

    /* Two-input vector operations. */
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    /* One-input vector operations. */
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    /* With ISA 3.00 the dup source uses the "vr" constraint instead of "v". */
    case INDEX_op_dup_vec:
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    /* Vector loads take an "r" operand as their input. */
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    /* Three-input vector operations. */
    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);

    default:
        g_assert_not_reached();
    }
}
4285
4286static void tcg_target_init(TCGContext *s)
4287{
4288    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
4289    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
4290    if (have_altivec) {
4291        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
4292        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
4293    }
4294
4295    tcg_target_call_clobber_regs = 0;
4296    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
4297    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
4298    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
4299    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
4300    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
4301    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
4302    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
4303    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
4304    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
4305    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
4306    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
4307    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
4308
4309    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
4310    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
4311    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
4312    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
4313    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
4314    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
4315    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
4316    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
4317    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
4318    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
4319    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
4320    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
4321    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
4322    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
4323    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
4324    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4325    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
4326    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
4327    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
4328    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
4329
4330    s->reserved_regs = 0;
4331    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
4332    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
4333#if defined(_CALL_SYSV)
4334    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
4335#endif
4336#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
4337    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
4338#endif
4339    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
4340    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
4341    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
4342    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
4343    if (USE_REG_TB) {
4344        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
4345    }
4346}
4347
4348#ifdef __ELF__
/*
 * Debug frame image passed to tcg_register_jit_int(): a CIE, an FDE
 * header, and the FDE's call-frame instruction bytes.
 */
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    /* DW_CFA_def_cfa, register, then FRAME_SIZE as a 2-byte uleb128. */
    uint8_t fde_def_cfa[4];
    /* 3 bytes for the LR entry plus 2 bytes per callee-saved register. */
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;
4355
/*
 * We're expecting a 2 byte uleb128 encoded value: debug_frame.fde_def_cfa
 * stores FRAME_SIZE as two 7-bit groups, so it must fit in 14 bits.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* ELF machine identifier matching the host register width. */
#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif
4364
/*
 * Statically initialised debug frame.  The function start/length fields
 * and the per-register entries of fde_reg_ofs after the first three bytes
 * are filled in at run time by tcg_register_jit().
 */
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
    .cie.return_column = 65,                 /* same column as lr below */

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};
4386
4387void tcg_register_jit(const void *buf, size_t buf_size)
4388{
4389    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4390    int i;
4391
4392    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4393        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4394        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4395    }
4396
4397    debug_frame.fde.func_start = (uintptr_t)buf;
4398    debug_frame.fde.func_len = buf_size;
4399
4400    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4401}
4402#endif /* __ELF__ */
4403#undef VMULEUB
4404#undef VMULEUH
4405#undef VMULEUW
4406#undef VMULOUB
4407#undef VMULOUH
4408#undef VMULOUW
4409#undef VMSUMUHM
4410