/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# ifdef _CALL_AIX
    /* ok */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* ok */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* ok */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

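/*
 * ISA 3.00 has ADDPCIS, giving cheap pc-relative addressing, so the
 * dedicated TB register below only pays off on older 64-bit cpus.
 */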
#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000
#define TCG_CT_CONST_CMP  0x8000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifndef R_PPC64_PCREL34
#define R_PPC64_PCREL34  132
#endif

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

/* For PPC, we use TB+4 instead of TB as the base. */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
{
    return tcg_tbrel_diff(s, target) - 4;
}

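/*
 * Unconditional branches (B) have a 24-bit LI field plus two implied
 * zero bits, i.e. a signed 26-bit byte displacement (+/- 32MB).
 */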
static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

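/* Patch the 34-bit displacement of a prefixed (two-word) instruction:
   the high 18 bits live in the prefix word, the low 16 in the suffix. */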
static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
        return true;
    }
    return false;
}

static bool mask_operand(uint32_t c, int *mb, int *me);
static bool mask64_operand(uint64_t c, int *mb, int *me);

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = sval;
    int mb, me;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    if (ct & TCG_CT_CONST_CMP) {
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            ct |= TCG_CT_CONST_S16;
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            ct |= TCG_CT_CONST_U16;
            break;
        case TCG_COND_TSTEQ:
        case TCG_COND_TSTNE:
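            /* Accept only masks that tcg_out_test below can handle:
               a 16-bit immediate in either half, an RLWINM-style
               32-bit mask, or a left-justifiable mask for RLDICR.  */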
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
                return 1;
            }
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
                return 1;
            }
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
                return 1;
            }
            return 0;
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)

#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)
#define LQ     OPCD( 56)

#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)
#define STQ    XO62(  2)

#define PLWA   OPCD( 41)
#define PLD    OPCD( 57)
#define PLXSD  OPCD( 42)
#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */

#define PSTD   OPCD( 61)
#define PSTXSD OPCD( 46)
#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)
#define ADDPCIS XO19( 2)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define SETBC    XO31(384)  /* v3.10 */
#define SETBCR   XO31(416)  /* v3.10 */
#define SETNBC   XO31(448)  /* v3.10 */
#define SETNBCR  XO31(480)  /* v3.10 */

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

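/* Bit indices within a CR field, as consumed by BI/BT/BA/BB above. */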
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

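/*
 * For TSTEQ/TSTNE these test CR_EQ of the RC0 result that tcg_out_test
 * produces; the other conditions consume an ordinary CMP/CMPL result.
 */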
static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
};

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
{
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
}

static void tcg_out_prefix_align(TCGContext *s)
{
    if (tcg_out_need_prefix_align(s)) {
        tcg_out32(s, NOP);
    }
}

static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}

/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, bool rc)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
}

static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb)
{
    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
}

static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, int me, bool rc)
{
    tcg_debug_assert((mb & 0x1f) == mb);
    tcg_debug_assert((me & 0x1f) == me);
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
}

static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb, int me)
{
    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

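/* SRADI encodes bit 5 of the shift count separately, in insn bit 30. */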
static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

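/*
 * ADDPCIS dst, imm adds (imm << 16) to the address of the next insn;
 * the 16-bit immediate is scattered across the d0/d1/d2 fields.
 */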
static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}

static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

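    /* Otherwise, build the constant piecewise: extract the high 32 bits
       (arg >> 31 >> 1 sidesteps an undefined shift by 32 when
       tcg_target_long is 32 bits), shift them into place, then OR in
       each nonzero 16-bit half.  */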
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

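    /* Emit the address computation with zero displacements; patch_reloc
       fills in the real constant-pool offsets once the pool is laid out.  */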
    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

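/*
 * E.g. c = 0x00000ff0 yields mb = 20, me = 27: in PPC's big-endian
 * bit numbering, mask(20, 27) selects exactly those eight bits.
 */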
static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

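    /* Split the 32-bit offset into a sign-extended 16-bit D field plus
       an ADDIS high part.  When the high part would come out negative
       even though the original offset is positive, peel off 0x4000
       into a second ADDIS.  */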
    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/*
 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
 * If RC, then also set RC0.
 */
static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
                         bool const_arg2, TCGType type, bool rc)
{
    int mb, me;

    if (!const_arg2) {
        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
        return;
    }

    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    if ((arg2 & ~0xffff) == 0) {
        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
        return;
    }
    if ((arg2 & ~0xffff0000ull) == 0) {
        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
        return;
    }
    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
        return;
    }
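    /* Rotate the mask up to the msb so that mask64_operand can match a
       1..10..0 pattern; RLDICR applies the same rotation to arg1, so
       the bits extracted are exactly arg1 & arg2.  */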
1770    if (TCG_TARGET_REG_BITS == 64) {
1771        int sh = clz64(arg2);
1772        if (mask64_operand(arg2 << sh, &mb, &me)) {
1773            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
1774            return;
1775        }
1776    }
1777    /* Constraints should satisfy this. */
1778    g_assert_not_reached();
1779}
1780
1781static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1782                        int const_arg2, int cr, TCGType type)
1783{
1784    int imm;
1785    uint32_t op;
1786
1787    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1788
1789    /*
1790     * Simplify the comparisons below wrt CMPI.
1791     * All of the tests are 16-bit, so a 32-bit sign extend always works.
1792     */
1793    if (type == TCG_TYPE_I32) {
1794        arg2 = (int32_t)arg2;
1795    }
1796
1797    switch (cond) {
1798    case TCG_COND_EQ:
1799    case TCG_COND_NE:
1800        if (const_arg2) {
1801            if ((int16_t) arg2 == arg2) {
1802                op = CMPI;
1803                imm = 1;
1804                break;
1805            } else if ((uint16_t) arg2 == arg2) {
1806                op = CMPLI;
1807                imm = 1;
1808                break;
1809            }
1810        }
1811        op = CMPL;
1812        imm = 0;
1813        break;
1814
1815    case TCG_COND_TSTEQ:
1816    case TCG_COND_TSTNE:
1817        tcg_debug_assert(cr == 0);
1818        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
1819        return;
1820
1821    case TCG_COND_LT:
1822    case TCG_COND_GE:
1823    case TCG_COND_LE:
1824    case TCG_COND_GT:
1825        if (const_arg2) {
1826            if ((int16_t) arg2 == arg2) {
1827                op = CMPI;
1828                imm = 1;
1829                break;
1830            }
1831        }
1832        op = CMP;
1833        imm = 0;
1834        break;
1835
1836    case TCG_COND_LTU:
1837    case TCG_COND_GEU:
1838    case TCG_COND_LEU:
1839    case TCG_COND_GTU:
1840        if (const_arg2) {
1841            if ((uint16_t) arg2 == arg2) {
1842                op = CMPLI;
1843                imm = 1;
1844                break;
1845            }
1846        }
1847        op = CMPL;
1848        imm = 0;
1849        break;
1850
1851    default:
1852        g_assert_not_reached();
1853    }
1854    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1855
1856    if (imm) {
1857        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1858    } else {
1859        if (const_arg2) {
1860            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1861            arg2 = TCG_REG_R0;
1862        }
1863        tcg_out32(s, op | RA(arg1) | RB(arg2));
1864    }
1865}
1866
1867static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1868                                TCGReg dst, TCGReg src, bool neg)
1869{
1870    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
1871        /*
1872         * X != 0 implies X + -1 generates a carry.
1873         * RT = (~X + X) + CA
1874         *    = -1 + CA
1875         *    = CA ? 0 : -1
1876         */
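        /*
         * Worked example: for X = 0, ADDIC computes 0 + -1 with no
         * carry (CA = 0), so SUBFE yields -1 + 0 = -1; for any X != 0
         * the addition carries (CA = 1) and SUBFE yields -1 + 1 = 0.
         */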
1877        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1878        tcg_out32(s, SUBFE | TAB(dst, src, src));
1879        return;
1880    }
1881
1882    if (type == TCG_TYPE_I32) {
1883        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1884        tcg_out_shri32(s, dst, dst, 5);
1885    } else {
1886        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1887        tcg_out_shri64(s, dst, dst, 6);
1888    }
1889    if (neg) {
1890        tcg_out32(s, NEG | RT(dst) | RA(dst));
1891    }
1892}
1893
1894static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
1895                                TCGReg dst, TCGReg src, bool neg)
1896{
1897    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
1898        /*
1899         * X != 0 implies X + -1 generates a carry.
1900         * RT = X + ~(X + -1) + CA = X + -X + CA = CA (1 iff X != 0).
1901         */
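        /*
         * E.g. X = 5: ADDIC sets R0 = 4 with CA = 1, and SUBFE computes
         * ~4 + 5 + 1 = 1; for X = 0, R0 = -1 with CA = 0 and the result
         * is ~(-1) + 0 + 0 = 0.
         */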
1902        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1903        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1904        return;
1905    }
1906    tcg_out_setcond_eq0(s, type, dst, src, false);
1907    if (neg) {
1908        tcg_out32(s, ADDI | TAI(dst, dst, -1));
1909    } else {
1910        tcg_out_xori32(s, dst, dst, 1);
1911    }
1912}
1913
1914static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1915                                  bool const_arg2)
1916{
1917    if (const_arg2) {
1918        if ((uint32_t)arg2 == arg2) {
1919            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1920        } else {
1921            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1922            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1923        }
1924    } else {
1925        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1926    }
1927    return TCG_REG_R0;
1928}
1929
1930static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1931                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
1932                            int const_arg2, bool neg)
1933{
1934    int sh;
1935    bool inv;
1936
1937    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1938
1939    /* Ignore high bits of a potential constant arg2.  */
1940    if (type == TCG_TYPE_I32) {
1941        arg2 = (uint32_t)arg2;
1942    }
1943
1944    /* With SETBC/SETBCR, we can always implement with 2 insns. */
1945    if (have_isa_3_10) {
1946        tcg_insn_unit bi, opc;
1947
1948        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
1949
1950        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
1951        bi = tcg_to_bc[cond] & (0x1f << 16);
1952        if (tcg_to_bc[cond] & BO(8)) {
1953            opc = neg ? SETNBC : SETBC;
1954        } else {
1955            opc = neg ? SETNBCR : SETBCR;
1956        }
1957        tcg_out32(s, opc | RT(arg0) | bi);
1958        return;
1959    }
1960
1961    /* Handle common and trivial cases before handling anything else.  */
1962    if (arg2 == 0) {
1963        switch (cond) {
1964        case TCG_COND_EQ:
1965            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1966            return;
1967        case TCG_COND_NE:
1968            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1969            return;
1970        case TCG_COND_GE:
1971            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1972            arg1 = arg0;
1973            /* FALLTHRU */
1974        case TCG_COND_LT:
1975            /* Extract the sign bit.  */
1976            if (type == TCG_TYPE_I32) {
1977                if (neg) {
1978                    tcg_out_sari32(s, arg0, arg1, 31);
1979                } else {
1980                    tcg_out_shri32(s, arg0, arg1, 31);
1981                }
1982            } else {
1983                if (neg) {
1984                    tcg_out_sari64(s, arg0, arg1, 63);
1985                } else {
1986                    tcg_out_shri64(s, arg0, arg1, 63);
1987                }
1988            }
1989            return;
1990        default:
1991            break;
1992        }
1993    }
1994
1995    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1996       All other cases below are also at least 3 insns, so speed up the
1997       code generator by not considering them and always using ISEL.  */
1998    if (have_isel) {
1999        int isel, tab;
2000
2001        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
2002
2003        isel = tcg_to_isel[cond];
2004
2005        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
2006        if (isel & 1) {
2007            /* arg0 = (bc ? 0 : 1) */
2008            tab = TAB(arg0, 0, arg0);
2009            isel &= ~1;
2010        } else {
2011            /* arg0 = (bc ? 1 : 0) */
2012            tcg_out_movi(s, type, TCG_REG_R0, 0);
2013            tab = TAB(arg0, arg0, TCG_REG_R0);
2014        }
2015        tcg_out32(s, isel | tab);
2016        return;
2017    }
2018
2019    inv = false;
2020    switch (cond) {
2021    case TCG_COND_EQ:
2022        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
2023        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
2024        break;
2025
2026    case TCG_COND_NE:
2027        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
2028        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
2029        break;
2030
2031    case TCG_COND_TSTEQ:
2032        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
2033        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
2034        break;
2035
2036    case TCG_COND_TSTNE:
2037        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
2038        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
2039        break;
2040
2041    case TCG_COND_LE:
2042    case TCG_COND_LEU:
2043        inv = true;
2044        /* fall through */
2045    case TCG_COND_GT:
2046    case TCG_COND_GTU:
2047        sh = 30; /* CR7 CR_GT */
2048        goto crtest;
2049
2050    case TCG_COND_GE:
2051    case TCG_COND_GEU:
2052        inv = true;
2053        /* fall through */
2054    case TCG_COND_LT:
2055    case TCG_COND_LTU:
2056        sh = 29; /* CR7 CR_LT */
2057        goto crtest;
2058
2059    crtest:
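        /*
         * Sketch of the extraction below: the compare sets CR7, MFOCRF
         * copies the CR image into R0 with CR7 in the low nibble, and
         * RLWINM rotates the selected bit (LT at bit 28 for sh = 29,
         * GT at bit 29 for sh = 30) down into bit 31 of arg0.
         */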
2060        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
2061        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
2062        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
2063        if (neg && inv) {
2064            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
2065        } else if (neg) {
2066            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
2067        } else if (inv) {
2068            tcg_out_xori32(s, arg0, arg0, 1);
2069        }
2070        break;
2071
2072    default:
2073        g_assert_not_reached();
2074    }
2075}
2076
2077static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
2078{
2079    tcg_out32(s, tcg_to_bc[cond] | bd);
2080}
2081
2082static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
2083{
2084    int bd = 0;
2085    if (l->has_value) {
2086        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
2087    } else {
2088        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
2089    }
2090    tcg_out_bc(s, cond, bd);
2091}
2092
2093static void tcg_out_brcond(TCGContext *s, TCGCond cond,
2094                           TCGArg arg1, TCGArg arg2, int const_arg2,
2095                           TCGLabel *l, TCGType type)
2096{
2097    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
2098    tcg_out_bc_lab(s, cond, l);
2099}
2100
2101static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
2102                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
2103                            TCGArg v2, bool const_c2)
2104{
2105    /* If for some reason both inputs are zero, don't produce bad code.  */
2106    if (v1 == 0 && v2 == 0) {
2107        tcg_out_movi(s, type, dest, 0);
2108        return;
2109    }
2110
2111    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);
2112
2113    if (have_isel) {
2114        int isel = tcg_to_isel[cond];
2115
2116        /* Swap the V operands if the operation indicates inversion.  */
2117        if (isel & 1) {
2118            int t = v1;
2119            v1 = v2;
2120            v2 = t;
2121            isel &= ~1;
2122        }
2123        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
2124        if (v2 == 0) {
2125            tcg_out_movi(s, type, TCG_REG_R0, 0);
2126        }
2127        tcg_out32(s, isel | TAB(dest, v1, v2));
2128    } else {
2129        if (dest == v2) {
2130            cond = tcg_invert_cond(cond);
2131            v2 = v1;
2132        } else if (dest != v1) {
2133            if (v1 == 0) {
2134                tcg_out_movi(s, type, dest, 0);
2135            } else {
2136                tcg_out_mov(s, type, dest, v1);
2137            }
2138        }
2139        /* Branch forward over one insn */
2140        tcg_out_bc(s, cond, 8);
2141        if (v2 == 0) {
2142            tcg_out_movi(s, type, dest, 0);
2143        } else {
2144            tcg_out_mov(s, type, dest, v2);
2145        }
2146    }
2147}
2148
2149static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
2150                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
2151{
2152    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2153        tcg_out32(s, opc | RA(a0) | RS(a1));
2154    } else {
2155        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
2156        /* Note that the only other valid constant for a2 is 0.  */
2157        if (have_isel) {
2158            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
2159            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
2160        } else if (!const_a2 && a0 == a2) {
2161            tcg_out_bc(s, TCG_COND_EQ, 8);
2162            tcg_out32(s, opc | RA(a0) | RS(a1));
2163        } else {
2164            tcg_out32(s, opc | RA(a0) | RS(a1));
2165            tcg_out_bc(s, TCG_COND_NE, 8);
2166            if (const_a2) {
2167                tcg_out_movi(s, type, a0, 0);
2168            } else {
2169                tcg_out_mov(s, type, a0, a2);
2170            }
2171        }
2172    }
2173}
2174
2175static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
2176                         const int *const_args)
2177{
2178    static const struct { uint8_t bit1, bit2; } bits[] = {
2179        [TCG_COND_LT ] = { CR_LT, CR_LT },
2180        [TCG_COND_LE ] = { CR_LT, CR_GT },
2181        [TCG_COND_GT ] = { CR_GT, CR_GT },
2182        [TCG_COND_GE ] = { CR_GT, CR_LT },
2183        [TCG_COND_LTU] = { CR_LT, CR_LT },
2184        [TCG_COND_LEU] = { CR_LT, CR_GT },
2185        [TCG_COND_GTU] = { CR_GT, CR_GT },
2186        [TCG_COND_GEU] = { CR_GT, CR_LT },
2187    };
2188
2189    TCGCond cond = args[4], cond2;
2190    TCGArg al, ah, bl, bh;
2191    int blconst, bhconst;
2192    int op, bit1, bit2;
2193
2194    al = args[0];
2195    ah = args[1];
2196    bl = args[2];
2197    bh = args[3];
2198    blconst = const_args[2];
2199    bhconst = const_args[3];
2200
2201    switch (cond) {
2202    case TCG_COND_EQ:
2203        op = CRAND;
2204        goto do_equality;
2205    case TCG_COND_NE:
2206        op = CRNAND;
2207    do_equality:
2208        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
2209        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
2210        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2211        break;
2212
2213    case TCG_COND_TSTEQ:
2214    case TCG_COND_TSTNE:
2215        if (blconst) {
2216            tcg_out_andi32(s, TCG_REG_R0, al, bl);
2217        } else {
2218            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
2219        }
2220        if (bhconst) {
2221            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
2222        } else {
2223            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
2224        }
2225        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
2226        break;
2227
2228    case TCG_COND_LT:
2229    case TCG_COND_LE:
2230    case TCG_COND_GT:
2231    case TCG_COND_GE:
2232    case TCG_COND_LTU:
2233    case TCG_COND_LEU:
2234    case TCG_COND_GTU:
2235    case TCG_COND_GEU:
2236        bit1 = bits[cond].bit1;
2237        bit2 = bits[cond].bit2;
2238        op = (bit1 != bit2 ? CRANDC : CRAND);
2239        cond2 = tcg_unsigned_cond(cond);
2240
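        /*
         * E.g. for LT this computes (ah < bh) || (ah == bh && al <u bl):
         * the signed high-part compare goes to CR6, the unsigned
         * low-part compare to CR7, and the two CR ops below fold the
         * result into CR0[EQ].
         */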
2241        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
2242        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
2243        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
2244        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
2245        break;
2246
2247    default:
2248        g_assert_not_reached();
2249    }
2250}
2251
2252static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
2253                             const int *const_args)
2254{
2255    tcg_out_cmp2(s, args + 1, const_args + 1);
2256    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
2257    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
2258}
2259
2260static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
2261                            const int *const_args)
2262{
2263    tcg_out_cmp2(s, args, const_args);
2264    tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5]));
2265}
2266
2267static void tcg_out_mb(TCGContext *s, TCGArg a0)
2268{
2269    uint32_t insn;
2270
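    /*
     * lwsync orders every access pair except store-load; only the full
     * hwsync enforces store-load ordering, hence the TCG_MO_ST_LD test
     * below.
     */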
2271    if (a0 & TCG_MO_ST_LD) {
2272        insn = HWSYNC;
2273    } else {
2274        insn = LWSYNC;
2275    }
2276
2277    tcg_out32(s, insn);
2278}
2279
2280static void tcg_out_call_int(TCGContext *s, int lk,
2281                             const tcg_insn_unit *target)
2282{
2283#ifdef _CALL_AIX
2284    /* Look through the descriptor.  If the branch is in range and the
2285       TOC fits in 32 bits, load the TOC by immediate and branch there.  */
2286    const void *tgt = ((const void * const *)target)[0];
2287    uintptr_t toc = ((const uintptr_t *)target)[1];
2288    intptr_t diff = tcg_pcrel_diff(s, tgt);
2289
2290    if (in_range_b(diff) && toc == (uint32_t)toc) {
2291        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
2292        tcg_out_b(s, lk, tgt);
2293    } else {
2294        /* Fold the low bits of the constant into the addresses below.  */
2295        intptr_t arg = (intptr_t)target;
2296        int ofs = (int16_t)arg;
2297
2298        if (ofs + 8 < 0x8000) {
2299            arg -= ofs;
2300        } else {
2301            ofs = 0;
2302        }
2303        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
2304        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
2305        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
2306        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
2307        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2308    }
2309#elif defined(_CALL_ELF) && _CALL_ELF == 2
2310    intptr_t diff;
2311
2312    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
2313       address, which the callee uses to compute its TOC address.  */
2314    /* FIXME: when the branch is in range, we could avoid r12 load if we
2315       knew that the destination uses the same TOC, and what its local
2316       entry point offset is.  */
2317    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
2318
2319    diff = tcg_pcrel_diff(s, target);
2320    if (in_range_b(diff)) {
2321        tcg_out_b(s, lk, target);
2322    } else {
2323        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
2324        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2325    }
2326#else
2327    tcg_out_b(s, lk, target);
2328#endif
2329}
2330
2331static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
2332                         const TCGHelperInfo *info)
2333{
2334    tcg_out_call_int(s, LK, target);
2335}
2336
2337static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
2338    [MO_UB] = LBZX,
2339    [MO_UW] = LHZX,
2340    [MO_UL] = LWZX,
2341    [MO_UQ] = LDX,
2342    [MO_SW] = LHAX,
2343    [MO_SL] = LWAX,
2344    [MO_BSWAP | MO_UB] = LBZX,
2345    [MO_BSWAP | MO_UW] = LHBRX,
2346    [MO_BSWAP | MO_UL] = LWBRX,
2347    [MO_BSWAP | MO_UQ] = LDBRX,
2348};
2349
2350static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
2351    [MO_UB] = STBX,
2352    [MO_UW] = STHX,
2353    [MO_UL] = STWX,
2354    [MO_UQ] = STDX,
2355    [MO_BSWAP | MO_UB] = STBX,
2356    [MO_BSWAP | MO_UW] = STHBRX,
2357    [MO_BSWAP | MO_UL] = STWBRX,
2358    [MO_BSWAP | MO_UQ] = STDBRX,
2359};
2360
2361static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2362{
2363    if (arg < 0) {
2364        arg = TCG_REG_TMP1;
2365    }
2366    tcg_out32(s, MFSPR | RT(arg) | LR);
2367    return arg;
2368}
2369
2370/*
2371 * For the purposes of ppc32 sorting 4 input registers into 4 argument
2372 * registers, there is an outside chance we would require 3 temps.
2373 */
2374static const TCGLdstHelperParam ldst_helper_param = {
2375    .ra_gen = ldst_ra_gen,
2376    .ntmp = 3,
2377    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
2378};
2379
2380static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2381{
2382    MemOp opc = get_memop(lb->oi);
2383
2384    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2385        return false;
2386    }
2387
2388    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
2389    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
2390    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
2391
2392    tcg_out_b(s, 0, lb->raddr);
2393    return true;
2394}
2395
2396static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2397{
2398    MemOp opc = get_memop(lb->oi);
2399
2400    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2401        return false;
2402    }
2403
2404    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
2405    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
2406
2407    tcg_out_b(s, 0, lb->raddr);
2408    return true;
2409}
2410
2411typedef struct {
2412    TCGReg base;
2413    TCGReg index;
2414    TCGAtomAlign aa;
2415} HostAddress;
2416
2417bool tcg_target_has_memory_bswap(MemOp memop)
2418{
2419    TCGAtomAlign aa;
2420
2421    if ((memop & MO_SIZE) <= MO_64) {
2422        return true;
2423    }
2424
2425    /*
2426     * Reject 16-byte memop with 16-byte atomicity,
2427     * but do allow a pair of 64-bit operations.
2428     */
2429    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2430    return aa.atom <= MO_64;
2431}
2432
2433/* We expect to use a 16-bit negative offset from ENV.  */
2434#define MIN_TLB_MASK_TABLE_OFS  -32768
2435
2436/*
2437 * For system-mode, perform the TLB load and compare.
2438 * For user-mode, perform any required alignment tests.
2439 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2440 * is required and fill in @h with the host address for the fast path.
2441 */
2442static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2443                                           TCGReg addrlo, TCGReg addrhi,
2444                                           MemOpIdx oi, bool is_ld)
2445{
2446    TCGType addr_type = s->addr_type;
2447    TCGLabelQemuLdst *ldst = NULL;
2448    MemOp opc = get_memop(oi);
2449    MemOp a_bits, s_bits;
2450
2451    /*
2452     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2453     *
2454     * Before 3.0, "An access that is not atomic is performed as a set of
2455     * smaller disjoint atomic accesses. In general, the number and alignment
2456     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
2457     *
2458     * As of 3.0, "the non-atomic access is performed as described in
2459     * the corresponding list", which matches MO_ATOM_SUBALIGN.
2460     */
2461    s_bits = opc & MO_SIZE;
2462    h->aa = atom_and_align_for_opc(s, opc,
2463                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
2464                                                 : MO_ATOM_IFALIGN,
2465                                   s_bits == MO_128);
2466    a_bits = h->aa.align;
2467
2468    if (tcg_use_softmmu) {
2469        int mem_index = get_mmuidx(oi);
2470        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2471                            : offsetof(CPUTLBEntry, addr_write);
2472        int fast_off = tlb_mask_table_ofs(s, mem_index);
2473        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2474        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2475
2476        ldst = new_ldst_label(s);
2477        ldst->is_ld = is_ld;
2478        ldst->oi = oi;
2479        ldst->addrlo_reg = addrlo;
2480        ldst->addrhi_reg = addrhi;
2481
2482        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2483        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2484        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2485
2486        /* Extract the page index, shifted into place for tlb index.  */
2487        if (TCG_TARGET_REG_BITS == 32) {
2488            tcg_out_shri32(s, TCG_REG_R0, addrlo,
2489                           s->page_bits - CPU_TLB_ENTRY_BITS);
2490        } else {
2491            tcg_out_shri64(s, TCG_REG_R0, addrlo,
2492                           s->page_bits - CPU_TLB_ENTRY_BITS);
2493        }
2494        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2495
2496        /*
2497         * Load the (low part) TLB comparator into TMP2.
2498         * For 64-bit host, always load the entire 64-bit slot for simplicity.
2499         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2500         */
2501        if (TCG_TARGET_REG_BITS == 64) {
2502            if (cmp_off == 0) {
2503                tcg_out32(s, LDUX | TAB(TCG_REG_TMP2,
2504                                        TCG_REG_TMP1, TCG_REG_TMP2));
2505            } else {
2506                tcg_out32(s, ADD | TAB(TCG_REG_TMP1,
2507                                       TCG_REG_TMP1, TCG_REG_TMP2));
2508                tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2,
2509                           TCG_REG_TMP1, cmp_off);
2510            }
2511        } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
2512            tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2,
2513                                     TCG_REG_TMP1, TCG_REG_TMP2));
2514        } else {
2515            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2516            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2517                       cmp_off + 4 * HOST_BIG_ENDIAN);
2518        }
2519
2520        /*
2521         * Load the TLB addend for use on the fast path.
2522         * Do this asap to minimize any load use delay.
2523         */
2524        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2525            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2526                       offsetof(CPUTLBEntry, addend));
2527        }
2528
2529        /* Clear the non-page, non-alignment bits from the address in R0. */
2530        if (TCG_TARGET_REG_BITS == 32) {
2531            /*
2532             * We don't support unaligned accesses on 32-bit hosts.
2533             * Preserve the bottom bits and thus trigger a comparison
2534             * failure on unaligned accesses.
2535             */
2536            if (a_bits < s_bits) {
2537                a_bits = s_bits;
2538            }
2539            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2540                        (32 - a_bits) & 31, 31 - s->page_bits);
2541        } else {
2542            TCGReg t = addrlo;
2543
2544            /*
2545             * If the access is unaligned, we need to make sure we fail if we
2546             * cross a page boundary.  The trick is to add the access size-1
2547             * to the address before masking the low bits.  That will make the
2548             * address overflow to the next page if we cross a page boundary,
2549             * which will then force a mismatch of the TLB compare.
2550             */
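            /*
             * E.g. with 4 KiB pages, an unaligned (a_bits = 0) 8-byte
             * access at 0x1ffd becomes 0x2004 after adding s_mask - a_mask,
             * so the masked value 0x2000 no longer matches a TLB tag for
             * the page at 0x1000 and the compare forces the slow path.
             */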
2551            if (a_bits < s_bits) {
2552                unsigned a_mask = (1 << a_bits) - 1;
2553                unsigned s_mask = (1 << s_bits) - 1;
2554                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2555                t = TCG_REG_R0;
2556            }
2557
2558            /* Mask the address for the requested alignment.  */
2559            if (addr_type == TCG_TYPE_I32) {
2560                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2561                            (32 - a_bits) & 31, 31 - s->page_bits);
2562            } else if (a_bits == 0) {
2563                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2564            } else {
2565                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2566                            64 - s->page_bits, s->page_bits - a_bits);
2567                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
2568            }
2569        }
2570
2571        if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
2572            /* Low part comparison into cr7. */
2573            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2574                        0, 7, TCG_TYPE_I32);
2575
2576            /* Load the high part TLB comparator into TMP2.  */
2577            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2578                       cmp_off + 4 * !HOST_BIG_ENDIAN);
2579
2580            /* Load addend, deferred for this case. */
2581            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2582                       offsetof(CPUTLBEntry, addend));
2583
2584            /* High part comparison into cr6. */
2585            tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2,
2586                        0, 6, TCG_TYPE_I32);
2587
2588            /* Combine comparisons into cr0. */
2589            tcg_out32(s, CRAND | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2590        } else {
2591            /* Full comparison into cr0. */
2592            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2593                        0, 0, addr_type);
2594        }
2595
2596        /* Record this insn for patching; branch w/link to the slow path. */
2597        ldst->label_ptr[0] = s->code_ptr;
2598        tcg_out_bc(s, TCG_COND_NE, LK);
2599
2600        h->base = TCG_REG_TMP1;
2601    } else {
2602        if (a_bits) {
2603            ldst = new_ldst_label(s);
2604            ldst->is_ld = is_ld;
2605            ldst->oi = oi;
2606            ldst->addrlo_reg = addrlo;
2607            ldst->addrhi_reg = addrhi;
2608
2609            /* a_bits is at most 7, so the mask fits ANDI's 16-bit immediate. */
2610            tcg_debug_assert(a_bits < 16);
2611            tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
2612
2613            ldst->label_ptr[0] = s->code_ptr;
2614            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2615        }
2616
2617        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2618    }
2619
2620    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2621        /* Zero-extend the guest address for use in the host address. */
2622        tcg_out_ext32u(s, TCG_REG_TMP2, addrlo);
2623        h->index = TCG_REG_TMP2;
2624    } else {
2625        h->index = addrlo;
2626    }
2627
2628    return ldst;
2629}
2630
2631static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2632                            TCGReg addrlo, TCGReg addrhi,
2633                            MemOpIdx oi, TCGType data_type)
2634{
2635    MemOp opc = get_memop(oi);
2636    TCGLabelQemuLdst *ldst;
2637    HostAddress h;
2638
2639    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
2640
2641    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2642        if (opc & MO_BSWAP) {
2643            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2644            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2645            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
2646        } else if (h.base != 0) {
2647            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2648            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
2649            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
2650        } else if (h.index == datahi) {
2651            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2652            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2653        } else {
2654            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2655            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2656        }
2657    } else {
2658        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2659        if (!have_isa_2_06 && insn == LDBRX) {
2660            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2661            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2662            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
2663            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2664        } else if (insn) {
2665            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2666        } else {
2667            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2668            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2669            tcg_out_movext(s, TCG_TYPE_REG, datalo,
2670                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
2671        }
2672    }
2673
2674    if (ldst) {
2675        ldst->type = data_type;
2676        ldst->datalo_reg = datalo;
2677        ldst->datahi_reg = datahi;
2678        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2679    }
2680}
2681
2682static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2683                            TCGReg addrlo, TCGReg addrhi,
2684                            MemOpIdx oi, TCGType data_type)
2685{
2686    MemOp opc = get_memop(oi);
2687    TCGLabelQemuLdst *ldst;
2688    HostAddress h;
2689
2690    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
2691
2692    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2693        if (opc & MO_BSWAP) {
2694            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2695            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2696            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
2697        } else if (h.base != 0) {
2698            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2699            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
2700            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
2701        } else {
2702            tcg_out32(s, STW | TAI(datahi, h.index, 0));
2703            tcg_out32(s, STW | TAI(datalo, h.index, 4));
2704        }
2705    } else {
2706        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2707        if (!have_isa_2_06 && insn == STDBRX) {
2708            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2709            tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4));
2710            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2711            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2));
2712        } else {
2713            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
2714        }
2715    }
2716
2717    if (ldst) {
2718        ldst->type = data_type;
2719        ldst->datalo_reg = datalo;
2720        ldst->datahi_reg = datahi;
2721        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2722    }
2723}
2724
2725static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
2726                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
2727{
2728    TCGLabelQemuLdst *ldst;
2729    HostAddress h;
2730    bool need_bswap;
2731    uint32_t insn;
2732    TCGReg index;
2733
2734    ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
2735
2736    /* Compose the final address, as LQ/STQ have no indexing. */
2737    index = h.index;
2738    if (h.base != 0) {
2739        index = TCG_REG_TMP1;
2740        tcg_out32(s, ADD | TAB(index, h.base, h.index));
2741    }
2742    need_bswap = get_memop(oi) & MO_BSWAP;
2743
2744    if (h.aa.atom == MO_128) {
2745        tcg_debug_assert(!need_bswap);
2746        tcg_debug_assert(datalo & 1);
2747        tcg_debug_assert(datahi == datalo - 1);
2748        tcg_debug_assert(!is_ld || datahi != index);
2749        insn = is_ld ? LQ : STQ;
2750        tcg_out32(s, insn | TAI(datahi, index, 0));
2751    } else {
2752        TCGReg d1, d2;
2753
2754        if (HOST_BIG_ENDIAN ^ need_bswap) {
2755            d1 = datahi, d2 = datalo;
2756        } else {
2757            d1 = datalo, d2 = datahi;
2758        }
2759
2760        if (need_bswap) {
2761            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
2762            insn = is_ld ? LDBRX : STDBRX;
2763            tcg_out32(s, insn | TAB(d1, 0, index));
2764            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
2765        } else {
2766            insn = is_ld ? LD : STD;
2767            tcg_out32(s, insn | TAI(d1, index, 0));
2768            tcg_out32(s, insn | TAI(d2, index, 8));
2769        }
2770    }
2771
2772    if (ldst) {
2773        ldst->type = TCG_TYPE_I128;
2774        ldst->datalo_reg = datalo;
2775        ldst->datahi_reg = datahi;
2776        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2777    }
2778}
2779
2780static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2781{
2782    int i;
2783    for (i = 0; i < count; ++i) {
2784        p[i] = NOP;
2785    }
2786}
2787
2788/* Parameters for function call generation, used in tcg.c.  */
2789#define TCG_TARGET_STACK_ALIGN       16
2790
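/*
 * LR_OFFSET is the LR save slot in the caller's frame as defined by
 * each ABI: e.g. the AIX and ELF linkage areas place the saved LR one
 * slot above the CR save word, i.e. at 2 * SZR past the back chain.
 */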
2791#ifdef _CALL_AIX
2792# define LINK_AREA_SIZE                (6 * SZR)
2793# define LR_OFFSET                     (2 * SZR)
2794# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2795#elif defined(_CALL_DARWIN)
2796# define LINK_AREA_SIZE                (6 * SZR)
2797# define LR_OFFSET                     (2 * SZR)
2798#elif TCG_TARGET_REG_BITS == 64
2799# if defined(_CALL_ELF) && _CALL_ELF == 2
2800#  define LINK_AREA_SIZE               (4 * SZR)
2801#  define LR_OFFSET                    (2 * SZR)
2802# endif
2803#else /* TCG_TARGET_REG_BITS == 32 */
2804# if defined(_CALL_SYSV)
2805#  define LINK_AREA_SIZE               (2 * SZR)
2806#  define LR_OFFSET                    (1 * SZR)
2807# endif
2808#endif
2809#ifndef LR_OFFSET
2810# error "Unhandled abi"
2811#endif
2812#ifndef TCG_TARGET_CALL_STACK_OFFSET
2813# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2814#endif
2815
2816#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2817#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2818
2819#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2820                     + TCG_STATIC_CALL_ARGS_SIZE    \
2821                     + CPU_TEMP_BUF_SIZE            \
2822                     + REG_SAVE_SIZE                \
2823                     + TCG_TARGET_STACK_ALIGN - 1)  \
2824                    & -TCG_TARGET_STACK_ALIGN)
2825
2826#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
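/*
 * Approximate frame layout, low address to high (ignoring alignment
 * padding): link area, outgoing TCG call arguments, CPU_TEMP_BUF,
 * then the callee-saved register area ending at FRAME_SIZE.
 */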
2827
2828static void tcg_target_qemu_prologue(TCGContext *s)
2829{
2830    int i;
2831
2832#ifdef _CALL_AIX
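    /*
     * On _CALL_AIX, functions are called through a descriptor rather
     * than a raw code pointer; emit a minimal two-slot descriptor ahead
     * of the prologue so the generated code can be entered like an
     * ordinary function.
     */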
2833    const void **desc = (const void **)s->code_ptr;
2834    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2835    desc[1] = 0;                            /* environment pointer */
2836    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2837#endif
2838
2839    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2840                  CPU_TEMP_BUF_SIZE);
2841
2842    /* Prologue */
2843    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2844    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2845              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2846
2847    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2848        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2849                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2850    }
2851    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2852
2853    if (!tcg_use_softmmu && guest_base) {
2854        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2855        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2856    }
2857
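    /*
     * The prologue is entered as tcg_qemu_tb_exec(env, tb_ptr):
     * move env into AREG0 and jump to tb_ptr via CTR.
     */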
2858    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2859    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2860    tcg_out32(s, BCCTR | BO_ALWAYS);
2861
2862    /* Epilogue */
2863    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2864
2865    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2866    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2867        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2868                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2869    }
2870    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2871    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2872    tcg_out32(s, BCLR | BO_ALWAYS);
2873}
2874
2875static void tcg_out_tb_start(TCGContext *s)
2876{
2877    /* Load TCG_REG_TB. */
2878    if (USE_REG_TB) {
2879        if (have_isa_3_00) {
2880            /* lnia REG_TB */
2881            tcg_out_addpcis(s, TCG_REG_TB, 0);
2882        } else {
2883            /* bcl 20,31,$+4 (preferred form for getting nia) */
2884            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
2885            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
2886        }
2887    }
2888}
2889
2890static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2891{
2892    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2893    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2894}
2895
2896static void tcg_out_goto_tb(TCGContext *s, int which)
2897{
2898    uintptr_t ptr = get_jmp_target_addr(s, which);
2899    int16_t lo;
2900
2901    /* Direct branch will be patched by tb_target_set_jmp_target. */
2902    set_jmp_insn_offset(s, which);
2903    tcg_out32(s, NOP);
2904
2905    /* When branch is out of range, fall through to indirect. */
2906    if (USE_REG_TB) {
2907        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
2908        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
2909    } else if (have_isa_3_10) {
2910        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
2911        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
2912    } else if (have_isa_3_00) {
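        /*
         * Split the displacement for ADDPCIS + LD: lo takes the
         * sign-extended low 16 bits, so offset - lo is a multiple of
         * 0x10000 for ADDPCIS, with the load supplying the residue.
         */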
2913        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
2914        lo = offset;
2915        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
2916        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2917    } else {
2918        lo = ptr;
2919        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
2920        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2921    }
2922
2923    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2924    tcg_out32(s, BCCTR | BO_ALWAYS);
2925    set_jmp_reset_offset(s, which);
2926}
2927
2928void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2929                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2930{
2931    uintptr_t addr = tb->jmp_target_addr[n];
2932    intptr_t diff = addr - jmp_rx;
2933    tcg_insn_unit insn;
2934
2935    if (in_range_b(diff)) {
2936        insn = B | (diff & 0x3fffffc);
2937    } else {
2938        insn = NOP;
2939    }
2940
2941    qatomic_set((uint32_t *)jmp_rw, insn);
2942    flush_idcache_range(jmp_rx, jmp_rw, 4);
2943}
2944
2945static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2946                       const TCGArg args[TCG_MAX_OP_ARGS],
2947                       const int const_args[TCG_MAX_OP_ARGS])
2948{
2949    TCGArg a0, a1, a2;
2950
2951    switch (opc) {
2952    case INDEX_op_goto_ptr:
2953        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2954        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2955        tcg_out32(s, BCCTR | BO_ALWAYS);
2956        break;
2957    case INDEX_op_br:
2958        {
2959            TCGLabel *l = arg_label(args[0]);
2960            uint32_t insn = B;
2961
2962            if (l->has_value) {
2963                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2964                                       l->u.value_ptr);
2965            } else {
2966                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2967            }
2968            tcg_out32(s, insn);
2969        }
2970        break;
2971    case INDEX_op_ld8u_i32:
2972    case INDEX_op_ld8u_i64:
2973        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2974        break;
2975    case INDEX_op_ld8s_i32:
2976    case INDEX_op_ld8s_i64:
2977        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2978        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
2979        break;
2980    case INDEX_op_ld16u_i32:
2981    case INDEX_op_ld16u_i64:
2982        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2983        break;
2984    case INDEX_op_ld16s_i32:
2985    case INDEX_op_ld16s_i64:
2986        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2987        break;
2988    case INDEX_op_ld_i32:
2989    case INDEX_op_ld32u_i64:
2990        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2991        break;
2992    case INDEX_op_ld32s_i64:
2993        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2994        break;
2995    case INDEX_op_ld_i64:
2996        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2997        break;
2998    case INDEX_op_st8_i32:
2999    case INDEX_op_st8_i64:
3000        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
3001        break;
3002    case INDEX_op_st16_i32:
3003    case INDEX_op_st16_i64:
3004        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
3005        break;
3006    case INDEX_op_st_i32:
3007    case INDEX_op_st32_i64:
3008        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
3009        break;
3010    case INDEX_op_st_i64:
3011        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
3012        break;
3013
3014    case INDEX_op_add_i32:
3015        a0 = args[0], a1 = args[1], a2 = args[2];
3016        if (const_args[2]) {
3017        do_addi_32:
3018            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
3019        } else {
3020            tcg_out32(s, ADD | TAB(a0, a1, a2));
3021        }
3022        break;
3023    case INDEX_op_sub_i32:
3024        a0 = args[0], a1 = args[1], a2 = args[2];
3025        if (const_args[1]) {
3026            if (const_args[2]) {
3027                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
3028            } else {
3029                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3030            }
3031        } else if (const_args[2]) {
3032            a2 = -a2;
3033            goto do_addi_32;
3034        } else {
3035            tcg_out32(s, SUBF | TAB(a0, a2, a1));
3036        }
3037        break;
3038
3039    case INDEX_op_and_i32:
3040        a0 = args[0], a1 = args[1], a2 = args[2];
3041        if (const_args[2]) {
3042            tcg_out_andi32(s, a0, a1, a2);
3043        } else {
3044            tcg_out32(s, AND | SAB(a1, a0, a2));
3045        }
3046        break;
3047    case INDEX_op_and_i64:
3048        a0 = args[0], a1 = args[1], a2 = args[2];
3049        if (const_args[2]) {
3050            tcg_out_andi64(s, a0, a1, a2);
3051        } else {
3052            tcg_out32(s, AND | SAB(a1, a0, a2));
3053        }
3054        break;
3055    case INDEX_op_or_i64:
3056    case INDEX_op_or_i32:
3057        a0 = args[0], a1 = args[1], a2 = args[2];
3058        if (const_args[2]) {
3059            tcg_out_ori32(s, a0, a1, a2);
3060        } else {
3061            tcg_out32(s, OR | SAB(a1, a0, a2));
3062        }
3063        break;
3064    case INDEX_op_xor_i64:
3065    case INDEX_op_xor_i32:
3066        a0 = args[0], a1 = args[1], a2 = args[2];
3067        if (const_args[2]) {
3068            tcg_out_xori32(s, a0, a1, a2);
3069        } else {
3070            tcg_out32(s, XOR | SAB(a1, a0, a2));
3071        }
3072        break;
3073    case INDEX_op_andc_i32:
3074        a0 = args[0], a1 = args[1], a2 = args[2];
3075        if (const_args[2]) {
3076            tcg_out_andi32(s, a0, a1, ~a2);
3077        } else {
3078            tcg_out32(s, ANDC | SAB(a1, a0, a2));
3079        }
3080        break;
3081    case INDEX_op_andc_i64:
3082        a0 = args[0], a1 = args[1], a2 = args[2];
3083        if (const_args[2]) {
3084            tcg_out_andi64(s, a0, a1, ~a2);
3085        } else {
3086            tcg_out32(s, ANDC | SAB(a1, a0, a2));
3087        }
3088        break;
3089    case INDEX_op_orc_i32:
3090        if (const_args[2]) {
3091            tcg_out_ori32(s, args[0], args[1], ~args[2]);
3092            break;
3093        }
3094        /* FALLTHRU */
3095    case INDEX_op_orc_i64:
3096        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
3097        break;
3098    case INDEX_op_eqv_i32:
3099        if (const_args[2]) {
3100            tcg_out_xori32(s, args[0], args[1], ~args[2]);
3101            break;
3102        }
3103        /* FALLTHRU */
3104    case INDEX_op_eqv_i64:
3105        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
3106        break;
3107    case INDEX_op_nand_i32:
3108    case INDEX_op_nand_i64:
3109        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
3110        break;
3111    case INDEX_op_nor_i32:
3112    case INDEX_op_nor_i64:
3113        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
3114        break;
3115
3116    case INDEX_op_clz_i32:
3117        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
3118                      args[2], const_args[2]);
3119        break;
3120    case INDEX_op_ctz_i32:
3121        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
3122                      args[2], const_args[2]);
3123        break;
3124    case INDEX_op_ctpop_i32:
3125        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
3126        break;
3127
3128    case INDEX_op_clz_i64:
3129        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
3130                      args[2], const_args[2]);
3131        break;
3132    case INDEX_op_ctz_i64:
3133        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
3134                      args[2], const_args[2]);
3135        break;
3136    case INDEX_op_ctpop_i64:
3137        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
3138        break;
3139
3140    case INDEX_op_mul_i32:
3141        a0 = args[0], a1 = args[1], a2 = args[2];
3142        if (const_args[2]) {
3143            tcg_out32(s, MULLI | TAI(a0, a1, a2));
3144        } else {
3145            tcg_out32(s, MULLW | TAB(a0, a1, a2));
3146        }
3147        break;
3148
3149    case INDEX_op_div_i32:
3150        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
3151        break;
3152
3153    case INDEX_op_divu_i32:
3154        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
3155        break;
3156
3157    case INDEX_op_rem_i32:
3158        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
3159        break;
3160
3161    case INDEX_op_remu_i32:
3162        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
3163        break;
3164
3165    case INDEX_op_shl_i32:
3166        if (const_args[2]) {
3167            /* Limit immediate shift count lest we create an illegal insn.  */
3168            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
3169        } else {
3170            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
3171        }
3172        break;
3173    case INDEX_op_shr_i32:
3174        if (const_args[2]) {
3175            /* Limit immediate shift count lest we create an illegal insn.  */
3176            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
3177        } else {
3178            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
3179        }
3180        break;
3181    case INDEX_op_sar_i32:
3182        if (const_args[2]) {
3183            tcg_out_sari32(s, args[0], args[1], args[2]);
3184        } else {
3185            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
3186        }
3187        break;
3188    case INDEX_op_rotl_i32:
3189        if (const_args[2]) {
3190            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
3191        } else {
3192            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
3193                         | MB(0) | ME(31));
3194        }
3195        break;
3196    case INDEX_op_rotr_i32:
3197        if (const_args[2]) {
3198            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
3199        } else {
3200            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
3201            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
3202                         | MB(0) | ME(31));
3203        }
3204        break;
3205
3206    case INDEX_op_brcond_i32:
3207        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
3208                       arg_label(args[3]), TCG_TYPE_I32);
3209        break;
3210    case INDEX_op_brcond_i64:
3211        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
3212                       arg_label(args[3]), TCG_TYPE_I64);
3213        break;
3214    case INDEX_op_brcond2_i32:
3215        tcg_out_brcond2(s, args, const_args);
3216        break;
3217
3218    case INDEX_op_neg_i32:
3219    case INDEX_op_neg_i64:
3220        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
3221        break;
3222
3223    case INDEX_op_not_i32:
3224    case INDEX_op_not_i64:
3225        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
3226        break;
3227
3228    case INDEX_op_add_i64:
3229        a0 = args[0], a1 = args[1], a2 = args[2];
3230        if (const_args[2]) {
3231        do_addi_64:
3232            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
3233        } else {
3234            tcg_out32(s, ADD | TAB(a0, a1, a2));
3235        }
3236        break;
3237    case INDEX_op_sub_i64:
3238        a0 = args[0], a1 = args[1], a2 = args[2];
3239        if (const_args[1]) {
3240            if (const_args[2]) {
3241                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
3242            } else {
3243                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3244            }
3245        } else if (const_args[2]) {
3246            a2 = -a2;
3247            goto do_addi_64;
3248        } else {
3249            tcg_out32(s, SUBF | TAB(a0, a2, a1));
3250        }
3251        break;
3252
3253    case INDEX_op_shl_i64:
3254        if (const_args[2]) {
3255            /* Limit immediate shift count lest we create an illegal insn.  */
3256            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
3257        } else {
3258            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
3259        }
3260        break;
3261    case INDEX_op_shr_i64:
3262        if (const_args[2]) {
3263            /* Limit immediate shift count lest we create an illegal insn.  */
3264            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
3265        } else {
3266            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
3267        }
3268        break;
3269    case INDEX_op_sar_i64:
3270        if (const_args[2]) {
3271            tcg_out_sari64(s, args[0], args[1], args[2]);
3272        } else {
3273            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
3274        }
3275        break;
3276    case INDEX_op_rotl_i64:
3277        if (const_args[2]) {
3278            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
3279        } else {
3280            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
3281        }
3282        break;
3283    case INDEX_op_rotr_i64:
3284        if (const_args[2]) {
3285            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
3286        } else {
3287            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
3288            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
3289        }
3290        break;
3291
3292    case INDEX_op_mul_i64:
3293        a0 = args[0], a1 = args[1], a2 = args[2];
3294        if (const_args[2]) {
3295            tcg_out32(s, MULLI | TAI(a0, a1, a2));
3296        } else {
3297            tcg_out32(s, MULLD | TAB(a0, a1, a2));
3298        }
3299        break;
3300    case INDEX_op_div_i64:
3301        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
3302        break;
3303    case INDEX_op_divu_i64:
3304        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
3305        break;
3306    case INDEX_op_rem_i64:
3307        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
3308        break;
3309    case INDEX_op_remu_i64:
3310        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
3311        break;
3312
3313    case INDEX_op_qemu_ld_a64_i32:
3314        if (TCG_TARGET_REG_BITS == 32) {
3315            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
3316                            args[3], TCG_TYPE_I32);
3317            break;
3318        }
3319        /* fall through */
3320    case INDEX_op_qemu_ld_a32_i32:
3321        tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
3322        break;
3323    case INDEX_op_qemu_ld_a32_i64:
3324        if (TCG_TARGET_REG_BITS == 64) {
3325            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
3326                            args[2], TCG_TYPE_I64);
3327        } else {
3328            tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
3329                            args[3], TCG_TYPE_I64);
3330        }
3331        break;
3332    case INDEX_op_qemu_ld_a64_i64:
3333        if (TCG_TARGET_REG_BITS == 64) {
3334            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
3335                            args[2], TCG_TYPE_I64);
3336        } else {
3337            tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
3338                            args[4], TCG_TYPE_I64);
3339        }
3340        break;
3341    case INDEX_op_qemu_ld_a32_i128:
3342    case INDEX_op_qemu_ld_a64_i128:
3343        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3344        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
3345        break;
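    /*
     * Here and in the store cases below, a -1 register argument means
     * "no high part".  On a 32-bit host a 64-bit value and/or a 64-bit
     * guest address each occupy a register pair, so the args[] layout
     * grows from (data, addr, oi) up to (datalo, datahi, addrlo,
     * addrhi, oi).
     */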
3346
3347    case INDEX_op_qemu_st_a64_i32:
3348        if (TCG_TARGET_REG_BITS == 32) {
3349            tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
3350                            args[3], TCG_TYPE_I32);
3351            break;
3352        }
3353        /* fall through */
3354    case INDEX_op_qemu_st_a32_i32:
3355        tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
3356        break;
3357    case INDEX_op_qemu_st_a32_i64:
3358        if (TCG_TARGET_REG_BITS == 64) {
3359            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
3360                            args[2], TCG_TYPE_I64);
3361        } else {
3362            tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
3363                            args[3], TCG_TYPE_I64);
3364        }
3365        break;
3366    case INDEX_op_qemu_st_a64_i64:
3367        if (TCG_TARGET_REG_BITS == 64) {
3368            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
3369                            args[2], TCG_TYPE_I64);
3370        } else {
3371            tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
3372                            args[4], TCG_TYPE_I64);
3373        }
3374        break;
3375    case INDEX_op_qemu_st_a32_i128:
3376    case INDEX_op_qemu_st_a64_i128:
3377        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3378        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
3379        break;
3380
3381    case INDEX_op_setcond_i32:
3382        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
3383                        const_args[2], false);
3384        break;
3385    case INDEX_op_setcond_i64:
3386        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
3387                        const_args[2], false);
3388        break;
3389    case INDEX_op_negsetcond_i32:
3390        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
3391                        const_args[2], true);
3392        break;
3393    case INDEX_op_negsetcond_i64:
3394        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
3395                        const_args[2], true);
3396        break;
3397    case INDEX_op_setcond2_i32:
3398        tcg_out_setcond2(s, args, const_args);
3399        break;
3400
3401    case INDEX_op_bswap16_i32:
3402    case INDEX_op_bswap16_i64:
3403        tcg_out_bswap16(s, args[0], args[1], args[2]);
3404        break;
3405    case INDEX_op_bswap32_i32:
3406        tcg_out_bswap32(s, args[0], args[1], 0);
3407        break;
3408    case INDEX_op_bswap32_i64:
3409        tcg_out_bswap32(s, args[0], args[1], args[2]);
3410        break;
3411    case INDEX_op_bswap64_i64:
3412        tcg_out_bswap64(s, args[0], args[1]);
3413        break;
3414
3415    case INDEX_op_deposit_i32:
3416        if (const_args[2]) {
3417            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3418            tcg_out_andi32(s, args[0], args[0], ~mask);
3419        } else {
3420            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3421                        32 - args[3] - args[4], 31 - args[3]);
3422        }
3423        break;
3424    case INDEX_op_deposit_i64:
3425        if (const_args[2]) {
3426            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3427            tcg_out_andi64(s, args[0], args[0], ~mask);
3428        } else {
3429            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3430                        64 - args[3] - args[4]);
3431        }
3432        break;
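    /*
     * Writing the field mask as ((2 << (len - 1)) - 1) << pos instead
     * of ((1 << len) - 1) << pos avoids an undefined shift when len
     * equals the word size.  E.g. pos = 16, len = 8 gives 0x00ff0000,
     * and depositing constant zero reduces to clearing those bits.
     */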
3433
3434    case INDEX_op_extract_i32:
3435        tcg_out_rlw(s, RLWINM, args[0], args[1],
3436                    32 - args[2], 32 - args[3], 31);
3437        break;
3438    case INDEX_op_extract_i64:
3439        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3440        break;
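    /*
     * Unsigned extract is rotate plus mask: rotating left by 64 - pos
     * brings the field to the bottom, and mb = 64 - len clears the
     * rest.  E.g. pos = 8, len = 16 emits rldicl d,s,56,48, which is
     * (s >> 8) & 0xffff.  The 32-bit form above does the same with
     * rlwinm.
     */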
3441
3442    case INDEX_op_movcond_i32:
3443        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3444                        args[3], args[4], const_args[2]);
3445        break;
3446    case INDEX_op_movcond_i64:
3447        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3448                        args[3], args[4], const_args[2]);
3449        break;
3450
3451#if TCG_TARGET_REG_BITS == 64
3452    case INDEX_op_add2_i64:
3453#else
3454    case INDEX_op_add2_i32:
3455#endif
3456        /* Note that the CA bit is defined based on the word size of the
3457           environment.  So in 64-bit mode it's always carry-out of bit 63.
3458           The fallback code using deposit works just as well for 32-bit.  */
3459        a0 = args[0], a1 = args[1];
3460        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3461            a0 = TCG_REG_R0;
3462        }
3463        if (const_args[4]) {
3464            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3465        } else {
3466            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3467        }
3468        if (const_args[5]) {
3469            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3470        } else {
3471            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3472        }
3473        if (a0 != args[0]) {
3474            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3475        }
3476        break;
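    /*
     * The low part goes first: addic/addc set XER[CA], which adde,
     * addme (constant -1) or addze (constant 0) then fold into the
     * high part.  The low output is redirected to R0 whenever it would
     * overwrite a still-needed high input, and moved into place once
     * both halves are done.
     */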
3477
3478#if TCG_TARGET_REG_BITS == 64
3479    case INDEX_op_sub2_i64:
3480#else
3481    case INDEX_op_sub2_i32:
3482#endif
3483        a0 = args[0], a1 = args[1];
3484        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3485            a0 = TCG_REG_R0;
3486        }
3487        if (const_args[2]) {
3488            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3489        } else {
3490            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3491        }
3492        if (const_args[3]) {
3493            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3494        } else {
3495            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3496        }
3497        if (a0 != args[0]) {
3498            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3499        }
3500        break;
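    /*
     * As for add2, but CA holds the inverted borrow: subfic/subfc set
     * CA to 1 exactly when no borrow occurs, and subfe/subfme/subfze
     * consume it in the same sense for the high part.
     */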
3501
3502    case INDEX_op_muluh_i32:
3503        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3504        break;
3505    case INDEX_op_mulsh_i32:
3506        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3507        break;
3508    case INDEX_op_muluh_i64:
3509        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3510        break;
3511    case INDEX_op_mulsh_i64:
3512        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3513        break;
3514
3515    case INDEX_op_mb:
3516        tcg_out_mb(s, args[0]);
3517        break;
3518
3519    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3520    case INDEX_op_mov_i64:
3521    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3522    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3523    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3524    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
3525    case INDEX_op_ext8s_i64:
3526    case INDEX_op_ext8u_i32:
3527    case INDEX_op_ext8u_i64:
3528    case INDEX_op_ext16s_i32:
3529    case INDEX_op_ext16s_i64:
3530    case INDEX_op_ext16u_i32:
3531    case INDEX_op_ext16u_i64:
3532    case INDEX_op_ext32s_i64:
3533    case INDEX_op_ext32u_i64:
3534    case INDEX_op_ext_i32_i64:
3535    case INDEX_op_extu_i32_i64:
3536    case INDEX_op_extrl_i64_i32:
3537    default:
3538        g_assert_not_reached();
3539    }
3540}
3541
3542int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3543{
3544    switch (opc) {
3545    case INDEX_op_and_vec:
3546    case INDEX_op_or_vec:
3547    case INDEX_op_xor_vec:
3548    case INDEX_op_andc_vec:
3549    case INDEX_op_not_vec:
3550    case INDEX_op_nor_vec:
3551    case INDEX_op_eqv_vec:
3552    case INDEX_op_nand_vec:
3553        return 1;
3554    case INDEX_op_orc_vec:
3555        return have_isa_2_07;
3556    case INDEX_op_add_vec:
3557    case INDEX_op_sub_vec:
3558    case INDEX_op_smax_vec:
3559    case INDEX_op_smin_vec:
3560    case INDEX_op_umax_vec:
3561    case INDEX_op_umin_vec:
3562    case INDEX_op_shlv_vec:
3563    case INDEX_op_shrv_vec:
3564    case INDEX_op_sarv_vec:
3565    case INDEX_op_rotlv_vec:
3566        return vece <= MO_32 || have_isa_2_07;
3567    case INDEX_op_ssadd_vec:
3568    case INDEX_op_sssub_vec:
3569    case INDEX_op_usadd_vec:
3570    case INDEX_op_ussub_vec:
3571        return vece <= MO_32;
3572    case INDEX_op_shli_vec:
3573    case INDEX_op_shri_vec:
3574    case INDEX_op_sari_vec:
3575    case INDEX_op_rotli_vec:
3576        return (vece <= MO_32 || have_isa_2_07) ? -1 : 0;
3577    case INDEX_op_cmp_vec:
3578    case INDEX_op_cmpsel_vec:
3579        return (vece <= MO_32 || have_isa_2_07) ? 1 : 0;
3580    case INDEX_op_neg_vec:
3581        return vece >= MO_32 && have_isa_3_00;
3582    case INDEX_op_mul_vec:
3583        switch (vece) {
3584        case MO_8:
3585        case MO_16:
3586            return -1;
3587        case MO_32:
3588            return have_isa_2_07 ? 1 : -1;
3589        case MO_64:
3590            return have_isa_3_10;
3591        }
3592        return 0;
3593    case INDEX_op_bitsel_vec:
3594        return have_vsx;
3595    case INDEX_op_rotrv_vec:
3596        return -1;
3597    default:
3598        return 0;
3599    }
3600}
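/*
 * The values returned above follow the generic TCG protocol: 0 means
 * the op is unsupported for this type/vece, 1 means it can be emitted
 * directly, and -1 means it is supported via tcg_expand_vec_op below.
 */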
3601
3602static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3603                            TCGReg dst, TCGReg src)
3604{
3605    tcg_debug_assert(dst >= TCG_REG_V0);
3606
3607    /* A splat from an integer reg is allowed by the constraints only for v3.00.  */
3608    if (src < TCG_REG_V0) {
3609        tcg_debug_assert(have_isa_3_00);
3610        switch (vece) {
3611        case MO_64:
3612            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3613            return true;
3614        case MO_32:
3615            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3616            return true;
3617        default:
3618            /* Fail, so that we fall back on either dupm or mov+dup.  */
3619            return false;
3620        }
3621    }
3622
3623    /*
3624     * Recall we use (or emulate) VSX integer loads, so the integer is
3625     * right justified within the left (zero-index) double-word.
3626     */
3627    switch (vece) {
3628    case MO_8:
3629        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3630        break;
3631    case MO_16:
3632        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3633        break;
3634    case MO_32:
3635        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3636        break;
3637    case MO_64:
3638        if (have_vsx) {
3639            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3640            break;
3641        }
3642        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3643        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3644        break;
3645    default:
3646        g_assert_not_reached();
3647    }
3648    return true;
3649}
3650
3651static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3652                             TCGReg out, TCGReg base, intptr_t offset)
3653{
3654    int elt;
3655
3656    tcg_debug_assert(out >= TCG_REG_V0);
3657    switch (vece) {
3658    case MO_8:
3659        if (have_isa_3_00) {
3660            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3661        } else {
3662            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3663        }
3664        elt = extract32(offset, 0, 4);
3665#if !HOST_BIG_ENDIAN
3666        elt ^= 15;
3667#endif
3668        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3669        break;
3670    case MO_16:
3671        tcg_debug_assert((offset & 1) == 0);
3672        if (have_isa_3_00) {
3673            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3674        } else {
3675            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3676        }
3677        elt = extract32(offset, 1, 3);
3678#if !HOST_BIG_ENDIAN
3679        elt ^= 7;
3680#endif
3681        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3682        break;
3683    case MO_32:
3684        if (have_isa_3_00) {
3685            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3686            break;
3687        }
3688        tcg_debug_assert((offset & 3) == 0);
3689        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3690        elt = extract32(offset, 2, 2);
3691#if !HOST_BIG_ENDIAN
3692        elt ^= 3;
3693#endif
3694        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3695        break;
3696    case MO_64:
3697        if (have_vsx) {
3698            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3699            break;
3700        }
3701        tcg_debug_assert((offset & 7) == 0);
3702        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3703        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3704        elt = extract32(offset, 3, 1);
3705#if !HOST_BIG_ENDIAN
3706        elt = !elt;
3707#endif
3708        if (elt) {
3709            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3710        } else {
3711            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3712        }
3713        break;
3714    default:
3715        g_assert_not_reached();
3716    }
3717    return true;
3718}
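/*
 * The "elt ^= ..." adjustments above account for element numbering:
 * lvebx/lvehx/lvewx place the datum at its big-endian element index
 * within the 16-byte register, so on a little-endian host the index
 * given to vspltb/vsplth/vspltw must be mirrored.  E.g. for MO_8,
 * offset & 15 == 3 splats element 3 on BE but element 12 on LE.
 */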
3719
3720static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
3721{
3722    tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
3723}
3724
3725static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3726{
3727    tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
3728}
3729
3730static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3731{
3732    tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
3733}
3734
3735static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3736{
3737    tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
3738}
3739
3740static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3741{
3742    tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
3743}
3744
3745static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
3746                               TCGReg c, TCGReg t, TCGReg f)
3747{
3748    if (TCG_TARGET_HAS_bitsel_vec) {
3749        tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
3750    } else {
3751        tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
3752        tcg_out_andc_vec(s, d, f, c);
3753        tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
3754    }
3755}
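/*
 * Both paths compute d = (t & c) | (f & ~c), i.e. select t where the
 * mask c has 1-bits.  xxsel does this in one insn; the fallback takes
 * three, staging t & c in TCG_VEC_TMP2 so that d may alias any input.
 */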
3756
3757static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
3758                                  TCGReg a1, TCGReg a2, TCGCond cond)
3759{
3760    static const uint32_t
3761        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3762        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3763        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3764        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
3765    uint32_t insn;
3766
3767    bool need_swap = false, need_inv = false;
3768
3769    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3770
3771    switch (cond) {
3772    case TCG_COND_EQ:
3773    case TCG_COND_GT:
3774    case TCG_COND_GTU:
3775        break;
3776    case TCG_COND_NE:
3777        if (have_isa_3_00 && vece <= MO_32) {
3778            break;
3779        }
3780        /* fall through */
3781    case TCG_COND_LE:
3782    case TCG_COND_LEU:
3783        need_inv = true;
3784        break;
3785    case TCG_COND_LT:
3786    case TCG_COND_LTU:
3787        need_swap = true;
3788        break;
3789    case TCG_COND_GE:
3790    case TCG_COND_GEU:
3791        need_swap = need_inv = true;
3792        break;
3793    default:
3794        g_assert_not_reached();
3795    }
3796
3797    if (need_inv) {
3798        cond = tcg_invert_cond(cond);
3799    }
3800    if (need_swap) {
3801        TCGReg swap = a1;
3802        a1 = a2;
3803        a2 = swap;
3804        cond = tcg_swap_cond(cond);
3805    }
3806
3807    switch (cond) {
3808    case TCG_COND_EQ:
3809        insn = eq_op[vece];
3810        break;
3811    case TCG_COND_NE:
3812        insn = ne_op[vece];
3813        break;
3814    case TCG_COND_GT:
3815        insn = gts_op[vece];
3816        break;
3817    case TCG_COND_GTU:
3818        insn = gtu_op[vece];
3819        break;
3820    default:
3821        g_assert_not_reached();
3822    }
3823    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3824
3825    return need_inv;
3826}
3827
3828static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
3829                            TCGReg a1, TCGReg a2, TCGCond cond)
3830{
3831    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
3832        tcg_out_not_vec(s, a0, a0);
3833    }
3834}
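/*
 * Only EQ/GT/GTU (plus NE for vece <= MO_32 on ISA v3.00) exist as
 * insns, so the other conditions are canonicalized first: e.g. LT a,b
 * becomes GT b,a by swapping, and GE a,b becomes NOT(GT b,a) by
 * swapping plus the inversion reported by tcg_out_cmp_vec_noinv.
 */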
3835
3836static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
3837                               TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
3838                               TCGReg v4, TCGCond cond)
3839{
3840    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);
3841
3842    if (!const_v3) {
3843        if (inv) {
3844            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
3845        } else {
3846            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
3847        }
3848    } else if (v3) {
3849        if (inv) {
3850            tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
3851        } else {
3852            tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
3853        }
3854    } else {
3855        if (inv) {
3856            tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
3857        } else {
3858            tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
3859        }
3860    }
3861}
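/*
 * The vZM constraint allows v3 to be constant 0 or -1, in which case
 * the select collapses into bit operations on the compare mask m:
 * v3 == -1 yields v4 | m (v4 | ~m when the compare was inverted),
 * and v3 == 0 yields v4 & ~m (v4 & m when inverted).
 */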
3862
3863static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3864                           unsigned vecl, unsigned vece,
3865                           const TCGArg args[TCG_MAX_OP_ARGS],
3866                           const int const_args[TCG_MAX_OP_ARGS])
3867{
3868    static const uint32_t
3869        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3870        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3871        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3872        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3873        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3874        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3875        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3876        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3877        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3878        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3879        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3880        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3881        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3882        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3883        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3884        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3885        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3886        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3887        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3888        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3889        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3890
3891    TCGType type = vecl + TCG_TYPE_V64;
3892    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3893    uint32_t insn;
3894
3895    switch (opc) {
3896    case INDEX_op_ld_vec:
3897        tcg_out_ld(s, type, a0, a1, a2);
3898        return;
3899    case INDEX_op_st_vec:
3900        tcg_out_st(s, type, a0, a1, a2);
3901        return;
3902    case INDEX_op_dupm_vec:
3903        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3904        return;
3905
3906    case INDEX_op_add_vec:
3907        insn = add_op[vece];
3908        break;
3909    case INDEX_op_sub_vec:
3910        insn = sub_op[vece];
3911        break;
3912    case INDEX_op_neg_vec:
3913        insn = neg_op[vece];
3914        a2 = a1;
3915        a1 = 0;
3916        break;
3917    case INDEX_op_mul_vec:
3918        insn = mul_op[vece];
3919        break;
3920    case INDEX_op_ssadd_vec:
3921        insn = ssadd_op[vece];
3922        break;
3923    case INDEX_op_sssub_vec:
3924        insn = sssub_op[vece];
3925        break;
3926    case INDEX_op_usadd_vec:
3927        insn = usadd_op[vece];
3928        break;
3929    case INDEX_op_ussub_vec:
3930        insn = ussub_op[vece];
3931        break;
3932    case INDEX_op_smin_vec:
3933        insn = smin_op[vece];
3934        break;
3935    case INDEX_op_umin_vec:
3936        insn = umin_op[vece];
3937        break;
3938    case INDEX_op_smax_vec:
3939        insn = smax_op[vece];
3940        break;
3941    case INDEX_op_umax_vec:
3942        insn = umax_op[vece];
3943        break;
3944    case INDEX_op_shlv_vec:
3945        insn = shlv_op[vece];
3946        break;
3947    case INDEX_op_shrv_vec:
3948        insn = shrv_op[vece];
3949        break;
3950    case INDEX_op_sarv_vec:
3951        insn = sarv_op[vece];
3952        break;
3953    case INDEX_op_and_vec:
3954        tcg_out_and_vec(s, a0, a1, a2);
3955        return;
3956    case INDEX_op_or_vec:
3957        tcg_out_or_vec(s, a0, a1, a2);
3958        return;
3959    case INDEX_op_xor_vec:
3960        insn = VXOR;
3961        break;
3962    case INDEX_op_andc_vec:
3963        tcg_out_andc_vec(s, a0, a1, a2);
3964        return;
3965    case INDEX_op_not_vec:
3966        tcg_out_not_vec(s, a0, a1);
3967        return;
3968    case INDEX_op_orc_vec:
3969        tcg_out_orc_vec(s, a0, a1, a2);
3970        return;
3971    case INDEX_op_nand_vec:
3972        insn = VNAND;
3973        break;
3974    case INDEX_op_nor_vec:
3975        insn = VNOR;
3976        break;
3977    case INDEX_op_eqv_vec:
3978        insn = VEQV;
3979        break;
3980
3981    case INDEX_op_cmp_vec:
3982        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
3983        return;
3984    case INDEX_op_cmpsel_vec:
3985        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
3986                           args[3], const_args[3], args[4], args[5]);
3987        return;
3988    case INDEX_op_bitsel_vec:
3989        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
3990        return;
3991
3992    case INDEX_op_dup2_vec:
3993        assert(TCG_TARGET_REG_BITS == 32);
3994        /* With inputs a1 = xLxx, a2 = xHxx  */
3995        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3996        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3997        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3998        return;
3999
4000    case INDEX_op_ppc_mrgh_vec:
4001        insn = mrgh_op[vece];
4002        break;
4003    case INDEX_op_ppc_mrgl_vec:
4004        insn = mrgl_op[vece];
4005        break;
4006    case INDEX_op_ppc_muleu_vec:
4007        insn = muleu_op[vece];
4008        break;
4009    case INDEX_op_ppc_mulou_vec:
4010        insn = mulou_op[vece];
4011        break;
4012    case INDEX_op_ppc_pkum_vec:
4013        insn = pkum_op[vece];
4014        break;
4015    case INDEX_op_rotlv_vec:
4016        insn = rotl_op[vece];
4017        break;
4018    case INDEX_op_ppc_msum_vec:
4019        tcg_debug_assert(vece == MO_16);
4020        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
4021        return;
4022
4023    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
4024    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
4025    default:
4026        g_assert_not_reached();
4027    }
4028
4029    tcg_debug_assert(insn != 0);
4030    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
4031}
4032
4033static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
4034                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
4035{
4036    TCGv_vec t1;
4037
4038    if (vece == MO_32) {
4039        /*
4040         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
4041         * So using negative values gets us bit 4 (counts 16..31) easily.
4042         */
4043        imm = sextract32(imm, 0, 5);
4044    } else {
4045        imm &= (8 << vece) - 1;
4046    }
4047
4048    /* Splat with byte elements, so that xxspltib can be used; even the MO_64 counts allowed by ISA 2.07 fit in a byte. */
4049    t1 = tcg_constant_vec(type, MO_8, imm);
4050    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
4051              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
4052}
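/*
 * Worked example: shli_vec at MO_32 by 20 splats the byte value
 * sextract32(20, 0, 5) == -12, i.e. 0xf4, which vspltisb can encode;
 * vslw reads only the low five bits of each element, recovering 20.
 */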
4053
4054static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
4055                           TCGv_vec v1, TCGv_vec v2)
4056{
4057    TCGv_vec t1 = tcg_temp_new_vec(type);
4058    TCGv_vec t2 = tcg_temp_new_vec(type);
4059    TCGv_vec c0, c16;
4060
4061    switch (vece) {
4062    case MO_8:
4063    case MO_16:
4064        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
4065                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4066        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
4067                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4068        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
4069                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
4070        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
4071                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
4072        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
4073                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
4074        break;
4075
4076    case MO_32:
4077        tcg_debug_assert(!have_isa_2_07);
4078        /*
4079         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
4080         * So using -16 is a quick way to represent 16.
4081         */
4082        c16 = tcg_constant_vec(type, MO_8, -16);
4083        c0 = tcg_constant_vec(type, MO_8, 0);
4084
4085        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
4086                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
4087        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
4088                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4089        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
4090                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
4091        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
4092                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
4093        tcg_gen_add_vec(MO_32, v0, t1, t2);
4094        break;
4095
4096    default:
4097        g_assert_not_reached();
4098    }
4099    tcg_temp_free_vec(t1);
4100    tcg_temp_free_vec(t2);
4101}
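/*
 * The MO_32 sequence is the usual 16x16 decomposition: with
 * a = ah * 2^16 + al and b = bh * 2^16 + bl,
 *     a * b mod 2^32 = ((ah * bl + al * bh) << 16) + al * bl.
 * t1 collects the cross products (vmsumuhm of v1 against v2 rotated
 * by 16), t2 the low product al * bl (vmulouh), and the final add
 * combines them.
 */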
4102
4103void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4104                       TCGArg a0, ...)
4105{
4106    va_list va;
4107    TCGv_vec v0, v1, v2, t0;
4108    TCGArg a2;
4109
4110    va_start(va, a0);
4111    v0 = temp_tcgv_vec(arg_temp(a0));
4112    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
4113    a2 = va_arg(va, TCGArg);
4114
4115    switch (opc) {
4116    case INDEX_op_shli_vec:
4117        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
4118        break;
4119    case INDEX_op_shri_vec:
4120        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
4121        break;
4122    case INDEX_op_sari_vec:
4123        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
4124        break;
4125    case INDEX_op_rotli_vec:
4126        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
4127        break;
4128    case INDEX_op_mul_vec:
4129        v2 = temp_tcgv_vec(arg_temp(a2));
4130        expand_vec_mul(type, vece, v0, v1, v2);
4131        break;
4132    case INDEX_op_rotrv_vec:
4133        v2 = temp_tcgv_vec(arg_temp(a2));
4134        t0 = tcg_temp_new_vec(type);
4135        tcg_gen_neg_vec(vece, t0, v2);
4136        tcg_gen_rotlv_vec(vece, v0, v1, t0);
4137        tcg_temp_free_vec(t0);
4138        break;
4139    default:
4140        g_assert_not_reached();
4141    }
4142    va_end(va);
4143}
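/*
 * rotrv is reported as -1 in tcg_can_emit_vec_op and lands here:
 * rotation counts are taken modulo the element size, so it is
 * expanded as rotl(v1, -v2) using the directly supported rotlv.
 */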
4144
4145static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
4146{
4147    switch (op) {
4148    case INDEX_op_goto_ptr:
4149        return C_O0_I1(r);
4150
4151    case INDEX_op_ld8u_i32:
4152    case INDEX_op_ld8s_i32:
4153    case INDEX_op_ld16u_i32:
4154    case INDEX_op_ld16s_i32:
4155    case INDEX_op_ld_i32:
4156    case INDEX_op_ctpop_i32:
4157    case INDEX_op_neg_i32:
4158    case INDEX_op_not_i32:
4159    case INDEX_op_ext8s_i32:
4160    case INDEX_op_ext16s_i32:
4161    case INDEX_op_bswap16_i32:
4162    case INDEX_op_bswap32_i32:
4163    case INDEX_op_extract_i32:
4164    case INDEX_op_ld8u_i64:
4165    case INDEX_op_ld8s_i64:
4166    case INDEX_op_ld16u_i64:
4167    case INDEX_op_ld16s_i64:
4168    case INDEX_op_ld32u_i64:
4169    case INDEX_op_ld32s_i64:
4170    case INDEX_op_ld_i64:
4171    case INDEX_op_ctpop_i64:
4172    case INDEX_op_neg_i64:
4173    case INDEX_op_not_i64:
4174    case INDEX_op_ext8s_i64:
4175    case INDEX_op_ext16s_i64:
4176    case INDEX_op_ext32s_i64:
4177    case INDEX_op_ext_i32_i64:
4178    case INDEX_op_extu_i32_i64:
4179    case INDEX_op_bswap16_i64:
4180    case INDEX_op_bswap32_i64:
4181    case INDEX_op_bswap64_i64:
4182    case INDEX_op_extract_i64:
4183        return C_O1_I1(r, r);
4184
4185    case INDEX_op_st8_i32:
4186    case INDEX_op_st16_i32:
4187    case INDEX_op_st_i32:
4188    case INDEX_op_st8_i64:
4189    case INDEX_op_st16_i64:
4190    case INDEX_op_st32_i64:
4191    case INDEX_op_st_i64:
4192        return C_O0_I2(r, r);
4193
4194    case INDEX_op_add_i32:
4195    case INDEX_op_and_i32:
4196    case INDEX_op_or_i32:
4197    case INDEX_op_xor_i32:
4198    case INDEX_op_andc_i32:
4199    case INDEX_op_orc_i32:
4200    case INDEX_op_eqv_i32:
4201    case INDEX_op_shl_i32:
4202    case INDEX_op_shr_i32:
4203    case INDEX_op_sar_i32:
4204    case INDEX_op_rotl_i32:
4205    case INDEX_op_rotr_i32:
4206    case INDEX_op_and_i64:
4207    case INDEX_op_andc_i64:
4208    case INDEX_op_shl_i64:
4209    case INDEX_op_shr_i64:
4210    case INDEX_op_sar_i64:
4211    case INDEX_op_rotl_i64:
4212    case INDEX_op_rotr_i64:
4213        return C_O1_I2(r, r, ri);
4214
4215    case INDEX_op_mul_i32:
4216    case INDEX_op_mul_i64:
4217        return C_O1_I2(r, r, rI);
4218
4219    case INDEX_op_div_i32:
4220    case INDEX_op_divu_i32:
4221    case INDEX_op_rem_i32:
4222    case INDEX_op_remu_i32:
4223    case INDEX_op_nand_i32:
4224    case INDEX_op_nor_i32:
4225    case INDEX_op_muluh_i32:
4226    case INDEX_op_mulsh_i32:
4227    case INDEX_op_orc_i64:
4228    case INDEX_op_eqv_i64:
4229    case INDEX_op_nand_i64:
4230    case INDEX_op_nor_i64:
4231    case INDEX_op_div_i64:
4232    case INDEX_op_divu_i64:
4233    case INDEX_op_rem_i64:
4234    case INDEX_op_remu_i64:
4235    case INDEX_op_mulsh_i64:
4236    case INDEX_op_muluh_i64:
4237        return C_O1_I2(r, r, r);
4238
4239    case INDEX_op_sub_i32:
4240        return C_O1_I2(r, rI, ri);
4241    case INDEX_op_add_i64:
4242        return C_O1_I2(r, r, rT);
4243    case INDEX_op_or_i64:
4244    case INDEX_op_xor_i64:
4245        return C_O1_I2(r, r, rU);
4246    case INDEX_op_sub_i64:
4247        return C_O1_I2(r, rI, rT);
4248    case INDEX_op_clz_i32:
4249    case INDEX_op_ctz_i32:
4250    case INDEX_op_clz_i64:
4251    case INDEX_op_ctz_i64:
4252        return C_O1_I2(r, r, rZW);
4253
4254    case INDEX_op_brcond_i32:
4255    case INDEX_op_brcond_i64:
4256        return C_O0_I2(r, rC);
4257    case INDEX_op_setcond_i32:
4258    case INDEX_op_setcond_i64:
4259    case INDEX_op_negsetcond_i32:
4260    case INDEX_op_negsetcond_i64:
4261        return C_O1_I2(r, r, rC);
4262    case INDEX_op_movcond_i32:
4263    case INDEX_op_movcond_i64:
4264        return C_O1_I4(r, r, rC, rZ, rZ);
4265
4266    case INDEX_op_deposit_i32:
4267    case INDEX_op_deposit_i64:
4268        return C_O1_I2(r, 0, rZ);
4269    case INDEX_op_brcond2_i32:
4270        return C_O0_I4(r, r, ri, ri);
4271    case INDEX_op_setcond2_i32:
4272        return C_O1_I4(r, r, r, ri, ri);
4273    case INDEX_op_add2_i64:
4274    case INDEX_op_add2_i32:
4275        return C_O2_I4(r, r, r, r, rI, rZM);
4276    case INDEX_op_sub2_i64:
4277    case INDEX_op_sub2_i32:
4278        return C_O2_I4(r, r, rI, rZM, r, r);
4279
4280    case INDEX_op_qemu_ld_a32_i32:
4281        return C_O1_I1(r, r);
4282    case INDEX_op_qemu_ld_a64_i32:
4283        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
4284    case INDEX_op_qemu_ld_a32_i64:
4285        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
4286    case INDEX_op_qemu_ld_a64_i64:
4287        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
4288
4289    case INDEX_op_qemu_st_a32_i32:
4290        return C_O0_I2(r, r);
4291    case INDEX_op_qemu_st_a64_i32:
4292        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
4293    case INDEX_op_qemu_st_a32_i64:
4294        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
4295    case INDEX_op_qemu_st_a64_i64:
4296        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
4297
4298    case INDEX_op_qemu_ld_a32_i128:
4299    case INDEX_op_qemu_ld_a64_i128:
4300        return C_N1O1_I1(o, m, r);
4301    case INDEX_op_qemu_st_a32_i128:
4302    case INDEX_op_qemu_st_a64_i128:
4303        return C_O0_I3(o, m, r);
4304
4305    case INDEX_op_add_vec:
4306    case INDEX_op_sub_vec:
4307    case INDEX_op_mul_vec:
4308    case INDEX_op_and_vec:
4309    case INDEX_op_or_vec:
4310    case INDEX_op_xor_vec:
4311    case INDEX_op_andc_vec:
4312    case INDEX_op_orc_vec:
4313    case INDEX_op_nor_vec:
4314    case INDEX_op_eqv_vec:
4315    case INDEX_op_nand_vec:
4316    case INDEX_op_cmp_vec:
4317    case INDEX_op_ssadd_vec:
4318    case INDEX_op_sssub_vec:
4319    case INDEX_op_usadd_vec:
4320    case INDEX_op_ussub_vec:
4321    case INDEX_op_smax_vec:
4322    case INDEX_op_smin_vec:
4323    case INDEX_op_umax_vec:
4324    case INDEX_op_umin_vec:
4325    case INDEX_op_shlv_vec:
4326    case INDEX_op_shrv_vec:
4327    case INDEX_op_sarv_vec:
4328    case INDEX_op_rotlv_vec:
4329    case INDEX_op_rotrv_vec:
4330    case INDEX_op_ppc_mrgh_vec:
4331    case INDEX_op_ppc_mrgl_vec:
4332    case INDEX_op_ppc_muleu_vec:
4333    case INDEX_op_ppc_mulou_vec:
4334    case INDEX_op_ppc_pkum_vec:
4335    case INDEX_op_dup2_vec:
4336        return C_O1_I2(v, v, v);
4337
4338    case INDEX_op_not_vec:
4339    case INDEX_op_neg_vec:
4340        return C_O1_I1(v, v);
4341
4342    case INDEX_op_dup_vec:
4343        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
4344
4345    case INDEX_op_ld_vec:
4346    case INDEX_op_dupm_vec:
4347        return C_O1_I1(v, r);
4348
4349    case INDEX_op_st_vec:
4350        return C_O0_I2(v, r);
4351
4352    case INDEX_op_bitsel_vec:
4353    case INDEX_op_ppc_msum_vec:
4354        return C_O1_I3(v, v, v, v);
4355    case INDEX_op_cmpsel_vec:
4356        return C_O1_I4(v, v, v, vZM, v);
4357
4358    default:
4359        g_assert_not_reached();
4360    }
4361}
4362
4363static void tcg_target_init(TCGContext *s)
4364{
4365    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
4366    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
4367    if (have_altivec) {
4368        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
4369        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
4370    }
4371
4372    tcg_target_call_clobber_regs = 0;
4373    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
4374    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
4375    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
4376    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
4377    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
4378    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
4379    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
4380    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
4381    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
4382    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
4383    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
4384    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
4385
4386    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
4387    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
4388    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
4389    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
4390    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
4391    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
4392    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
4393    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
4394    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
4395    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
4396    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
4397    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
4398    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
4399    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
4400    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
4401    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4402    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
4403    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
4404    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
4405    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
4406
4407    s->reserved_regs = 0;
4408    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
4409    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
4410#if defined(_CALL_SYSV)
4411    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
4412#endif
4413#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
4414    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
4415#endif
4416    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
4417    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
4418    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
4419    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
4420    if (USE_REG_TB) {
4421        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
4422    }
4423}
4424
4425#ifdef __ELF__
4426typedef struct {
4427    DebugFrameCIE cie;
4428    DebugFrameFDEHeader fde;
4429    uint8_t fde_def_cfa[4];
4430    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
4431} DebugFrame;
4432
4433/* We're expecting a 2 byte uleb128 encoded value.  */
4434QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
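/* The frame size is emitted below as the two-byte uleb128
   (FRAME_SIZE & 0x7f) | 0x80, FRAME_SIZE >> 7; hence the bound,
   since two uleb128 bytes hold at most 14 payload bits.  */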
4435
4436#if TCG_TARGET_REG_BITS == 64
4437# define ELF_HOST_MACHINE EM_PPC64
4438#else
4439# define ELF_HOST_MACHINE EM_PPC
4440#endif
4441
4442static DebugFrame debug_frame = {
4443    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4444    .cie.id = -1,
4445    .cie.version = 1,
4446    .cie.code_align = 1,
4447    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
4448    .cie.return_column = 65,
4449
4450    /* Total FDE size does not include the "len" member.  */
4451    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
4452
4453    .fde_def_cfa = {
4454        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
4455        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
4456        (FRAME_SIZE >> 7)
4457    },
4458    .fde_reg_ofs = {
4459        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
4460        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
4461    }
4462};
4463
4464void tcg_register_jit(const void *buf, size_t buf_size)
4465{
4466    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4467    int i;
4468
4469    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4470        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4471        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4472    }
4473
4474    debug_frame.fde.func_start = (uintptr_t)buf;
4475    debug_frame.fde.func_len = buf_size;
4476
4477    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4478}
4479#endif /* __ELF__ */
4487