xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision ea46c4bce8c8a8285e6715c1bac29f5b73f5062b)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26
/*
 * Normalise the calling-convention macros to the _CALL_FOO spelling that
 * GCC provides, supplying the ones other toolchains leave out:
 *   - Apple XCode does not define _CALL_DARWIN.
 *   - Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 * An unrecognised combination is a hard build error.
 */
#if TCG_TARGET_REG_BITS == 64
# if defined(_CALL_AIX)
    /* already canonical */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* nothing to add for _CALL_ELF == 2 */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* already canonical */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif
53
/* How 32-bit arguments and 128-bit return values are passed to helpers. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif

/* How 64-bit and 128-bit arguments are passed; only _CALL_SYSV differs. */
#if defined(_CALL_SYSV)
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif
68
/*
 * Some memory operations need a scratch register other than R0.
 * With the AIX calling convention the TOC register can be re-used, since
 * it is reloaded at every call.  Otherwise R12 serves, being neither a
 * call-saved register nor a parameter register.
 */
#if defined(_CALL_AIX)
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

/* Vector scratch registers. */
#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

/* Register used as the TB base, and the condition under which it is used. */
#define TCG_REG_TB      TCG_REG_R31
#define USE_REG_TB      (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)

/* Size of a pointer, as a signed int so it never promotes to unsigned. */
#define SZP  ((int)sizeof(void *))

/* Size of a register, in bytes. */
#define SZR  (TCG_TARGET_REG_BITS / 8)

/* Target-specific constant constraint flags, see tcg_target_const_match. */
#define TCG_CT_CONST_S16   0x100    /* fits in int16_t */
#define TCG_CT_CONST_U16   0x200    /* fits in uint16_t */
#define TCG_CT_CONST_S32   0x400    /* fits in int32_t */
#define TCG_CT_CONST_U32   0x800    /* fits in uint32_t */
#define TCG_CT_CONST_ZERO  0x1000   /* equals 0 */
#define TCG_CT_CONST_MONE  0x2000   /* equals -1 */
#define TCG_CT_CONST_WSZ   0x4000   /* equals the operation width, 32 or 64 */
#define TCG_CT_CONST_CMP   0x8000   /* comparison operand, depends on cond */

/* Register-set bit masks: general registers low, vector registers high. */
#define ALL_GENERAL_REGS   0xffffffffu
#define ALL_VECTOR_REGS    0xffffffff00000000ull

/* Fallback definition when the relocation number is not already defined. */
#if !defined(R_PPC64_PCREL34)
#define R_PPC64_PCREL34  132
#endif

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30
111
#ifdef CONFIG_DEBUG_TCG
/* Printable register names, indexed by register number (debug builds only). */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    /* general registers */
    "r0",  "r1",  "r2",  "r3",
    "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19",
    "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27",
    "r28", "r29", "r30", "r31",
    /* vector registers */
    "v0",  "v1",  "v2",  "v3",
    "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11",
    "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19",
    "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27",
    "v28", "v29", "v30", "v31",
};
#endif
124
125static const int tcg_target_reg_alloc_order[] = {
126    TCG_REG_R14,  /* call saved registers */
127    TCG_REG_R15,
128    TCG_REG_R16,
129    TCG_REG_R17,
130    TCG_REG_R18,
131    TCG_REG_R19,
132    TCG_REG_R20,
133    TCG_REG_R21,
134    TCG_REG_R22,
135    TCG_REG_R23,
136    TCG_REG_R24,
137    TCG_REG_R25,
138    TCG_REG_R26,
139    TCG_REG_R27,
140    TCG_REG_R28,
141    TCG_REG_R29,
142    TCG_REG_R30,
143    TCG_REG_R31,
144    TCG_REG_R12,  /* call clobbered, non-arguments */
145    TCG_REG_R11,
146    TCG_REG_R2,
147    TCG_REG_R13,
148    TCG_REG_R10,  /* call clobbered, arguments */
149    TCG_REG_R9,
150    TCG_REG_R8,
151    TCG_REG_R7,
152    TCG_REG_R6,
153    TCG_REG_R5,
154    TCG_REG_R4,
155    TCG_REG_R3,
156
157    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
158    TCG_REG_V2,   /* call clobbered, vectors */
159    TCG_REG_V3,
160    TCG_REG_V4,
161    TCG_REG_V5,
162    TCG_REG_V6,
163    TCG_REG_V7,
164    TCG_REG_V8,
165    TCG_REG_V9,
166    TCG_REG_V10,
167    TCG_REG_V11,
168    TCG_REG_V12,
169    TCG_REG_V13,
170    TCG_REG_V14,
171    TCG_REG_V15,
172    TCG_REG_V16,
173    TCG_REG_V17,
174    TCG_REG_V18,
175    TCG_REG_V19,
176};
177
178static const int tcg_target_call_iarg_regs[] = {
179    TCG_REG_R3,
180    TCG_REG_R4,
181    TCG_REG_R5,
182    TCG_REG_R6,
183    TCG_REG_R7,
184    TCG_REG_R8,
185    TCG_REG_R9,
186    TCG_REG_R10
187};
188
189static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
190{
191    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
192    tcg_debug_assert(slot >= 0 && slot <= 1);
193    return TCG_REG_R3 + slot;
194}
195
196static const int tcg_target_callee_save_regs[] = {
197#ifdef _CALL_DARWIN
198    TCG_REG_R11,
199#endif
200    TCG_REG_R14,
201    TCG_REG_R15,
202    TCG_REG_R16,
203    TCG_REG_R17,
204    TCG_REG_R18,
205    TCG_REG_R19,
206    TCG_REG_R20,
207    TCG_REG_R21,
208    TCG_REG_R22,
209    TCG_REG_R23,
210    TCG_REG_R24,
211    TCG_REG_R25,
212    TCG_REG_R26,
213    TCG_REG_R27, /* currently used for the global env */
214    TCG_REG_R28,
215    TCG_REG_R29,
216    TCG_REG_R30,
217    TCG_REG_R31
218};
219
220/* For PPC, we use TB+4 instead of TB as the base. */
221static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
222{
223    return tcg_tbrel_diff(s, target) - 4;
224}
225
226static inline bool in_range_b(tcg_target_long target)
227{
228    return target == sextract64(target, 0, 26);
229}
230
231static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
232                               const tcg_insn_unit *target)
233{
234    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
235    tcg_debug_assert(in_range_b(disp));
236    return disp & 0x3fffffc;
237}
238
239static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
240{
241    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
242    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
243
244    if (in_range_b(disp)) {
245        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
246        return true;
247    }
248    return false;
249}
250
251static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
252                               const tcg_insn_unit *target)
253{
254    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
255    tcg_debug_assert(disp == (int16_t) disp);
256    return disp & 0xfffc;
257}
258
259static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
260{
261    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
262    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
263
264    if (disp == (int16_t) disp) {
265        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
266        return true;
267    }
268    return false;
269}
270
271static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
272{
273    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
274    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
275
276    if (disp == sextract64(disp, 0, 34)) {
277        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
278        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
279        return true;
280    }
281    return false;
282}
283
284static bool mask_operand(uint32_t c, int *mb, int *me);
285static bool mask64_operand(uint64_t c, int *mb, int *me);
286
287/* test if a constant matches the constraint */
288static bool tcg_target_const_match(int64_t sval, int ct,
289                                   TCGType type, TCGCond cond, int vece)
290{
291    uint64_t uval = sval;
292    int mb, me;
293
294    if (ct & TCG_CT_CONST) {
295        return 1;
296    }
297
298    if (type == TCG_TYPE_I32) {
299        uval = (uint32_t)sval;
300        sval = (int32_t)sval;
301    }
302
303    if (ct & TCG_CT_CONST_CMP) {
304        switch (cond) {
305        case TCG_COND_EQ:
306        case TCG_COND_NE:
307            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
308            break;
309        case TCG_COND_LT:
310        case TCG_COND_GE:
311        case TCG_COND_LE:
312        case TCG_COND_GT:
313            ct |= TCG_CT_CONST_S16;
314            break;
315        case TCG_COND_LTU:
316        case TCG_COND_GEU:
317        case TCG_COND_LEU:
318        case TCG_COND_GTU:
319            ct |= TCG_CT_CONST_U16;
320            break;
321        case TCG_COND_TSTEQ:
322        case TCG_COND_TSTNE:
323            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
324                return 1;
325            }
326            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
327                return 1;
328            }
329            if (TCG_TARGET_REG_BITS == 64 &&
330                mask64_operand(uval << clz64(uval), &mb, &me)) {
331                return 1;
332            }
333            return 0;
334        default:
335            g_assert_not_reached();
336        }
337    }
338
339    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
340        return 1;
341    }
342    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
343        return 1;
344    }
345    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
346        return 1;
347    }
348    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
349        return 1;
350    }
351    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
352        return 1;
353    }
354    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
355        return 1;
356    }
357    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
358        return 1;
359    }
360    return 0;
361}
362
/*
 * Opcode builders.  OPCD places the primary opcode at bit 26 and up; the
 * others combine a fixed primary opcode with an extended opcode field.
 */
#define OPCD(opc)   ((opc) << 26)
#define XO19(opc)   (OPCD(19) | ((opc) << 1))
#define MD30(opc)   (OPCD(30) | ((opc) << 2))
#define MDS30(opc)  (OPCD(30) | ((opc) << 1))
#define XO31(opc)   (OPCD(31) | ((opc) << 1))
#define XO58(opc)   (OPCD(58) | (opc))
#define XO62(opc)   (OPCD(62) | (opc))
#define VX4(opc)    (OPCD(4) | (opc))

/* Branches */
#define B       OPCD(18)
#define BC      OPCD(16)

/* Loads */
#define LBZ     OPCD(34)
#define LHZ     OPCD(40)
#define LHA     OPCD(42)
#define LWZ     OPCD(32)
#define LWZUX   XO31(55)
#define LD      XO58(0)
#define LDX     XO31(21)
#define LDU     XO58(1)
#define LDUX    XO31(53)
#define LWA     XO58(2)
#define LWAX    XO31(341)
#define LQ      OPCD(56)

/* Stores */
#define STB     OPCD(38)
#define STH     OPCD(44)
#define STW     OPCD(36)
#define STD     XO62(0)
#define STDU    XO62(1)
#define STDX    XO31(149)
#define STQ     XO62(2)

/* P-prefixed load opcodes */
#define PLWA    OPCD(41)
#define PLD     OPCD(57)
#define PLXSD   OPCD(42)
#define PLXV    OPCD(25 * 2 + 1)  /* force tx=1 */

/* P-prefixed store opcodes */
#define PSTD    OPCD(61)
#define PSTXSD  OPCD(46)
#define PSTXV   OPCD(27 * 2 + 1)  /* force sx=1 */

/* Immediate-operand arithmetic, logic and compare */
#define ADDIC   OPCD(12)
#define ADDI    OPCD(14)
#define ADDIS   OPCD(15)
#define ORI     OPCD(24)
#define ORIS    OPCD(25)
#define XORI    OPCD(26)
#define XORIS   OPCD(27)
#define ANDI    OPCD(28)
#define ANDIS   OPCD(29)
#define MULLI   OPCD(7)
#define CMPLI   OPCD(10)
#define CMPI    OPCD(11)
#define SUBFIC  OPCD(8)

#define LWZU    OPCD(33)
#define STWU    OPCD(37)

/* 32-bit rotates */
#define RLWIMI  OPCD(20)
#define RLWINM  OPCD(21)
#define RLWNM   OPCD(23)

/* 64-bit rotates */
#define RLDICL  MD30(0)
#define RLDICR  MD30(1)
#define RLDIMI  MD30(3)
#define RLDCL   MDS30(8)

/* Primary opcode 19 group */
#define BCLR    XO19(16)
#define BCCTR   XO19(528)
#define CRAND   XO19(257)
#define CRANDC  XO19(129)
#define CRNAND  XO19(225)
#define CROR    XO19(449)
#define CRNOR   XO19(33)
#define ADDPCIS XO19(2)
439
/* Primary opcode 31 group: integer arithmetic, logic, compare and moves */
#define EXTSB   XO31(954)
#define EXTSH   XO31(922)
#define EXTSW   XO31(986)
#define ADD     XO31(266)
#define ADDE    XO31(138)
#define ADDME   XO31(234)
#define ADDZE   XO31(202)
#define ADDC    XO31(10)
#define AND     XO31(28)
#define SUBF    XO31(40)
#define SUBFC   XO31(8)
#define SUBFE   XO31(136)
#define SUBFME  XO31(232)
#define SUBFZE  XO31(200)
#define OR      XO31(444)
#define XOR     XO31(316)
#define MULLW   XO31(235)
#define MULHW   XO31(75)
#define MULHWU  XO31(11)
#define DIVW    XO31(491)
#define DIVWU   XO31(459)
#define MODSW   XO31(779)
#define MODUW   XO31(267)
#define CMP     XO31(0)
#define CMPL    XO31(32)
#define LHBRX   XO31(790)
#define LWBRX   XO31(534)
#define LDBRX   XO31(532)
#define STHBRX  XO31(918)
#define STWBRX  XO31(662)
#define STDBRX  XO31(660)
#define MFSPR   XO31(339)
#define MTSPR   XO31(467)
#define SRAWI   XO31(824)
#define NEG     XO31(104)
#define MFCR    XO31(19)
#define MFOCRF  (MFCR | (1u << 20))
#define NOR     XO31(124)
#define CNTLZW  XO31(26)
#define CNTLZD  XO31(58)
#define CNTTZW  XO31(538)
#define CNTTZD  XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC    XO31(60)
#define ORC     XO31(412)
#define EQV     XO31(284)
#define NAND    XO31(476)
#define ISEL    XO31(15)

/* 64-bit multiply, divide and modulo */
#define MULLD   XO31(233)
#define MULHD   XO31(73)
#define MULHDU  XO31(9)
#define DIVD    XO31(489)
#define DIVDU   XO31(457)
#define MODSD   XO31(777)
#define MODUD   XO31(265)

/* Indexed loads and stores */
#define LBZX    XO31(87)
#define LHZX    XO31(279)
#define LHAX    XO31(343)
#define LWZX    XO31(23)
#define STBX    XO31(215)
#define STHX    XO31(407)
#define STWX    XO31(151)

/* Memory barriers; LWSYNC is HWSYNC with bit 21 set */
#define EIEIO   XO31(854)
#define HWSYNC  XO31(598)
#define LWSYNC  (HWSYNC | (1u << 21))

/* Special-purpose register field for MFSPR/MTSPR */
#define SPR(a, b) ((((a) << 5) | (b)) << 11)
#define LR      SPR(8, 0)
#define CTR     SPR(9, 0)

/* 32-bit shifts */
#define SLW     XO31(24)
#define SRW     XO31(536)
#define SRAW    XO31(792)

/* 64-bit shifts */
#define SLD     XO31(27)
#define SRD     XO31(539)
#define SRAD    XO31(794)
#define SRADI   XO31(413 << 1)

/* Byte reversal */
#define BRH     XO31(219)
#define BRW     XO31(155)
#define BRD     XO31(187)

/* Trap word; TRAP sets the TO field to 31 */
#define TW      XO31(4)
#define TRAP    (TW | TO(31))

#define SETBC    XO31(384)  /* v3.10 */
#define SETBCR   XO31(416)  /* v3.10 */
#define SETNBC   XO31(448)  /* v3.10 */
#define SETNBCR  XO31(480)  /* v3.10 */

#define NOP     ORI  /* ori 0,0,0 */
536
/* Vector and VSX loads */
#define LVX      XO31(103)
#define LVEBX    XO31(7)
#define LVEHX    XO31(39)
#define LVEWX    XO31(71)
#define LXSDX    (XO31(588) | 1)     /* v2.06, force tx=1 */
#define LXVDSX   (XO31(332) | 1)     /* v2.06, force tx=1 */
#define LXSIWZX  (XO31(12) | 1)      /* v2.07, force tx=1 */
#define LXV      (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD     (OPCD(57) | 2)      /* v3.00 */
#define LXVWSX   (XO31(364) | 1)     /* v3.00, force tx=1 */

/* Vector and VSX stores */
#define STVX     XO31(231)
#define STVEWX   XO31(199)
#define STXSDX   (XO31(716) | 1)     /* v2.06, force sx=1 */
#define STXSIWX  (XO31(140) | 1)     /* v2.07, force sx=1 */
#define STXV     (OPCD(61) | 8 | 5)  /* v3.00, force sx=1 */
#define STXSD    (OPCD(61) | 2)      /* v3.00 */
554
/* Vector add: saturating (S/U ... S) and modulo (... M) forms */
#define VADDSBS   VX4(768)
#define VADDUBS   VX4(512)
#define VADDUBM   VX4(0)
#define VADDSHS   VX4(832)
#define VADDUHS   VX4(576)
#define VADDUHM   VX4(64)
#define VADDSWS   VX4(896)
#define VADDUWS   VX4(640)
#define VADDUWM   VX4(128)
#define VADDUDM   VX4(192)    /* v2.07 */

/* Vector subtract */
#define VSUBSBS   VX4(1792)
#define VSUBUBS   VX4(1536)
#define VSUBUBM   VX4(1024)
#define VSUBSHS   VX4(1856)
#define VSUBUHS   VX4(1600)
#define VSUBUHM   VX4(1088)
#define VSUBSWS   VX4(1920)
#define VSUBUWS   VX4(1664)
#define VSUBUWM   VX4(1152)
#define VSUBUDM   VX4(1216)   /* v2.07 */

/* Vector negate */
#define VNEGW     (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD     (VX4(1538) | (7 << 16))  /* v3.00 */

/* Vector maximum and minimum */
#define VMAXSB    VX4(258)
#define VMAXSH    VX4(322)
#define VMAXSW    VX4(386)
#define VMAXSD    VX4(450)    /* v2.07 */
#define VMAXUB    VX4(2)
#define VMAXUH    VX4(66)
#define VMAXUW    VX4(130)
#define VMAXUD    VX4(194)    /* v2.07 */
#define VMINSB    VX4(770)
#define VMINSH    VX4(834)
#define VMINSW    VX4(898)
#define VMINSD    VX4(962)    /* v2.07 */
#define VMINUB    VX4(514)
#define VMINUH    VX4(578)
#define VMINUW    VX4(642)
#define VMINUD    VX4(706)    /* v2.07 */

/* Vector compare */
#define VCMPEQUB  VX4(6)
#define VCMPEQUH  VX4(70)
#define VCMPEQUW  VX4(134)
#define VCMPEQUD  VX4(199)    /* v2.07 */
#define VCMPGTSB  VX4(774)
#define VCMPGTSH  VX4(838)
#define VCMPGTSW  VX4(902)
#define VCMPGTSD  VX4(967)    /* v2.07 */
#define VCMPGTUB  VX4(518)
#define VCMPGTUH  VX4(582)
#define VCMPGTUW  VX4(646)
#define VCMPGTUD  VX4(711)    /* v2.07 */
#define VCMPNEB   VX4(7)      /* v3.00 */
#define VCMPNEH   VX4(71)     /* v3.00 */
#define VCMPNEW   VX4(135)    /* v3.00 */

/* Vector shifts and rotates */
#define VSLB      VX4(260)
#define VSLH      VX4(324)
#define VSLW      VX4(388)
#define VSLD      VX4(1476)   /* v2.07 */
#define VSRB      VX4(516)
#define VSRH      VX4(580)
#define VSRW      VX4(644)
#define VSRD      VX4(1732)   /* v2.07 */
#define VSRAB     VX4(772)
#define VSRAH     VX4(836)
#define VSRAW     VX4(900)
#define VSRAD     VX4(964)    /* v2.07 */
#define VRLB      VX4(4)
#define VRLH      VX4(68)
#define VRLW      VX4(132)
#define VRLD      VX4(196)    /* v2.07 */

/* Vector multiply */
#define VMULEUB   VX4(520)
#define VMULEUH   VX4(584)
#define VMULEUW   VX4(648)    /* v2.07 */
#define VMULOUB   VX4(8)
#define VMULOUH   VX4(72)
#define VMULOUW   VX4(136)    /* v2.07 */
#define VMULUWM   VX4(137)    /* v2.07 */
#define VMULLD    VX4(457)    /* v3.10 */
#define VMSUMUHM  VX4(38)

/* Vector merge */
#define VMRGHB    VX4(12)
#define VMRGHH    VX4(76)
#define VMRGHW    VX4(140)
#define VMRGLB    VX4(268)
#define VMRGLH    VX4(332)
#define VMRGLW    VX4(396)

/* Vector pack */
#define VPKUHUM   VX4(14)
#define VPKUWUM   VX4(78)

/* Vector logical */
#define VAND      VX4(1028)
#define VANDC     VX4(1092)
#define VNOR      VX4(1284)
#define VOR       VX4(1156)
#define VXOR      VX4(1220)
#define VEQV      VX4(1668)   /* v2.07 */
#define VNAND     VX4(1412)   /* v2.07 */
#define VORC      VX4(1348)   /* v2.07 */

/* Vector splat */
#define VSPLTB    VX4(524)
#define VSPLTH    VX4(588)
#define VSPLTW    VX4(652)
#define VSPLTISB  VX4(780)
#define VSPLTISH  VX4(844)
#define VSPLTISW  VX4(908)

#define VSLDOI    VX4(44)
667
/* VSX permute, select and splat-immediate */
#define XXPERMDI  (OPCD(60) | (10 << 3) | 7)   /* v2.06, force ax=bx=tx=1 */
#define XXSEL     (OPCD(60) | (3 << 4) | 0xf)  /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB  (OPCD(60) | (360 << 1) | 1)  /* v3.00, force tx=1 */

/* Moves to and from VSX registers */
#define MFVSRD    (XO31(51) | 1)    /* v2.07, force sx=1 */
#define MFVSRWZ   (XO31(115) | 1)   /* v2.07, force sx=1 */
#define MTVSRD    (XO31(179) | 1)   /* v2.07, force tx=1 */
#define MTVSRWZ   (XO31(243) | 1)   /* v2.07, force tx=1 */
#define MTVSRDD   (XO31(435) | 1)   /* v3.00, force tx=1 */
#define MTVSRWS   (XO31(403) | 1)   /* v3.00, force tx=1 */
678
/* Instruction field encoders: shift an operand into its bit position. */
#define RT(r)    ((r) << 21)
#define RS(r)    ((r) << 21)
#define RA(r)    ((r) << 16)
#define RB(r)    ((r) << 11)
#define TO(t)    ((t) << 21)
#define SH(s)    ((s) << 11)
#define MB(b)    ((b) << 6)
#define ME(e)    ((e) << 1)
#define BO(o)    ((o) << 21)
#define MB64(b)  ((b) << 5)
#define FXM(b)   (1 << (19 - (b)))

/* Vector register fields; only the low 5 bits of the register are used. */
#define VRT(r)   (((r) & 31) << 21)
#define VRA(r)   (((r) & 31) << 16)
#define VRB(r)   (((r) & 31) << 11)
#define VRC(r)   (((r) & 31) << 6)

#define LK       1

/* Common operand triples; the immediate forms keep the low 16 bits of i. */
#define TAB(t, a, b)  (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b)  (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i)  (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i)  (RS(s) | RA(a) | ((i) & 0xffff))

/* Condition-register fields: n selects the CR field, c the bit within it. */
#define BF(n)      ((n) << 23)
#define BI(n, c)   (((c) + ((n) * 4)) << 16)
#define BT(n, c)   (((c) + ((n) * 4)) << 21)
#define BA(n, c)   (((c) + ((n) * 4)) << 16)
#define BB(n, c)   (((c) + ((n) * 4)) << 11)
#define BC_(n, c)  (((c) + ((n) * 4)) << 6)

/* Branch-option field values */
#define BO_COND_TRUE   BO(12)
#define BO_COND_FALSE  BO(4)
#define BO_ALWAYS      BO(20)
713
/* Bit index within a condition-register field, as used by BI()/BC_(). */
enum {
    CR_LT = 0,
    CR_GT = 1,
    CR_EQ = 2,
    CR_SO = 3
};
720
721static const uint32_t tcg_to_bc[16] = {
722    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
723    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
724    [TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
725    [TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
726    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
727    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
728    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
729    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
730    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
731    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
732    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
733    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
734};
735
736/* The low bit here is set if the RA and RB fields must be inverted.  */
737static const uint32_t tcg_to_isel[16] = {
738    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
739    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
740    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
741    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
742    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
743    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
744    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
745    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
746    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
747    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
748    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
749    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
750};
751
752static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
753                        intptr_t value, intptr_t addend)
754{
755    const tcg_insn_unit *target;
756    int16_t lo;
757    int32_t hi;
758
759    value += addend;
760    target = (const tcg_insn_unit *)value;
761
762    switch (type) {
763    case R_PPC_REL14:
764        return reloc_pc14(code_ptr, target);
765    case R_PPC_REL24:
766        return reloc_pc24(code_ptr, target);
767    case R_PPC64_PCREL34:
768        return reloc_pc34(code_ptr, target);
769    case R_PPC_ADDR16:
770        /*
771         * We are (slightly) abusing this relocation type.  In particular,
772         * assert that the low 2 bits are zero, and do not modify them.
773         * That way we can use this with LD et al that have opcode bits
774         * in the low 2 bits of the insn.
775         */
776        if ((value & 3) || value != (int16_t)value) {
777            return false;
778        }
779        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
780        break;
781    case R_PPC_ADDR32:
782        /*
783         * We are abusing this relocation type.  Again, this points to
784         * a pair of insns, lis + load.  This is an absolute address
785         * relocation for PPC32 so the lis cannot be removed.
786         */
787        lo = value;
788        hi = value - lo;
789        if (hi + lo != value) {
790            return false;
791        }
792        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
793        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
794        break;
795    default:
796        g_assert_not_reached();
797    }
798    return true;
799}
800
801/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
802static bool tcg_out_need_prefix_align(TCGContext *s)
803{
804    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
805}
806
807static void tcg_out_prefix_align(TCGContext *s)
808{
809    if (tcg_out_need_prefix_align(s)) {
810        tcg_out32(s, NOP);
811    }
812}
813
814static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
815{
816    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
817}
818
819/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
820static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
821                          unsigned ra, tcg_target_long imm, bool r)
822{
823    tcg_insn_unit p, i;
824
825    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
826    i = opc | TAI(rt, ra, imm);
827
828    tcg_out_prefix_align(s);
829    tcg_out32(s, p);
830    tcg_out32(s, i);
831}
832
833/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
834static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
835                          unsigned ra, tcg_target_long imm, bool r)
836{
837    tcg_insn_unit p, i;
838
839    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
840    i = opc | TAI(rt, ra, imm);
841
842    tcg_out_prefix_align(s);
843    tcg_out32(s, p);
844    tcg_out32(s, i);
845}
846
847static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
848                             TCGReg base, tcg_target_long offset);
849
850static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
851{
852    if (ret == arg) {
853        return true;
854    }
855    switch (type) {
856    case TCG_TYPE_I64:
857        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
858        /* fallthru */
859    case TCG_TYPE_I32:
860        if (ret < TCG_REG_V0) {
861            if (arg < TCG_REG_V0) {
862                tcg_out32(s, OR | SAB(arg, ret, arg));
863                break;
864            } else if (have_isa_2_07) {
865                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
866                          | VRT(arg) | RA(ret));
867                break;
868            } else {
869                /* Altivec does not support vector->integer moves.  */
870                return false;
871            }
872        } else if (arg < TCG_REG_V0) {
873            if (have_isa_2_07) {
874                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
875                          | VRT(ret) | RA(arg));
876                break;
877            } else {
878                /* Altivec does not support integer->vector moves.  */
879                return false;
880            }
881        }
882        /* fallthru */
883    case TCG_TYPE_V64:
884    case TCG_TYPE_V128:
885        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
886        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
887        break;
888    default:
889        g_assert_not_reached();
890    }
891    return true;
892}
893
894static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
895                           int sh, int mb, bool rc)
896{
897    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
898    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
899    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
900    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
901}
902
903static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
904                        int sh, int mb)
905{
906    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
907}
908
909static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
910                           int sh, int mb, int me, bool rc)
911{
912    tcg_debug_assert((mb & 0x1f) == mb);
913    tcg_debug_assert((me & 0x1f) == me);
914    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
915}
916
917static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
918                        int sh, int mb, int me)
919{
920    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
921}
922
923static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
924{
925    tcg_out32(s, EXTSB | RA(dst) | RS(src));
926}
927
928static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
929{
930    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
931}
932
933static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
934{
935    tcg_out32(s, EXTSH | RA(dst) | RS(src));
936}
937
938static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
939{
940    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
941}
942
943static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
944{
945    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
946    tcg_out32(s, EXTSW | RA(dst) | RS(src));
947}
948
949static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
950{
951    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
952    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
953}
954
955static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
956{
957    tcg_out_ext32s(s, dst, src);
958}
959
960static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
961{
962    tcg_out_ext32u(s, dst, src);
963}
964
965static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
966{
967    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
968    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
969}
970
971static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
972{
973    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
974}
975
976static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
977{
978    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
979}
980
981static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
982{
983    /* Limit immediate shift count lest we create an illegal insn.  */
984    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
985}
986
987static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
988{
989    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
990}
991
992static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
993{
994    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
995}
996
997static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
998{
999    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
1000}
1001
1002static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
1003{
1004    uint32_t d0, d1, d2;
1005
1006    tcg_debug_assert((imm & 0xffff) == 0);
1007    tcg_debug_assert(imm == (int32_t)imm);
1008
1009    d2 = extract32(imm, 16, 1);
1010    d1 = extract32(imm, 17, 5);
1011    d0 = extract32(imm, 22, 10);
1012    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
1013}
1014
1015static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
1016{
1017    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
1018
1019    if (have_isa_3_10) {
1020        tcg_out32(s, BRH | RA(dst) | RS(src));
1021        if (flags & TCG_BSWAP_OS) {
1022            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
1023        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
1024            tcg_out_ext16u(s, dst, dst);
1025        }
1026        return;
1027    }
1028
1029    /*
1030     * In the following,
1031     *   dep(a, b, m) -> (a & ~m) | (b & m)
1032     *
1033     * Begin with:                              src = xxxxabcd
1034     */
1035    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
1036    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
1037    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
1038    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
1039
1040    if (flags & TCG_BSWAP_OS) {
1041        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
1042    } else {
1043        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
1044    }
1045}
1046
1047static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
1048{
1049    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
1050
1051    if (have_isa_3_10) {
1052        tcg_out32(s, BRW | RA(dst) | RS(src));
1053        if (flags & TCG_BSWAP_OS) {
1054            tcg_out_ext32s(s, dst, dst);
1055        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
1056            tcg_out_ext32u(s, dst, dst);
1057        }
1058        return;
1059    }
1060
1061    /*
1062     * Stolen from gcc's builtin_bswap32.
1063     * In the following,
1064     *   dep(a, b, m) -> (a & ~m) | (b & m)
1065     *
1066     * Begin with:                              src = xxxxabcd
1067     */
1068    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
1069    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
1070    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
1071    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
1072    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
1073    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
1074
1075    if (flags & TCG_BSWAP_OS) {
1076        tcg_out_ext32s(s, dst, tmp);
1077    } else {
1078        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
1079    }
1080}
1081
/*
 * Byte-swap the 64-bit value in src into dst.
 *
 * With ISA v3.10 this is a single BRD insn.  Otherwise the result is
 * assembled from 32-bit rotate-and-mask insns plus two 64-bit rotates,
 * using TCG_REG_R0 as one of the two working registers.
 */
static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    /*
     * t0 accumulates the result, t1 holds src rotated by 32.
     * When dst aliases src, the result is built in R0 and t1 reuses
     * dst; t1 is only written after the last direct read of src.
     */
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    /* Deliver the result; t0 is already dst unless dst aliased src. */
    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}
1119
1120/* Emit a move into ret of arg, if it can be done in one insn.  */
1121static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
1122{
1123    if (arg == (int16_t)arg) {
1124        tcg_out32(s, ADDI | TAI(ret, 0, arg));
1125        return true;
1126    }
1127    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
1128        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
1129        return true;
1130    }
1131    return false;
1132}
1133
/*
 * Load the integer constant arg into the general register ret, trying
 * progressively more expensive instruction sequences in order.
 *
 * @type: TCG_TYPE_I32 or (64-bit hosts only) TCG_TYPE_I64
 * @in_prologue: when true, every strategy that relies on TCG_REG_TB
 *               (TB-relative ADDI and the TB-relative constant pool)
 *               is skipped.
 *
 * The order of the tests matters: later tests rely on earlier ones
 * having already rejected the cheaper encodings.
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* A 32-bit constant is handled as its sign-extension on 64-bit hosts. */
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /*
     * Load addresses within the TB with one insn.
     * tb_diff is computed unconditionally and reused further below.
     */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    /*
     * Unsigned 32-bit value with bit 15 clear: ADDI loads the low half
     * (non-negative as a 16-bit immediate), ORIS supplies the high half.
     */
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /*
     * Load masked 16-bit value: arg is positive with bit 15 set and,
     * once its low 15 bits are filled with ones, has the form 0..01..1.
     * ADDI loads the low 16 bits; RLDICL with a zero rotate then keeps
     * only the bits from mb onwards, mb being the leading-zero count
     * of the filled value.
     */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /*
     * Load common masks with 2 insns: either a signed 16-bit value
     * shifted left, or a one-insn constant shifted right.
     */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /*
     * Load addresses within 2GB with 2 insns.
     * The displacement is split into a signed low 16-bit part for ADDI
     * and a high part for ADDPCIS (which goes through TCG_REG_TMP2).
     * NOTE(review): the "- 4" presumably reflects ADDPCIS being relative
     * to the address of the following insn -- confirm against the ISA.
     */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /*
     * Use the constant pool, if possible.  Three addressing variants:
     * TB-relative LD, prefixed pc-relative PLD (the pool label is
     * attached to the two-word insn just emitted, hence code_ptr - 2),
     * or ADDPCIS + LD.
     */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    /*
     * Last resort: build the value piecewise.  Load the high 32 bits
     * (the shift is split as 31 + 1), move them into place, then OR in
     * the two low halfwords where they are non-zero.
     */
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
1266
/*
 * Load the constant val into the vector register ret.
 *
 * @type: TCG_TYPE_V64 or a wider vector type; selects between a
 *        64-bit scalar load and a full-vector load from the pool.
 * @vece: element size (MO_8/MO_16/MO_32/...), used to look for a
 *        splat-immediate encoding.
 * @val:  the 64-bit constant.
 *        NOTE(review): presumably already replicated across all 64
 *        bits by the caller -- the pool path stores it verbatim.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    /*
     * First try the splat-immediate insns, which take a 5-bit signed
     * immediate (-16 .. 15); ISA v3.00 additionally has XXSPLTIB for
     * any 8-bit value.
     */
    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     * Pick the relocation type and addend for the addressing scheme.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        /*
         * Prefixed pc-relative loads complete here; the pool label is
         * attached to the two-word insn just emitted (code_ptr - 2).
         */
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    /*
     * Choose the indexed load insn and register the pool entry.  With
     * VSX a single 64-bit pool value suffices (LXSDX, or LXVDSX for the
     * wider type); plain LVX reads 16 bytes, so the value is stored
     * twice.  32-bit hosts split val into two 32-bit pool words.
     * The label is registered at code_ptr, i.e. on the next insn emitted.
     */
    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    /*
     * Emit the address computation into TCG_REG_TMP1 with zero
     * immediates (presumably patched through the relocation registered
     * above), followed by the load itself.
     */
    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}
1357
1358static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1359                         tcg_target_long arg)
1360{
1361    switch (type) {
1362    case TCG_TYPE_I32:
1363    case TCG_TYPE_I64:
1364        tcg_debug_assert(ret < TCG_REG_V0);
1365        tcg_out_movi_int(s, type, ret, arg, false);
1366        break;
1367
1368    default:
1369        g_assert_not_reached();
1370    }
1371}
1372
1373static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1374{
1375    return false;
1376}
1377
1378static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1379                             tcg_target_long imm)
1380{
1381    /* This function is only used for passing structs by reference. */
1382    g_assert_not_reached();
1383}
1384
/*
 * Decide whether the 32-bit constant c is a single contiguous run of
 * one bits, i.e. looks like one of
 *     0....01....1
 *     1....10....0
 *     0..01..10..0
 * All-zeros and all-ones are rejected.
 *
 * On success, store the mask begin/end bit positions (counted from
 * the most significant bit, as computed with clz32) in *mb and *me
 * and return true.  On failure return false, leaving both untouched.
 */
static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lowest_set, carried;

    if (c == 0 || c == -1) {
        return false;
    }

    /*
     * Adding the lowest set bit ripples a carry through the bottom run
     * of ones.  If c was one contiguous run, the sum has at most one
     * bit set (or wrapped to zero when the run reached bit 31).
     */
    lowest_set = c & -c;
    carried = c + lowest_set;
    if ((carried & (carried - 1)) != 0) {
        return false;
    }

    *me = clz32(lowest_set);
    if (carried != 0) {
        *mb = clz32(carried & -carried) + 1;
    } else {
        *mb = 0;
    }
    return true;
}
1408
/*
 * Decide whether the 64-bit constant c is a run of ones anchored at
 * either end of the register:
 *     1..10..0   -> *mb = 0,  *me = position of the lowest one
 *     0..01..1   -> *me = 63, *mb = position of the highest one
 * (positions counted from the most significant bit, via clz64).
 *
 * Returns true and fills *mb / *me on a match; returns false, leaving
 * them untouched, otherwise (including for c == 0).
 */
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lowest_set;

    if (c == 0) {
        return false;
    }
    lowest_set = c & -c;

    if (c == -lowest_set) {
        /* Everything from the lowest set bit upwards is one. */
        *mb = 0;
        *me = clz64(lowest_set);
        return true;
    }

    if (lowest_set == 1 && (c & (c + 1)) == 0) {
        /* Everything from bit 0 up to the highest set bit is one. */
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }

    return false;
}
1432
1433static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1434{
1435    int mb, me;
1436
1437    if (mask_operand(c, &mb, &me)) {
1438        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1439    } else if ((c & 0xffff) == c) {
1440        tcg_out32(s, ANDI | SAI(src, dst, c));
1441        return;
1442    } else if ((c & 0xffff0000) == c) {
1443        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1444        return;
1445    } else {
1446        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1447        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1448    }
1449}
1450
1451static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1452{
1453    int mb, me;
1454
1455    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1456    if (mask64_operand(c, &mb, &me)) {
1457        if (mb == 0) {
1458            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1459        } else {
1460            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1461        }
1462    } else if ((c & 0xffff) == c) {
1463        tcg_out32(s, ANDI | SAI(src, dst, c));
1464        return;
1465    } else if ((c & 0xffff0000) == c) {
1466        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1467        return;
1468    } else {
1469        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1470        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1471    }
1472}
1473
1474static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1475                           int op_lo, int op_hi)
1476{
1477    if (c >> 16) {
1478        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1479        src = dst;
1480    }
1481    if (c & 0xffff) {
1482        tcg_out32(s, op_lo | SAI(src, dst, c));
1483        src = dst;
1484    }
1485}
1486
1487static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1488{
1489    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
1490}
1491
1492static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1493{
1494    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
1495}
1496
1497static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
1498{
1499    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1500    if (in_range_b(disp)) {
1501        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1502    } else {
1503        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1504        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1505        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1506    }
1507}
1508
/*
 * Emit a memory access (or ADDI) on rt at base + offset, for an
 * arbitrary offset.
 *
 * @opi:    immediate-offset opcode, or 0 when only an indexed form exists
 * @opx:    register-indexed opcode, used when the offset cannot be
 *          encoded in an immediate form
 * @rt:     register transferred (encoded as rt & 31 in the non-prefixed
 *          forms, so vector register numbers are accepted)
 * @base:   base address register
 * @offset: byte displacement from base
 *
 * Strategies, in order: an ISA v3.10 prefixed insn; the indexed form
 * with the offset materialized in a scratch register; or up to two
 * ADDIS insns followed by the immediate form.
 */
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    /*
     * Per-opcode setup:
     *   align        - mask of low offset bits that must be zero for opi
     *   rs           - scratch register for address arithmetic; defaults
     *                  to TMP1, but rt itself is reused when it is a
     *                  general register other than R0 (default case)
     *   is_int_store - integer stores must never use rt as scratch
     */
    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         * Opcodes not listed here fall through to the generic paths.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        /* The scratch must not overwrite the base; fall back to R0. */
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    /*
     * Split the (32-bit) offset into a signed low halfword l0 for the
     * final insn and a high halfword l1 for ADDIS.
     */
    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    /*
     * A non-negative offset whose high part does not fit in a signed
     * 16-bit immediate is added in two steps: 0x4000 plus the rest.
     */
    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    /* Skip a final ADDI that would merely add zero to rt in place. */
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
1616
1617static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
1618                           TCGReg va, TCGReg vb, int shb)
1619{
1620    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
1621}
1622
1623static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1624                       TCGReg base, intptr_t offset)
1625{
1626    int shift;
1627
1628    switch (type) {
1629    case TCG_TYPE_I32:
1630        if (ret < TCG_REG_V0) {
1631            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
1632            break;
1633        }
1634        if (have_isa_2_07 && have_vsx) {
1635            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
1636            break;
1637        }
1638        tcg_debug_assert((offset & 3) == 0);
1639        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
1640        shift = (offset - 4) & 0xc;
1641        if (shift) {
1642            tcg_out_vsldoi(s, ret, ret, ret, shift);
1643        }
1644        break;
1645    case TCG_TYPE_I64:
1646        if (ret < TCG_REG_V0) {
1647            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1648            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
1649            break;
1650        }
1651        /* fallthru */
1652    case TCG_TYPE_V64:
1653        tcg_debug_assert(ret >= TCG_REG_V0);
1654        if (have_vsx) {
1655            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
1656                             ret, base, offset);
1657            break;
1658        }
1659        tcg_debug_assert((offset & 7) == 0);
1660        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
1661        if (offset & 8) {
1662            tcg_out_vsldoi(s, ret, ret, ret, 8);
1663        }
1664        break;
1665    case TCG_TYPE_V128:
1666        tcg_debug_assert(ret >= TCG_REG_V0);
1667        tcg_debug_assert((offset & 15) == 0);
1668        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
1669                         LVX, ret, base, offset);
1670        break;
1671    default:
1672        g_assert_not_reached();
1673    }
1674}
1675
1676static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1677                              TCGReg base, intptr_t offset)
1678{
1679    int shift;
1680
1681    switch (type) {
1682    case TCG_TYPE_I32:
1683        if (arg < TCG_REG_V0) {
1684            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1685            break;
1686        }
1687        if (have_isa_2_07 && have_vsx) {
1688            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1689            break;
1690        }
1691        assert((offset & 3) == 0);
1692        tcg_debug_assert((offset & 3) == 0);
1693        shift = (offset - 4) & 0xc;
1694        if (shift) {
1695            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1696            arg = TCG_VEC_TMP1;
1697        }
1698        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1699        break;
1700    case TCG_TYPE_I64:
1701        if (arg < TCG_REG_V0) {
1702            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1703            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1704            break;
1705        }
1706        /* fallthru */
1707    case TCG_TYPE_V64:
1708        tcg_debug_assert(arg >= TCG_REG_V0);
1709        if (have_vsx) {
1710            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1711                             STXSDX, arg, base, offset);
1712            break;
1713        }
1714        tcg_debug_assert((offset & 7) == 0);
1715        if (offset & 8) {
1716            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1717            arg = TCG_VEC_TMP1;
1718        }
1719        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1720        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1721        break;
1722    case TCG_TYPE_V128:
1723        tcg_debug_assert(arg >= TCG_REG_V0);
1724        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1725                         STVX, arg, base, offset);
1726        break;
1727    default:
1728        g_assert_not_reached();
1729    }
1730}
1731
1732static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1733                               TCGReg base, intptr_t ofs)
1734{
1735    return false;
1736}
1737
1738/*
1739 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
1740 * If RC, then also set RC0.
1741 */
1742static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
1743                         bool const_arg2, TCGType type, bool rc)
1744{
1745    int mb, me;
1746
1747    if (!const_arg2) {
1748        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
1749        return;
1750    }
1751
1752    if (type == TCG_TYPE_I32) {
1753        arg2 = (uint32_t)arg2;
1754    }
1755
1756    if ((arg2 & ~0xffff) == 0) {
1757        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
1758        return;
1759    }
1760    if ((arg2 & ~0xffff0000ull) == 0) {
1761        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
1762        return;
1763    }
1764    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
1765        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
1766        return;
1767    }
1768    if (TCG_TARGET_REG_BITS == 64) {
1769        int sh = clz64(arg2);
1770        if (mask64_operand(arg2 << sh, &mb, &me)) {
1771            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
1772            return;
1773        }
1774    }
1775    /* Constraints should satisfy this. */
1776    g_assert_not_reached();
1777}
1778
1779static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1780                        int const_arg2, int cr, TCGType type)
1781{
1782    int imm;
1783    uint32_t op;
1784
1785    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1786
1787    /*
1788     * Simplify the comparisons below wrt CMPI.
1789     * All of the tests are 16-bit, so a 32-bit sign extend always works.
1790     */
1791    if (type == TCG_TYPE_I32) {
1792        arg2 = (int32_t)arg2;
1793    }
1794
1795    switch (cond) {
1796    case TCG_COND_EQ:
1797    case TCG_COND_NE:
1798        if (const_arg2) {
1799            if ((int16_t) arg2 == arg2) {
1800                op = CMPI;
1801                imm = 1;
1802                break;
1803            } else if ((uint16_t) arg2 == arg2) {
1804                op = CMPLI;
1805                imm = 1;
1806                break;
1807            }
1808        }
1809        op = CMPL;
1810        imm = 0;
1811        break;
1812
1813    case TCG_COND_TSTEQ:
1814    case TCG_COND_TSTNE:
1815        tcg_debug_assert(cr == 0);
1816        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
1817        return;
1818
1819    case TCG_COND_LT:
1820    case TCG_COND_GE:
1821    case TCG_COND_LE:
1822    case TCG_COND_GT:
1823        if (const_arg2) {
1824            if ((int16_t) arg2 == arg2) {
1825                op = CMPI;
1826                imm = 1;
1827                break;
1828            }
1829        }
1830        op = CMP;
1831        imm = 0;
1832        break;
1833
1834    case TCG_COND_LTU:
1835    case TCG_COND_GEU:
1836    case TCG_COND_LEU:
1837    case TCG_COND_GTU:
1838        if (const_arg2) {
1839            if ((uint16_t) arg2 == arg2) {
1840                op = CMPLI;
1841                imm = 1;
1842                break;
1843            }
1844        }
1845        op = CMPL;
1846        imm = 0;
1847        break;
1848
1849    default:
1850        g_assert_not_reached();
1851    }
1852    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1853
1854    if (imm) {
1855        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1856    } else {
1857        if (const_arg2) {
1858            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1859            arg2 = TCG_REG_R0;
1860        }
1861        tcg_out32(s, op | RA(arg1) | RB(arg2));
1862    }
1863}
1864
1865static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1866                                TCGReg dst, TCGReg src, bool neg)
1867{
1868    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
1869        /*
1870         * X != 0 implies X + -1 generates a carry.
1871         * RT = (~X + X) + CA
1872         *    = -1 + CA
1873         *    = CA ? 0 : -1
1874         */
1875        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1876        tcg_out32(s, SUBFE | TAB(dst, src, src));
1877        return;
1878    }
1879
1880    if (type == TCG_TYPE_I32) {
1881        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1882        tcg_out_shri32(s, dst, dst, 5);
1883    } else {
1884        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1885        tcg_out_shri64(s, dst, dst, 6);
1886    }
1887    if (neg) {
1888        tcg_out32(s, NEG | RT(dst) | RA(dst));
1889    }
1890}
1891
1892static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
1893                                TCGReg dst, TCGReg src, bool neg)
1894{
1895    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
1896        /*
1897         * X != 0 implies X + -1 generates a carry.  Extra addition
1898         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
1899         */
1900        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1901        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1902        return;
1903    }
1904    tcg_out_setcond_eq0(s, type, dst, src, false);
1905    if (neg) {
1906        tcg_out32(s, ADDI | TAI(dst, dst, -1));
1907    } else {
1908        tcg_out_xori32(s, dst, dst, 1);
1909    }
1910}
1911
1912static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1913                                  bool const_arg2)
1914{
1915    if (const_arg2) {
1916        if ((uint32_t)arg2 == arg2) {
1917            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1918        } else {
1919            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1920            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1921        }
1922    } else {
1923        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1924    }
1925    return TCG_REG_R0;
1926}
1927
1928static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1929                            TCGReg arg0, TCGReg arg1, TCGArg arg2,
1930                            bool const_arg2, bool neg)
1931{
1932    int sh;
1933    bool inv;
1934
1935    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1936
1937    /* Ignore high bits of a potential constant arg2.  */
1938    if (type == TCG_TYPE_I32) {
1939        arg2 = (uint32_t)arg2;
1940    }
1941
1942    /* With SETBC/SETBCR, we can always implement with 2 insns. */
1943    if (have_isa_3_10) {
1944        tcg_insn_unit bi, opc;
1945
1946        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
1947
1948        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
1949        bi = tcg_to_bc[cond] & (0x1f << 16);
1950        if (tcg_to_bc[cond] & BO(8)) {
1951            opc = neg ? SETNBC : SETBC;
1952        } else {
1953            opc = neg ? SETNBCR : SETBCR;
1954        }
1955        tcg_out32(s, opc | RT(arg0) | bi);
1956        return;
1957    }
1958
1959    /* Handle common and trivial cases before handling anything else.  */
1960    if (arg2 == 0) {
1961        switch (cond) {
1962        case TCG_COND_EQ:
1963            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1964            return;
1965        case TCG_COND_NE:
1966            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1967            return;
1968        case TCG_COND_GE:
1969            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1970            arg1 = arg0;
1971            /* FALLTHRU */
1972        case TCG_COND_LT:
1973            /* Extract the sign bit.  */
1974            if (type == TCG_TYPE_I32) {
1975                if (neg) {
1976                    tcg_out_sari32(s, arg0, arg1, 31);
1977                } else {
1978                    tcg_out_shri32(s, arg0, arg1, 31);
1979                }
1980            } else {
1981                if (neg) {
1982                    tcg_out_sari64(s, arg0, arg1, 63);
1983                } else {
1984                    tcg_out_shri64(s, arg0, arg1, 63);
1985                }
1986            }
1987            return;
1988        default:
1989            break;
1990        }
1991    }
1992
1993    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1994       All other cases below are also at least 3 insns, so speed up the
1995       code generator by not considering them and always using ISEL.  */
1996    if (have_isel) {
1997        int isel, tab;
1998
1999        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
2000
2001        isel = tcg_to_isel[cond];
2002
2003        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
2004        if (isel & 1) {
2005            /* arg0 = (bc ? 0 : 1) */
2006            tab = TAB(arg0, 0, arg0);
2007            isel &= ~1;
2008        } else {
2009            /* arg0 = (bc ? 1 : 0) */
2010            tcg_out_movi(s, type, TCG_REG_R0, 0);
2011            tab = TAB(arg0, arg0, TCG_REG_R0);
2012        }
2013        tcg_out32(s, isel | tab);
2014        return;
2015    }
2016
2017    inv = false;
2018    switch (cond) {
2019    case TCG_COND_EQ:
2020        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
2021        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
2022        break;
2023
2024    case TCG_COND_NE:
2025        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
2026        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
2027        break;
2028
2029    case TCG_COND_TSTEQ:
2030        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
2031        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
2032        break;
2033
2034    case TCG_COND_TSTNE:
2035        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
2036        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
2037        break;
2038
2039    case TCG_COND_LE:
2040    case TCG_COND_LEU:
2041        inv = true;
2042        /* fall through */
2043    case TCG_COND_GT:
2044    case TCG_COND_GTU:
2045        sh = 30; /* CR7 CR_GT */
2046        goto crtest;
2047
2048    case TCG_COND_GE:
2049    case TCG_COND_GEU:
2050        inv = true;
2051        /* fall through */
2052    case TCG_COND_LT:
2053    case TCG_COND_LTU:
2054        sh = 29; /* CR7 CR_LT */
2055        goto crtest;
2056
2057    crtest:
2058        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
2059        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
2060        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
2061        if (neg && inv) {
2062            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
2063        } else if (neg) {
2064            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
2065        } else if (inv) {
2066            tcg_out_xori32(s, arg0, arg0, 1);
2067        }
2068        break;
2069
2070    default:
2071        g_assert_not_reached();
2072    }
2073}
2074
2075static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
2076                         TCGReg dest, TCGReg arg1, TCGReg arg2)
2077{
2078    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false);
2079}
2080
2081static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
2082                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
2083{
2084    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false);
2085}
2086
2087static const TCGOutOpSetcond outop_setcond = {
2088    .base.static_constraint = C_O1_I2(r, r, rC),
2089    .out_rrr = tgen_setcond,
2090    .out_rri = tgen_setcondi,
2091};
2092
2093static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
2094                            TCGReg dest, TCGReg arg1, TCGReg arg2)
2095{
2096    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true);
2097}
2098
2099static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
2100                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
2101{
2102    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true);
2103}
2104
2105static const TCGOutOpSetcond outop_negsetcond = {
2106    .base.static_constraint = C_O1_I2(r, r, rC),
2107    .out_rrr = tgen_negsetcond,
2108    .out_rri = tgen_negsetcondi,
2109};
2110
2111static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
2112{
2113    tcg_out32(s, tcg_to_bc[cond] | bd);
2114}
2115
2116static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
2117{
2118    int bd = 0;
2119    if (l->has_value) {
2120        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
2121    } else {
2122        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
2123    }
2124    tcg_out_bc(s, cond, bd);
2125}
2126
2127static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
2128                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
2129{
2130    tcg_out_cmp(s, cond, arg1, arg2, false, 0, type);
2131    tcg_out_bc_lab(s, cond, l);
2132}
2133
2134static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond,
2135                         TCGReg arg1, tcg_target_long arg2, TCGLabel *l)
2136{
2137    tcg_out_cmp(s, cond, arg1, arg2, true, 0, type);
2138    tcg_out_bc_lab(s, cond, l);
2139}
2140
2141static const TCGOutOpBrcond outop_brcond = {
2142    .base.static_constraint = C_O0_I2(r, rC),
2143    .out_rr = tgen_brcond,
2144    .out_ri = tgen_brcondi,
2145};
2146
2147static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
2148                         TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2,
2149                         TCGArg v1, bool const_v1, TCGArg v2, bool const_v2)
2150{
2151    /* If for some reason both inputs are zero, don't produce bad code.  */
2152    if (v1 == 0 && v2 == 0) {
2153        tcg_out_movi(s, type, dest, 0);
2154        return;
2155    }
2156
2157    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);
2158
2159    if (have_isel) {
2160        int isel = tcg_to_isel[cond];
2161
2162        /* Swap the V operands if the operation indicates inversion.  */
2163        if (isel & 1) {
2164            int t = v1;
2165            v1 = v2;
2166            v2 = t;
2167            isel &= ~1;
2168        }
2169        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
2170        if (v2 == 0) {
2171            tcg_out_movi(s, type, TCG_REG_R0, 0);
2172        }
2173        tcg_out32(s, isel | TAB(dest, v1, v2));
2174    } else {
2175        if (dest == v2) {
2176            cond = tcg_invert_cond(cond);
2177            v2 = v1;
2178        } else if (dest != v1) {
2179            if (v1 == 0) {
2180                tcg_out_movi(s, type, dest, 0);
2181            } else {
2182                tcg_out_mov(s, type, dest, v1);
2183            }
2184        }
2185        /* Branch forward over one insn */
2186        tcg_out_bc(s, cond, 8);
2187        if (v2 == 0) {
2188            tcg_out_movi(s, type, dest, 0);
2189        } else {
2190            tcg_out_mov(s, type, dest, v2);
2191        }
2192    }
2193}
2194
2195static const TCGOutOpMovcond outop_movcond = {
2196    .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ),
2197    .out = tgen_movcond,
2198};
2199
2200static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
2201                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
2202{
2203    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2204        tcg_out32(s, opc | RA(a0) | RS(a1));
2205    } else {
2206        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
2207        /* Note that the only other valid constant for a2 is 0.  */
2208        if (have_isel) {
2209            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
2210            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
2211        } else if (!const_a2 && a0 == a2) {
2212            tcg_out_bc(s, TCG_COND_EQ, 8);
2213            tcg_out32(s, opc | RA(a0) | RS(a1));
2214        } else {
2215            tcg_out32(s, opc | RA(a0) | RS(a1));
2216            tcg_out_bc(s, TCG_COND_NE, 8);
2217            if (const_a2) {
2218                tcg_out_movi(s, type, a0, 0);
2219            } else {
2220                tcg_out_mov(s, type, a0, a2);
2221            }
2222        }
2223    }
2224}
2225
2226static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
2227                         const int *const_args)
2228{
2229    static const struct { uint8_t bit1, bit2; } bits[] = {
2230        [TCG_COND_LT ] = { CR_LT, CR_LT },
2231        [TCG_COND_LE ] = { CR_LT, CR_GT },
2232        [TCG_COND_GT ] = { CR_GT, CR_GT },
2233        [TCG_COND_GE ] = { CR_GT, CR_LT },
2234        [TCG_COND_LTU] = { CR_LT, CR_LT },
2235        [TCG_COND_LEU] = { CR_LT, CR_GT },
2236        [TCG_COND_GTU] = { CR_GT, CR_GT },
2237        [TCG_COND_GEU] = { CR_GT, CR_LT },
2238    };
2239
2240    TCGCond cond = args[4], cond2;
2241    TCGArg al, ah, bl, bh;
2242    int blconst, bhconst;
2243    int op, bit1, bit2;
2244
2245    al = args[0];
2246    ah = args[1];
2247    bl = args[2];
2248    bh = args[3];
2249    blconst = const_args[2];
2250    bhconst = const_args[3];
2251
2252    switch (cond) {
2253    case TCG_COND_EQ:
2254        op = CRAND;
2255        goto do_equality;
2256    case TCG_COND_NE:
2257        op = CRNAND;
2258    do_equality:
2259        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
2260        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
2261        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2262        break;
2263
2264    case TCG_COND_TSTEQ:
2265    case TCG_COND_TSTNE:
2266        if (blconst) {
2267            tcg_out_andi32(s, TCG_REG_R0, al, bl);
2268        } else {
2269            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
2270        }
2271        if (bhconst) {
2272            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
2273        } else {
2274            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
2275        }
2276        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
2277        break;
2278
2279    case TCG_COND_LT:
2280    case TCG_COND_LE:
2281    case TCG_COND_GT:
2282    case TCG_COND_GE:
2283    case TCG_COND_LTU:
2284    case TCG_COND_LEU:
2285    case TCG_COND_GTU:
2286    case TCG_COND_GEU:
2287        bit1 = bits[cond].bit1;
2288        bit2 = bits[cond].bit2;
2289        op = (bit1 != bit2 ? CRANDC : CRAND);
2290        cond2 = tcg_unsigned_cond(cond);
2291
2292        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
2293        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
2294        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
2295        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
2296        break;
2297
2298    default:
2299        g_assert_not_reached();
2300    }
2301}
2302
2303static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
2304                             const int *const_args)
2305{
2306    tcg_out_cmp2(s, args + 1, const_args + 1);
2307    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
2308    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
2309}
2310
2311static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
2312                            const int *const_args)
2313{
2314    tcg_out_cmp2(s, args, const_args);
2315    tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5]));
2316}
2317
2318static void tcg_out_mb(TCGContext *s, TCGArg a0)
2319{
2320    uint32_t insn;
2321
2322    if (a0 & TCG_MO_ST_LD) {
2323        insn = HWSYNC;
2324    } else {
2325        insn = LWSYNC;
2326    }
2327
2328    tcg_out32(s, insn);
2329}
2330
2331static void tcg_out_call_int(TCGContext *s, int lk,
2332                             const tcg_insn_unit *target)
2333{
2334#ifdef _CALL_AIX
2335    /* Look through the descriptor.  If the branch is in range, and we
2336       don't have to spend too much effort on building the toc.  */
2337    const void *tgt = ((const void * const *)target)[0];
2338    uintptr_t toc = ((const uintptr_t *)target)[1];
2339    intptr_t diff = tcg_pcrel_diff(s, tgt);
2340
2341    if (in_range_b(diff) && toc == (uint32_t)toc) {
2342        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
2343        tcg_out_b(s, lk, tgt);
2344    } else {
2345        /* Fold the low bits of the constant into the addresses below.  */
2346        intptr_t arg = (intptr_t)target;
2347        int ofs = (int16_t)arg;
2348
2349        if (ofs + 8 < 0x8000) {
2350            arg -= ofs;
2351        } else {
2352            ofs = 0;
2353        }
2354        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
2355        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
2356        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
2357        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
2358        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2359    }
2360#elif defined(_CALL_ELF) && _CALL_ELF == 2
2361    intptr_t diff;
2362
2363    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
2364       address, which the callee uses to compute its TOC address.  */
2365    /* FIXME: when the branch is in range, we could avoid r12 load if we
2366       knew that the destination uses the same TOC, and what its local
2367       entry point offset is.  */
2368    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
2369
2370    diff = tcg_pcrel_diff(s, target);
2371    if (in_range_b(diff)) {
2372        tcg_out_b(s, lk, target);
2373    } else {
2374        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
2375        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2376    }
2377#else
2378    tcg_out_b(s, lk, target);
2379#endif
2380}
2381
2382static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
2383                         const TCGHelperInfo *info)
2384{
2385    tcg_out_call_int(s, LK, target);
2386}
2387
2388static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
2389    [MO_UB] = LBZX,
2390    [MO_UW] = LHZX,
2391    [MO_UL] = LWZX,
2392    [MO_UQ] = LDX,
2393    [MO_SW] = LHAX,
2394    [MO_SL] = LWAX,
2395    [MO_BSWAP | MO_UB] = LBZX,
2396    [MO_BSWAP | MO_UW] = LHBRX,
2397    [MO_BSWAP | MO_UL] = LWBRX,
2398    [MO_BSWAP | MO_UQ] = LDBRX,
2399};
2400
2401static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
2402    [MO_UB] = STBX,
2403    [MO_UW] = STHX,
2404    [MO_UL] = STWX,
2405    [MO_UQ] = STDX,
2406    [MO_BSWAP | MO_UB] = STBX,
2407    [MO_BSWAP | MO_UW] = STHBRX,
2408    [MO_BSWAP | MO_UL] = STWBRX,
2409    [MO_BSWAP | MO_UQ] = STDBRX,
2410};
2411
2412static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2413{
2414    if (arg < 0) {
2415        arg = TCG_REG_TMP1;
2416    }
2417    tcg_out32(s, MFSPR | RT(arg) | LR);
2418    return arg;
2419}
2420
2421/*
2422 * For the purposes of ppc32 sorting 4 input registers into 4 argument
2423 * registers, there is an outside chance we would require 3 temps.
2424 */
2425static const TCGLdstHelperParam ldst_helper_param = {
2426    .ra_gen = ldst_ra_gen,
2427    .ntmp = 3,
2428    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
2429};
2430
2431static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2432{
2433    MemOp opc = get_memop(lb->oi);
2434
2435    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2436        return false;
2437    }
2438
2439    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
2440    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
2441    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
2442
2443    tcg_out_b(s, 0, lb->raddr);
2444    return true;
2445}
2446
2447static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2448{
2449    MemOp opc = get_memop(lb->oi);
2450
2451    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2452        return false;
2453    }
2454
2455    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
2456    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
2457
2458    tcg_out_b(s, 0, lb->raddr);
2459    return true;
2460}
2461
2462typedef struct {
2463    TCGReg base;
2464    TCGReg index;
2465    TCGAtomAlign aa;
2466} HostAddress;
2467
2468bool tcg_target_has_memory_bswap(MemOp memop)
2469{
2470    TCGAtomAlign aa;
2471
2472    if ((memop & MO_SIZE) <= MO_64) {
2473        return true;
2474    }
2475
2476    /*
2477     * Reject 16-byte memop with 16-byte atomicity,
2478     * but do allow a pair of 64-bit operations.
2479     */
2480    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2481    return aa.atom <= MO_64;
2482}
2483
/*
 * We expect to use a 16-bit negative offset from ENV.
 * The expansion is parenthesized so the negative value stays a single
 * operand wherever the macro is substituted.
 */
#define MIN_TLB_MASK_TABLE_OFS  (-32768)
2486
2487/*
2488 * For system-mode, perform the TLB load and compare.
2489 * For user-mode, perform any required alignment tests.
2490 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2491 * is required and fill in @h with the host address for the fast path.
2492 */
2493static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2494                                           TCGReg addr, MemOpIdx oi, bool is_ld)
2495{
2496    TCGType addr_type = s->addr_type;
2497    TCGLabelQemuLdst *ldst = NULL;
2498    MemOp opc = get_memop(oi);
2499    MemOp a_bits, s_bits;
2500
2501    /*
2502     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2503     *
2504     * Before 3.0, "An access that is not atomic is performed as a set of
2505     * smaller disjoint atomic accesses. In general, the number and alignment
2506     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
2507     *
2508     * As of 3.0, "the non-atomic access is performed as described in
2509     * the corresponding list", which matches MO_ATOM_SUBALIGN.
2510     */
2511    s_bits = opc & MO_SIZE;
2512    h->aa = atom_and_align_for_opc(s, opc,
2513                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
2514                                                 : MO_ATOM_IFALIGN,
2515                                   s_bits == MO_128);
2516    a_bits = h->aa.align;
2517
2518    if (tcg_use_softmmu) {
2519        int mem_index = get_mmuidx(oi);
2520        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2521                            : offsetof(CPUTLBEntry, addr_write);
2522        int fast_off = tlb_mask_table_ofs(s, mem_index);
2523        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2524        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2525
2526        ldst = new_ldst_label(s);
2527        ldst->is_ld = is_ld;
2528        ldst->oi = oi;
2529        ldst->addr_reg = addr;
2530
2531        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2532        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2533        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2534
2535        /* Extract the page index, shifted into place for tlb index.  */
2536        if (TCG_TARGET_REG_BITS == 32) {
2537            tcg_out_shri32(s, TCG_REG_R0, addr,
2538                           s->page_bits - CPU_TLB_ENTRY_BITS);
2539        } else {
2540            tcg_out_shri64(s, TCG_REG_R0, addr,
2541                           s->page_bits - CPU_TLB_ENTRY_BITS);
2542        }
2543        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2544
2545        /*
2546         * Load the TLB comparator into TMP2.
2547         * For 64-bit host, always load the entire 64-bit slot for simplicity.
2548         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2549         */
2550        if (cmp_off == 0) {
2551            tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX)
2552                      | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
2553        } else {
2554            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2555            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
2556        }
2557
2558        /*
2559         * Load the TLB addend for use on the fast path.
2560         * Do this asap to minimize any load use delay.
2561         */
2562        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2563            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2564                       offsetof(CPUTLBEntry, addend));
2565        }
2566
2567        /* Clear the non-page, non-alignment bits from the address in R0. */
2568        if (TCG_TARGET_REG_BITS == 32) {
2569            /*
2570             * We don't support unaligned accesses on 32-bits.
2571             * Preserve the bottom bits and thus trigger a comparison
2572             * failure on unaligned accesses.
2573             */
2574            if (a_bits < s_bits) {
2575                a_bits = s_bits;
2576            }
2577            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
2578                        (32 - a_bits) & 31, 31 - s->page_bits);
2579        } else {
2580            TCGReg t = addr;
2581
2582            /*
2583             * If the access is unaligned, we need to make sure we fail if we
2584             * cross a page boundary.  The trick is to add the access size-1
2585             * to the address before masking the low bits.  That will make the
2586             * address overflow to the next page if we cross a page boundary,
2587             * which will then force a mismatch of the TLB compare.
2588             */
2589            if (a_bits < s_bits) {
2590                unsigned a_mask = (1 << a_bits) - 1;
2591                unsigned s_mask = (1 << s_bits) - 1;
2592                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2593                t = TCG_REG_R0;
2594            }
2595
2596            /* Mask the address for the requested alignment.  */
2597            if (addr_type == TCG_TYPE_I32) {
2598                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2599                            (32 - a_bits) & 31, 31 - s->page_bits);
2600            } else if (a_bits == 0) {
2601                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2602            } else {
2603                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2604                            64 - s->page_bits, s->page_bits - a_bits);
2605                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
2606            }
2607        }
2608
2609        /* Full comparison into cr0. */
2610        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type);
2611
2612        /* Load a pointer into the current opcode w/conditional branch-link. */
2613        ldst->label_ptr[0] = s->code_ptr;
2614        tcg_out_bc(s, TCG_COND_NE, LK);
2615
2616        h->base = TCG_REG_TMP1;
2617    } else {
2618        if (a_bits) {
2619            ldst = new_ldst_label(s);
2620            ldst->is_ld = is_ld;
2621            ldst->oi = oi;
2622            ldst->addr_reg = addr;
2623
2624            /* We are expecting a_bits to max out at 7, much lower than ANDI. */
2625            tcg_debug_assert(a_bits < 16);
2626            tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1));
2627
2628            ldst->label_ptr[0] = s->code_ptr;
2629            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2630        }
2631
2632        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2633    }
2634
2635    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2636        /* Zero-extend the guest address for use in the host address. */
2637        tcg_out_ext32u(s, TCG_REG_TMP2, addr);
2638        h->index = TCG_REG_TMP2;
2639    } else {
2640        h->index = addr;
2641    }
2642
2643    return ldst;
2644}
2645
2646static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2647                            TCGReg addr, MemOpIdx oi, TCGType data_type)
2648{
2649    MemOp opc = get_memop(oi);
2650    TCGLabelQemuLdst *ldst;
2651    HostAddress h;
2652
2653    ldst = prepare_host_addr(s, &h, addr, oi, true);
2654
2655    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2656        if (opc & MO_BSWAP) {
2657            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2658            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2659            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
2660        } else if (h.base != 0) {
2661            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2662            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
2663            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
2664        } else if (h.index == datahi) {
2665            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2666            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2667        } else {
2668            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2669            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2670        }
2671    } else {
2672        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2673        if (!have_isa_2_06 && insn == LDBRX) {
2674            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2675            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2676            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
2677            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2678        } else if (insn) {
2679            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2680        } else {
2681            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2682            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2683            tcg_out_movext(s, TCG_TYPE_REG, datalo,
2684                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
2685        }
2686    }
2687
2688    if (ldst) {
2689        ldst->type = data_type;
2690        ldst->datalo_reg = datalo;
2691        ldst->datahi_reg = datahi;
2692        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2693    }
2694}
2695
2696static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2697                            TCGReg addr, MemOpIdx oi, TCGType data_type)
2698{
2699    MemOp opc = get_memop(oi);
2700    TCGLabelQemuLdst *ldst;
2701    HostAddress h;
2702
2703    ldst = prepare_host_addr(s, &h, addr, oi, false);
2704
2705    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2706        if (opc & MO_BSWAP) {
2707            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2708            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2709            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
2710        } else if (h.base != 0) {
2711            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2712            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
2713            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
2714        } else {
2715            tcg_out32(s, STW | TAI(datahi, h.index, 0));
2716            tcg_out32(s, STW | TAI(datalo, h.index, 4));
2717        }
2718    } else {
2719        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2720        if (!have_isa_2_06 && insn == STDBRX) {
2721            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2722            tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4));
2723            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2724            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2));
2725        } else {
2726            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
2727        }
2728    }
2729
2730    if (ldst) {
2731        ldst->type = data_type;
2732        ldst->datalo_reg = datalo;
2733        ldst->datahi_reg = datahi;
2734        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2735    }
2736}
2737
2738static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
2739                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
2740{
2741    TCGLabelQemuLdst *ldst;
2742    HostAddress h;
2743    bool need_bswap;
2744    uint32_t insn;
2745    TCGReg index;
2746
2747    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
2748
2749    /* Compose the final address, as LQ/STQ have no indexing. */
2750    index = h.index;
2751    if (h.base != 0) {
2752        index = TCG_REG_TMP1;
2753        tcg_out32(s, ADD | TAB(index, h.base, h.index));
2754    }
2755    need_bswap = get_memop(oi) & MO_BSWAP;
2756
2757    if (h.aa.atom == MO_128) {
2758        tcg_debug_assert(!need_bswap);
2759        tcg_debug_assert(datalo & 1);
2760        tcg_debug_assert(datahi == datalo - 1);
2761        tcg_debug_assert(!is_ld || datahi != index);
2762        insn = is_ld ? LQ : STQ;
2763        tcg_out32(s, insn | TAI(datahi, index, 0));
2764    } else {
2765        TCGReg d1, d2;
2766
2767        if (HOST_BIG_ENDIAN ^ need_bswap) {
2768            d1 = datahi, d2 = datalo;
2769        } else {
2770            d1 = datalo, d2 = datahi;
2771        }
2772
2773        if (need_bswap) {
2774            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
2775            insn = is_ld ? LDBRX : STDBRX;
2776            tcg_out32(s, insn | TAB(d1, 0, index));
2777            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
2778        } else {
2779            insn = is_ld ? LD : STD;
2780            tcg_out32(s, insn | TAI(d1, index, 0));
2781            tcg_out32(s, insn | TAI(d2, index, 8));
2782        }
2783    }
2784
2785    if (ldst) {
2786        ldst->type = TCG_TYPE_I128;
2787        ldst->datalo_reg = datalo;
2788        ldst->datahi_reg = datahi;
2789        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2790    }
2791}
2792
2793static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2794{
2795    int i;
2796    for (i = 0; i < count; ++i) {
2797        p[i] = NOP;
2798    }
2799}
2800
2801/* Parameters for function call generation, used in tcg.c.  */
2802#define TCG_TARGET_STACK_ALIGN       16
2803
2804#ifdef _CALL_AIX
2805# define LINK_AREA_SIZE                (6 * SZR)
2806# define LR_OFFSET                     (1 * SZR)
2807# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2808#elif defined(_CALL_DARWIN)
2809# define LINK_AREA_SIZE                (6 * SZR)
2810# define LR_OFFSET                     (2 * SZR)
2811#elif TCG_TARGET_REG_BITS == 64
2812# if defined(_CALL_ELF) && _CALL_ELF == 2
2813#  define LINK_AREA_SIZE               (4 * SZR)
2814#  define LR_OFFSET                    (1 * SZR)
2815# endif
2816#else /* TCG_TARGET_REG_BITS == 32 */
2817# if defined(_CALL_SYSV)
2818#  define LINK_AREA_SIZE               (2 * SZR)
2819#  define LR_OFFSET                    (1 * SZR)
2820# endif
2821#endif
2822#ifndef LR_OFFSET
2823# error "Unhandled abi"
2824#endif
2825#ifndef TCG_TARGET_CALL_STACK_OFFSET
2826# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2827#endif
2828
2829#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2830#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2831
2832#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2833                     + TCG_STATIC_CALL_ARGS_SIZE    \
2834                     + CPU_TEMP_BUF_SIZE            \
2835                     + REG_SAVE_SIZE                \
2836                     + TCG_TARGET_STACK_ALIGN - 1)  \
2837                    & -TCG_TARGET_STACK_ALIGN)
2838
2839#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2840
2841static void tcg_target_qemu_prologue(TCGContext *s)
2842{
2843    int i;
2844
2845#ifdef _CALL_AIX
2846    const void **desc = (const void **)s->code_ptr;
2847    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2848    desc[1] = 0;                            /* environment pointer */
2849    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2850#endif
2851
2852    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2853                  CPU_TEMP_BUF_SIZE);
2854
2855    /* Prologue */
2856    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2857    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2858              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2859
2860    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2861        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2862                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2863    }
2864    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2865
2866    if (!tcg_use_softmmu && guest_base) {
2867        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2868        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2869    }
2870
2871    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2872    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2873    tcg_out32(s, BCCTR | BO_ALWAYS);
2874
2875    /* Epilogue */
2876    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2877
2878    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2879    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2880        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2881                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2882    }
2883    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2884    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2885    tcg_out32(s, BCLR | BO_ALWAYS);
2886}
2887
2888static void tcg_out_tb_start(TCGContext *s)
2889{
2890    /* Load TCG_REG_TB. */
2891    if (USE_REG_TB) {
2892        if (have_isa_3_00) {
2893            /* lnia REG_TB */
2894            tcg_out_addpcis(s, TCG_REG_TB, 0);
2895        } else {
2896            /* bcl 20,31,$+4 (preferred form for getting nia) */
2897            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
2898            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
2899        }
2900    }
2901}
2902
2903static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2904{
2905    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2906    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2907}
2908
2909static void tcg_out_goto_tb(TCGContext *s, int which)
2910{
2911    uintptr_t ptr = get_jmp_target_addr(s, which);
2912    int16_t lo;
2913
2914    /* Direct branch will be patched by tb_target_set_jmp_target. */
2915    set_jmp_insn_offset(s, which);
2916    tcg_out32(s, NOP);
2917
2918    /* When branch is out of range, fall through to indirect. */
2919    if (USE_REG_TB) {
2920        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
2921        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
2922    } else if (have_isa_3_10) {
2923        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
2924        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
2925    } else if (have_isa_3_00) {
2926        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
2927        lo = offset;
2928        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
2929        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2930    } else {
2931        lo = ptr;
2932        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
2933        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2934    }
2935
2936    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2937    tcg_out32(s, BCCTR | BO_ALWAYS);
2938    set_jmp_reset_offset(s, which);
2939}
2940
2941void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2942                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2943{
2944    uintptr_t addr = tb->jmp_target_addr[n];
2945    intptr_t diff = addr - jmp_rx;
2946    tcg_insn_unit insn;
2947
2948    if (in_range_b(diff)) {
2949        insn = B | (diff & 0x3fffffc);
2950    } else {
2951        insn = NOP;
2952    }
2953
2954    qatomic_set((uint32_t *)jmp_rw, insn);
2955    flush_idcache_range(jmp_rx, jmp_rw, 4);
2956}
2957
2958
2959static void tgen_add(TCGContext *s, TCGType type,
2960                     TCGReg a0, TCGReg a1, TCGReg a2)
2961{
2962    tcg_out32(s, ADD | TAB(a0, a1, a2));
2963}
2964
2965static void tgen_addi(TCGContext *s, TCGType type,
2966                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2967{
2968    tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2969}
2970
2971static const TCGOutOpBinary outop_add = {
2972    .base.static_constraint = C_O1_I2(r, r, rT),
2973    .out_rrr = tgen_add,
2974    .out_rri = tgen_addi,
2975};
2976
2977static void tgen_and(TCGContext *s, TCGType type,
2978                     TCGReg a0, TCGReg a1, TCGReg a2)
2979{
2980    tcg_out32(s, AND | SAB(a1, a0, a2));
2981}
2982
2983static void tgen_andi(TCGContext *s, TCGType type,
2984                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2985{
2986    if (type == TCG_TYPE_I32) {
2987        tcg_out_andi32(s, a0, a1, a2);
2988    } else {
2989        tcg_out_andi64(s, a0, a1, a2);
2990    }
2991}
2992
2993static const TCGOutOpBinary outop_and = {
2994    .base.static_constraint = C_O1_I2(r, r, ri),
2995    .out_rrr = tgen_and,
2996    .out_rri = tgen_andi,
2997};
2998
2999static void tgen_andc(TCGContext *s, TCGType type,
3000                      TCGReg a0, TCGReg a1, TCGReg a2)
3001{
3002    tcg_out32(s, ANDC | SAB(a1, a0, a2));
3003}
3004
3005static const TCGOutOpBinary outop_andc = {
3006    .base.static_constraint = C_O1_I2(r, r, r),
3007    .out_rrr = tgen_andc,
3008};
3009
3010static void tgen_clz(TCGContext *s, TCGType type,
3011                     TCGReg a0, TCGReg a1, TCGReg a2)
3012{
3013    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
3014    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
3015}
3016
3017static void tgen_clzi(TCGContext *s, TCGType type,
3018                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3019{
3020    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
3021    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
3022}
3023
3024static const TCGOutOpBinary outop_clz = {
3025    .base.static_constraint = C_O1_I2(r, r, rZW),
3026    .out_rrr = tgen_clz,
3027    .out_rri = tgen_clzi,
3028};
3029
3030static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3031{
3032    uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD;
3033    tcg_out32(s, insn | SAB(a1, a0, 0));
3034}
3035
3036static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
3037{
3038    return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented;
3039}
3040
3041static const TCGOutOpUnary outop_ctpop = {
3042    .base.static_constraint = C_Dynamic,
3043    .base.dynamic_constraint = cset_ctpop,
3044    .out_rr = tgen_ctpop,
3045};
3046
3047static void tgen_ctz(TCGContext *s, TCGType type,
3048                     TCGReg a0, TCGReg a1, TCGReg a2)
3049{
3050    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
3051    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
3052}
3053
3054static void tgen_ctzi(TCGContext *s, TCGType type,
3055                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3056{
3057    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
3058    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
3059}
3060
3061static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags)
3062{
3063    return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented;
3064}
3065
3066static const TCGOutOpBinary outop_ctz = {
3067    .base.static_constraint = C_Dynamic,
3068    .base.dynamic_constraint = cset_ctz,
3069    .out_rrr = tgen_ctz,
3070    .out_rri = tgen_ctzi,
3071};
3072
3073static void tgen_eqv(TCGContext *s, TCGType type,
3074                     TCGReg a0, TCGReg a1, TCGReg a2)
3075{
3076    tcg_out32(s, EQV | SAB(a1, a0, a2));
3077}
3078
3079static void tgen_divs(TCGContext *s, TCGType type,
3080                      TCGReg a0, TCGReg a1, TCGReg a2)
3081{
3082    uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD;
3083    tcg_out32(s, insn | TAB(a0, a1, a2));
3084}
3085
3086static const TCGOutOpBinary outop_divs = {
3087    .base.static_constraint = C_O1_I2(r, r, r),
3088    .out_rrr = tgen_divs,
3089};
3090
3091static const TCGOutOpDivRem outop_divs2 = {
3092    .base.static_constraint = C_NotImplemented,
3093};
3094
3095static void tgen_divu(TCGContext *s, TCGType type,
3096                      TCGReg a0, TCGReg a1, TCGReg a2)
3097{
3098    uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU;
3099    tcg_out32(s, insn | TAB(a0, a1, a2));
3100}
3101
3102static const TCGOutOpBinary outop_divu = {
3103    .base.static_constraint = C_O1_I2(r, r, r),
3104    .out_rrr = tgen_divu,
3105};
3106
3107static const TCGOutOpDivRem outop_divu2 = {
3108    .base.static_constraint = C_NotImplemented,
3109};
3110
/* Output descriptor for eqv: register operands only, emitted by tgen_eqv. */
static const TCGOutOpBinary outop_eqv = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_eqv,
};
3115
3116static void tgen_mul(TCGContext *s, TCGType type,
3117                    TCGReg a0, TCGReg a1, TCGReg a2)
3118{
3119    uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD;
3120    tcg_out32(s, insn | TAB(a0, a1, a2));
3121}
3122
3123static void tgen_muli(TCGContext *s, TCGType type,
3124                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3125{
3126    tcg_out32(s, MULLI | TAI(a0, a1, a2));
3127}
3128
3129static const TCGOutOpBinary outop_mul = {
3130    .base.static_constraint = C_O1_I2(r, r, rI),
3131    .out_rrr = tgen_mul,
3132    .out_rri = tgen_muli,
3133};
3134
3135static const TCGOutOpMul2 outop_muls2 = {
3136    .base.static_constraint = C_NotImplemented,
3137};
3138
3139static void tgen_mulsh(TCGContext *s, TCGType type,
3140                       TCGReg a0, TCGReg a1, TCGReg a2)
3141{
3142    uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD;
3143    tcg_out32(s, insn | TAB(a0, a1, a2));
3144}
3145
3146static const TCGOutOpBinary outop_mulsh = {
3147    .base.static_constraint = C_O1_I2(r, r, r),
3148    .out_rrr = tgen_mulsh,
3149};
3150
3151static const TCGOutOpMul2 outop_mulu2 = {
3152    .base.static_constraint = C_NotImplemented,
3153};
3154
3155static void tgen_muluh(TCGContext *s, TCGType type,
3156                       TCGReg a0, TCGReg a1, TCGReg a2)
3157{
3158    uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU;
3159    tcg_out32(s, insn | TAB(a0, a1, a2));
3160}
3161
3162static const TCGOutOpBinary outop_muluh = {
3163    .base.static_constraint = C_O1_I2(r, r, r),
3164    .out_rrr = tgen_muluh,
3165};
3166
3167static void tgen_nand(TCGContext *s, TCGType type,
3168                     TCGReg a0, TCGReg a1, TCGReg a2)
3169{
3170    tcg_out32(s, NAND | SAB(a1, a0, a2));
3171}
3172
3173static const TCGOutOpBinary outop_nand = {
3174    .base.static_constraint = C_O1_I2(r, r, r),
3175    .out_rrr = tgen_nand,
3176};
3177
3178static void tgen_nor(TCGContext *s, TCGType type,
3179                     TCGReg a0, TCGReg a1, TCGReg a2)
3180{
3181    tcg_out32(s, NOR | SAB(a1, a0, a2));
3182}
3183
3184static const TCGOutOpBinary outop_nor = {
3185    .base.static_constraint = C_O1_I2(r, r, r),
3186    .out_rrr = tgen_nor,
3187};
3188
3189static void tgen_or(TCGContext *s, TCGType type,
3190                    TCGReg a0, TCGReg a1, TCGReg a2)
3191{
3192    tcg_out32(s, OR | SAB(a1, a0, a2));
3193}
3194
3195static void tgen_ori(TCGContext *s, TCGType type,
3196                     TCGReg a0, TCGReg a1, tcg_target_long a2)
3197{
3198    tcg_out_ori32(s, a0, a1, a2);
3199}
3200
3201static const TCGOutOpBinary outop_or = {
3202    .base.static_constraint = C_O1_I2(r, r, rU),
3203    .out_rrr = tgen_or,
3204    .out_rri = tgen_ori,
3205};
3206
3207static void tgen_orc(TCGContext *s, TCGType type,
3208                     TCGReg a0, TCGReg a1, TCGReg a2)
3209{
3210    tcg_out32(s, ORC | SAB(a1, a0, a2));
3211}
3212
3213static const TCGOutOpBinary outop_orc = {
3214    .base.static_constraint = C_O1_I2(r, r, r),
3215    .out_rrr = tgen_orc,
3216};
3217
3218static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags)
3219{
3220    return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented;
3221}
3222
3223static void tgen_rems(TCGContext *s, TCGType type,
3224                      TCGReg a0, TCGReg a1, TCGReg a2)
3225{
3226    uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD;
3227    tcg_out32(s, insn | TAB(a0, a1, a2));
3228}
3229
3230static const TCGOutOpBinary outop_rems = {
3231    .base.static_constraint = C_Dynamic,
3232    .base.dynamic_constraint = cset_mod,
3233    .out_rrr = tgen_rems,
3234};
3235
3236static void tgen_remu(TCGContext *s, TCGType type,
3237                      TCGReg a0, TCGReg a1, TCGReg a2)
3238{
3239    uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD;
3240    tcg_out32(s, insn | TAB(a0, a1, a2));
3241}
3242
3243static const TCGOutOpBinary outop_remu = {
3244    .base.static_constraint = C_Dynamic,
3245    .base.dynamic_constraint = cset_mod,
3246    .out_rrr = tgen_remu,
3247};
3248
3249static void tgen_rotl(TCGContext *s, TCGType type,
3250                     TCGReg a0, TCGReg a1, TCGReg a2)
3251{
3252    if (type == TCG_TYPE_I32) {
3253        tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31));
3254    } else {
3255        tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0));
3256    }
3257}
3258
3259static void tgen_rotli(TCGContext *s, TCGType type,
3260                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3261{
3262    if (type == TCG_TYPE_I32) {
3263        tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
3264    } else {
3265        tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
3266    }
3267}
3268
3269static const TCGOutOpBinary outop_rotl = {
3270    .base.static_constraint = C_O1_I2(r, r, ri),
3271    .out_rrr = tgen_rotl,
3272    .out_rri = tgen_rotli,
3273};
3274
3275static const TCGOutOpBinary outop_rotr = {
3276    .base.static_constraint = C_NotImplemented,
3277};
3278
3279static void tgen_sar(TCGContext *s, TCGType type,
3280                     TCGReg a0, TCGReg a1, TCGReg a2)
3281{
3282    uint32_t insn = type == TCG_TYPE_I32 ? SRAW : SRAD;
3283    tcg_out32(s, insn | SAB(a1, a0, a2));
3284}
3285
3286static void tgen_sari(TCGContext *s, TCGType type,
3287                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3288{
3289    /* Limit immediate shift count lest we create an illegal insn.  */
3290    if (type == TCG_TYPE_I32) {
3291        tcg_out_sari32(s, a0, a1, a2 & 31);
3292    } else {
3293        tcg_out_sari64(s, a0, a1, a2 & 63);
3294    }
3295}
3296
3297static const TCGOutOpBinary outop_sar = {
3298    .base.static_constraint = C_O1_I2(r, r, ri),
3299    .out_rrr = tgen_sar,
3300    .out_rri = tgen_sari,
3301};
3302
3303static void tgen_shl(TCGContext *s, TCGType type,
3304                     TCGReg a0, TCGReg a1, TCGReg a2)
3305{
3306    uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD;
3307    tcg_out32(s, insn | SAB(a1, a0, a2));
3308}
3309
3310static void tgen_shli(TCGContext *s, TCGType type,
3311                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3312{
3313    /* Limit immediate shift count lest we create an illegal insn.  */
3314    if (type == TCG_TYPE_I32) {
3315        tcg_out_shli32(s, a0, a1, a2 & 31);
3316    } else {
3317        tcg_out_shli64(s, a0, a1, a2 & 63);
3318    }
3319}
3320
3321static const TCGOutOpBinary outop_shl = {
3322    .base.static_constraint = C_O1_I2(r, r, ri),
3323    .out_rrr = tgen_shl,
3324    .out_rri = tgen_shli,
3325};
3326
3327static void tgen_shr(TCGContext *s, TCGType type,
3328                     TCGReg a0, TCGReg a1, TCGReg a2)
3329{
3330    uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD;
3331    tcg_out32(s, insn | SAB(a1, a0, a2));
3332}
3333
3334static void tgen_shri(TCGContext *s, TCGType type,
3335                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3336{
3337    /* Limit immediate shift count lest we create an illegal insn.  */
3338    if (type == TCG_TYPE_I32) {
3339        tcg_out_shri32(s, a0, a1, a2 & 31);
3340    } else {
3341        tcg_out_shri64(s, a0, a1, a2 & 63);
3342    }
3343}
3344
3345static const TCGOutOpBinary outop_shr = {
3346    .base.static_constraint = C_O1_I2(r, r, ri),
3347    .out_rrr = tgen_shr,
3348    .out_rri = tgen_shri,
3349};
3350
3351static void tgen_sub(TCGContext *s, TCGType type,
3352                     TCGReg a0, TCGReg a1, TCGReg a2)
3353{
3354    tcg_out32(s, SUBF | TAB(a0, a2, a1));
3355}
3356
3357static void tgen_subfi(TCGContext *s, TCGType type,
3358                       TCGReg a0, tcg_target_long a1, TCGReg a2)
3359{
3360    tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3361}
3362
3363static const TCGOutOpSubtract outop_sub = {
3364    .base.static_constraint = C_O1_I2(r, rI, r),
3365    .out_rrr = tgen_sub,
3366    .out_rir = tgen_subfi,
3367};
3368
3369static void tgen_xor(TCGContext *s, TCGType type,
3370                     TCGReg a0, TCGReg a1, TCGReg a2)
3371{
3372    tcg_out32(s, XOR | SAB(a1, a0, a2));
3373}
3374
3375static void tgen_xori(TCGContext *s, TCGType type,
3376                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3377{
3378    tcg_out_xori32(s, a0, a1, a2);
3379}
3380
3381static const TCGOutOpBinary outop_xor = {
3382    .base.static_constraint = C_O1_I2(r, r, rU),
3383    .out_rrr = tgen_xor,
3384    .out_rri = tgen_xori,
3385};
3386
3387static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3388{
3389    tcg_out32(s, NEG | RT(a0) | RA(a1));
3390}
3391
3392static const TCGOutOpUnary outop_neg = {
3393    .base.static_constraint = C_O1_I1(r, r),
3394    .out_rr = tgen_neg,
3395};
3396
3397static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3398{
3399    tgen_nor(s, type, a0, a1, a1);
3400}
3401
3402static const TCGOutOpUnary outop_not = {
3403    .base.static_constraint = C_O1_I1(r, r),
3404    .out_rr = tgen_not,
3405};
3406
3407
/*
 * Emit host code for the TCG opcodes that are handled by this switch.
 *
 * @s:          code generation context
 * @opc:        opcode to emit
 * @type:       operand type (not consulted by the cases below)
 * @args:       operand array; registers or immediates depending on the op
 * @const_args: non-zero entries mark the matching @args slot as a constant
 *
 * Opcodes that reach the default label trigger g_assert_not_reached().
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0, a1;

    switch (opc) {
    case INDEX_op_goto_ptr:
        /* Branch through CTR to args[0]; R3 is set by ADDI with RA=0, imm=0. */
        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
        tcg_out32(s, BCCTR | BO_ALWAYS);
        break;
    case INDEX_op_br:
        {
            TCGLabel *l = arg_label(args[0]);
            uint32_t insn = B;

            /* Resolved label: encode now.  Otherwise record a relocation. */
            if (l->has_value) {
                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
                                       l->u.value_ptr);
            } else {
                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
            }
            tcg_out32(s, insn);
        }
        break;
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        /* Same load as the unsigned case, followed by an explicit ext8s. */
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
        break;

    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args);
        break;

    case INDEX_op_qemu_ld_i32:
        /* -1 is passed in the second data-register slot, which is unused. */
        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_i64:
        /* On a 32-bit host the 64-bit value takes two registers. */
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_ld(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_ld_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
        break;

    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_i64:
        /* Mirrors qemu_ld_i64: two data registers on a 32-bit host. */
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_st(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_st_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
        break;

    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
        tcg_out_bswap16(s, args[0], args[1], args[2]);
        break;
    case INDEX_op_bswap32_i32:
        /* The 32-bit form always passes 0 as the flags argument. */
        tcg_out_bswap32(s, args[0], args[1], 0);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_bswap32(s, args[0], args[1], args[2]);
        break;
    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0], args[1]);
        break;

    case INDEX_op_deposit_i32:
        /*
         * args[3] is the field position and args[4] its length.  With a
         * constant source the field is simply cleared in args[0]
         * (NOTE(review): presumably zero is the only constant allowed --
         * confirm against the constraint set).
         */
        if (const_args[2]) {
            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
            tcg_out_andi32(s, args[0], args[0], ~mask);
        } else {
            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
                        32 - args[3] - args[4], 31 - args[3]);
        }
        break;
    case INDEX_op_deposit_i64:
        /* 64-bit counterpart of the case above. */
        if (const_args[2]) {
            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
            tcg_out_andi64(s, args[0], args[0], ~mask);
        } else {
            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
                        64 - args[3] - args[4]);
        }
        break;

    case INDEX_op_extract_i32:
        /* A field at bit 0 of at most 16 bits is a single ANDI. */
        if (args[2] == 0 && args[3] <= 16) {
            tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1));
            break;
        }
        tcg_out_rlw(s, RLWINM, args[0], args[1],
                    32 - args[2], 32 - args[3], 31);
        break;
    case INDEX_op_extract_i64:
        /* Same ANDI shortcut as the 32-bit case. */
        if (args[2] == 0 && args[3] <= 16) {
            tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1));
            break;
        }
        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
        break;

    case INDEX_op_sextract_i64:
        /* Fields ending at bit 32 are handled with ext32s or sari32. */
        if (args[2] + args[3] == 32) {
            if (args[2] == 0) {
                tcg_out_ext32s(s, args[0], args[1]);
            } else {
                tcg_out_sari32(s, args[0], args[1], args[2]);
            }
            break;
        }
        /* FALLTHRU */
    case INDEX_op_sextract_i32:
        /* Only 8- and 16-bit fields at bit 0 are expected here. */
        if (args[2] == 0 && args[3] == 8) {
            tcg_out_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
        } else if (args[2] == 0 && args[3] == 16) {
            tcg_out_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
        } else {
            g_assert_not_reached();
        }
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_add2_i64:
#else
    case INDEX_op_add2_i32:
#endif
        /* Note that the CA bit is defined based on the word size of the
           environment.  So in 64-bit mode it's always carry-out of bit 63.
           The fallback code using deposit works just as well for 32-bit.  */
        a0 = args[0], a1 = args[1];
        /*
         * If the low output aliases a high-part input that is read by the
         * second instruction, build the low result in R0 and move it last.
         */
        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[4]) {
            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
        } else {
            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
        }
        /* Constant high part: ADDME when non-zero, ADDZE when zero. */
        if (const_args[5]) {
            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
        } else {
            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_sub2_i64:
#else
    case INDEX_op_sub2_i32:
#endif
        a0 = args[0], a1 = args[1];
        /* Same aliasing precaution as add2, for the high-part inputs. */
        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[2]) {
            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
        } else {
            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
        }
        /* Constant high part: SUBFME when non-zero, SUBFZE when zero. */
        if (const_args[3]) {
            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
        } else {
            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext_i32_i64:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }
}
3651
3652int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3653{
3654    switch (opc) {
3655    case INDEX_op_and_vec:
3656    case INDEX_op_or_vec:
3657    case INDEX_op_xor_vec:
3658    case INDEX_op_andc_vec:
3659    case INDEX_op_not_vec:
3660    case INDEX_op_nor_vec:
3661    case INDEX_op_eqv_vec:
3662    case INDEX_op_nand_vec:
3663        return 1;
3664    case INDEX_op_orc_vec:
3665        return have_isa_2_07;
3666    case INDEX_op_add_vec:
3667    case INDEX_op_sub_vec:
3668    case INDEX_op_smax_vec:
3669    case INDEX_op_smin_vec:
3670    case INDEX_op_umax_vec:
3671    case INDEX_op_umin_vec:
3672    case INDEX_op_shlv_vec:
3673    case INDEX_op_shrv_vec:
3674    case INDEX_op_sarv_vec:
3675    case INDEX_op_rotlv_vec:
3676        return vece <= MO_32 || have_isa_2_07;
3677    case INDEX_op_ssadd_vec:
3678    case INDEX_op_sssub_vec:
3679    case INDEX_op_usadd_vec:
3680    case INDEX_op_ussub_vec:
3681        return vece <= MO_32;
3682    case INDEX_op_shli_vec:
3683    case INDEX_op_shri_vec:
3684    case INDEX_op_sari_vec:
3685    case INDEX_op_rotli_vec:
3686        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3687    case INDEX_op_cmp_vec:
3688    case INDEX_op_cmpsel_vec:
3689        return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
3690    case INDEX_op_neg_vec:
3691        return vece >= MO_32 && have_isa_3_00;
3692    case INDEX_op_mul_vec:
3693        switch (vece) {
3694        case MO_8:
3695        case MO_16:
3696            return -1;
3697        case MO_32:
3698            return have_isa_2_07 ? 1 : -1;
3699        case MO_64:
3700            return have_isa_3_10;
3701        }
3702        return 0;
3703    case INDEX_op_bitsel_vec:
3704        return have_vsx;
3705    case INDEX_op_rotrv_vec:
3706        return -1;
3707    default:
3708        return 0;
3709    }
3710}
3711
3712static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3713                            TCGReg dst, TCGReg src)
3714{
3715    tcg_debug_assert(dst >= TCG_REG_V0);
3716
3717    /* Splat from integer reg allowed via constraints for v3.00.  */
3718    if (src < TCG_REG_V0) {
3719        tcg_debug_assert(have_isa_3_00);
3720        switch (vece) {
3721        case MO_64:
3722            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3723            return true;
3724        case MO_32:
3725            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3726            return true;
3727        default:
3728            /* Fail, so that we fall back on either dupm or mov+dup.  */
3729            return false;
3730        }
3731    }
3732
3733    /*
3734     * Recall we use (or emulate) VSX integer loads, so the integer is
3735     * right justified within the left (zero-index) double-word.
3736     */
3737    switch (vece) {
3738    case MO_8:
3739        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3740        break;
3741    case MO_16:
3742        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3743        break;
3744    case MO_32:
3745        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3746        break;
3747    case MO_64:
3748        if (have_vsx) {
3749            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3750            break;
3751        }
3752        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3753        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3754        break;
3755    default:
3756        g_assert_not_reached();
3757    }
3758    return true;
3759}
3760
/*
 * Load the element of size @vece at @base + @offset and replicate it
 * across vector register @out.  Always returns true.
 *
 * Where no single load-and-splat instruction is used, the element is
 * loaded into @out and then splatted; the element index comes from the
 * low bits of @offset and is mirrored on little-endian hosts.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg out, TCGReg base, intptr_t offset)
{
    int elt;

    tcg_debug_assert(out >= TCG_REG_V0);
    switch (vece) {
    case MO_8:
        /* ISA 3.00 path uses the 16-byte-aligned offset (offset & -16). */
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
        }
        /* Byte index within the 16-byte vector. */
        elt = extract32(offset, 0, 4);
#if !HOST_BIG_ENDIAN
        elt ^= 15;
#endif
        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_16:
        tcg_debug_assert((offset & 1) == 0);
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
        }
        /* Halfword index within the 16-byte vector. */
        elt = extract32(offset, 1, 3);
#if !HOST_BIG_ENDIAN
        elt ^= 7;
#endif
        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_32:
        /* ISA 3.00 does the load and splat in one instruction. */
        if (have_isa_3_00) {
            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
        /* Word index within the 16-byte vector. */
        elt = extract32(offset, 2, 2);
#if !HOST_BIG_ENDIAN
        elt ^= 3;
#endif
        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_64:
        /* VSX does the load and splat in one instruction. */
        if (have_vsx) {
            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
        /* TMP1 = out passed through vsldoi by 8 bytes against itself. */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
        /* Which doubleword of the aligned 16 bytes was requested. */
        elt = extract32(offset, 3, 1);
#if !HOST_BIG_ENDIAN
        elt = !elt;
#endif
        /* Operand order of the final vsldoi picks the doubleword. */
        if (elt) {
            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
        } else {
            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
3829
3830static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
3831{
3832    tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
3833}
3834
3835static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3836{
3837    tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
3838}
3839
3840static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3841{
3842    tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
3843}
3844
3845static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3846{
3847    tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
3848}
3849
3850static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3851{
3852    tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
3853}
3854
3855static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
3856                               TCGReg c, TCGReg t, TCGReg f)
3857{
3858    if (TCG_TARGET_HAS_bitsel_vec) {
3859        tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
3860    } else {
3861        tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
3862        tcg_out_andc_vec(s, d, f, c);
3863        tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
3864    }
3865}
3866
3867static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
3868                                  TCGReg a1, TCGReg a2, TCGCond cond)
3869{
3870    static const uint32_t
3871        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3872        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3873        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3874        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
3875    uint32_t insn;
3876
3877    bool need_swap = false, need_inv = false;
3878
3879    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3880
3881    switch (cond) {
3882    case TCG_COND_EQ:
3883    case TCG_COND_GT:
3884    case TCG_COND_GTU:
3885        break;
3886    case TCG_COND_NE:
3887        if (have_isa_3_00 && vece <= MO_32) {
3888            break;
3889        }
3890        /* fall through */
3891    case TCG_COND_LE:
3892    case TCG_COND_LEU:
3893        need_inv = true;
3894        break;
3895    case TCG_COND_LT:
3896    case TCG_COND_LTU:
3897        need_swap = true;
3898        break;
3899    case TCG_COND_GE:
3900    case TCG_COND_GEU:
3901        need_swap = need_inv = true;
3902        break;
3903    default:
3904        g_assert_not_reached();
3905    }
3906
3907    if (need_inv) {
3908        cond = tcg_invert_cond(cond);
3909    }
3910    if (need_swap) {
3911        TCGReg swap = a1;
3912        a1 = a2;
3913        a2 = swap;
3914        cond = tcg_swap_cond(cond);
3915    }
3916
3917    switch (cond) {
3918    case TCG_COND_EQ:
3919        insn = eq_op[vece];
3920        break;
3921    case TCG_COND_NE:
3922        insn = ne_op[vece];
3923        break;
3924    case TCG_COND_GT:
3925        insn = gts_op[vece];
3926        break;
3927    case TCG_COND_GTU:
3928        insn = gtu_op[vece];
3929        break;
3930    default:
3931        g_assert_not_reached();
3932    }
3933    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3934
3935    return need_inv;
3936}
3937
3938static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
3939                            TCGReg a1, TCGReg a2, TCGCond cond)
3940{
3941    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
3942        tcg_out_not_vec(s, a0, a0);
3943    }
3944}
3945
3946static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
3947                               TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
3948                               TCGReg v4, TCGCond cond)
3949{
3950    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);
3951
3952    if (!const_v3) {
3953        if (inv) {
3954            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
3955        } else {
3956            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
3957        }
3958    } else if (v3) {
3959        if (inv) {
3960            tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
3961        } else {
3962            tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
3963        }
3964    } else {
3965        if (inv) {
3966            tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
3967        } else {
3968            tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
3969        }
3970    }
3971}
3972
3973static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3974                           unsigned vecl, unsigned vece,
3975                           const TCGArg args[TCG_MAX_OP_ARGS],
3976                           const int const_args[TCG_MAX_OP_ARGS])
3977{
3978    static const uint32_t
3979        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3980        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3981        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3982        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3983        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3984        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3985        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3986        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3987        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3988        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3989        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3990        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3991        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3992        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3993        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3994        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3995        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3996        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3997        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3998        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3999        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
4000
4001    TCGType type = vecl + TCG_TYPE_V64;
4002    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
4003    uint32_t insn;
4004
4005    switch (opc) {
4006    case INDEX_op_ld_vec:
4007        tcg_out_ld(s, type, a0, a1, a2);
4008        return;
4009    case INDEX_op_st_vec:
4010        tcg_out_st(s, type, a0, a1, a2);
4011        return;
4012    case INDEX_op_dupm_vec:
4013        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
4014        return;
4015
4016    case INDEX_op_add_vec:
4017        insn = add_op[vece];
4018        break;
4019    case INDEX_op_sub_vec:
4020        insn = sub_op[vece];
4021        break;
4022    case INDEX_op_neg_vec:
4023        insn = neg_op[vece];
4024        a2 = a1;
4025        a1 = 0;
4026        break;
4027    case INDEX_op_mul_vec:
4028        insn = mul_op[vece];
4029        break;
4030    case INDEX_op_ssadd_vec:
4031        insn = ssadd_op[vece];
4032        break;
4033    case INDEX_op_sssub_vec:
4034        insn = sssub_op[vece];
4035        break;
4036    case INDEX_op_usadd_vec:
4037        insn = usadd_op[vece];
4038        break;
4039    case INDEX_op_ussub_vec:
4040        insn = ussub_op[vece];
4041        break;
4042    case INDEX_op_smin_vec:
4043        insn = smin_op[vece];
4044        break;
4045    case INDEX_op_umin_vec:
4046        insn = umin_op[vece];
4047        break;
4048    case INDEX_op_smax_vec:
4049        insn = smax_op[vece];
4050        break;
4051    case INDEX_op_umax_vec:
4052        insn = umax_op[vece];
4053        break;
4054    case INDEX_op_shlv_vec:
4055        insn = shlv_op[vece];
4056        break;
4057    case INDEX_op_shrv_vec:
4058        insn = shrv_op[vece];
4059        break;
4060    case INDEX_op_sarv_vec:
4061        insn = sarv_op[vece];
4062        break;
4063    case INDEX_op_and_vec:
4064        tcg_out_and_vec(s, a0, a1, a2);
4065        return;
4066    case INDEX_op_or_vec:
4067        tcg_out_or_vec(s, a0, a1, a2);
4068        return;
4069    case INDEX_op_xor_vec:
4070        insn = VXOR;
4071        break;
4072    case INDEX_op_andc_vec:
4073        tcg_out_andc_vec(s, a0, a1, a2);
4074        return;
4075    case INDEX_op_not_vec:
4076        tcg_out_not_vec(s, a0, a1);
4077        return;
4078    case INDEX_op_orc_vec:
4079        tcg_out_orc_vec(s, a0, a1, a2);
4080        return;
4081    case INDEX_op_nand_vec:
4082        insn = VNAND;
4083        break;
4084    case INDEX_op_nor_vec:
4085        insn = VNOR;
4086        break;
4087    case INDEX_op_eqv_vec:
4088        insn = VEQV;
4089        break;
4090
4091    case INDEX_op_cmp_vec:
4092        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
4093        return;
4094    case INDEX_op_cmpsel_vec:
4095        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
4096                           args[3], const_args[3], args[4], args[5]);
4097        return;
4098    case INDEX_op_bitsel_vec:
4099        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
4100        return;
4101
4102    case INDEX_op_dup2_vec:
4103        assert(TCG_TARGET_REG_BITS == 32);
4104        /* With inputs a1 = xLxx, a2 = xHxx  */
4105        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
4106        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
4107        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
4108        return;
4109
4110    case INDEX_op_ppc_mrgh_vec:
4111        insn = mrgh_op[vece];
4112        break;
4113    case INDEX_op_ppc_mrgl_vec:
4114        insn = mrgl_op[vece];
4115        break;
4116    case INDEX_op_ppc_muleu_vec:
4117        insn = muleu_op[vece];
4118        break;
4119    case INDEX_op_ppc_mulou_vec:
4120        insn = mulou_op[vece];
4121        break;
4122    case INDEX_op_ppc_pkum_vec:
4123        insn = pkum_op[vece];
4124        break;
4125    case INDEX_op_rotlv_vec:
4126        insn = rotl_op[vece];
4127        break;
4128    case INDEX_op_ppc_msum_vec:
4129        tcg_debug_assert(vece == MO_16);
4130        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
4131        return;
4132
4133    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
4134    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
4135    default:
4136        g_assert_not_reached();
4137    }
4138
4139    tcg_debug_assert(insn != 0);
4140    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
4141}
4142
4143static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
4144                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
4145{
4146    TCGv_vec t1;
4147
4148    if (vece == MO_32) {
4149        /*
4150         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
4151         * So using negative numbers gets us the 4th bit easily.
4152         */
4153        imm = sextract32(imm, 0, 5);
4154    } else {
4155        imm &= (8 << vece) - 1;
4156    }
4157
4158    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
4159    t1 = tcg_constant_vec(type, MO_8, imm);
4160    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
4161              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
4162}
4163
4164static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
4165                           TCGv_vec v1, TCGv_vec v2)
4166{
4167    TCGv_vec t1 = tcg_temp_new_vec(type);
4168    TCGv_vec t2 = tcg_temp_new_vec(type);
4169    TCGv_vec c0, c16;
4170
4171    switch (vece) {
4172    case MO_8:
4173    case MO_16:
4174        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
4175                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4176        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
4177                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4178        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
4179                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
4180        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
4181                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
4182        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
4183                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
4184        break;
4185
4186    case MO_32:
4187        tcg_debug_assert(!have_isa_2_07);
4188        /*
4189         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
4190         * So using -16 is a quick way to represent 16.
4191         */
4192        c16 = tcg_constant_vec(type, MO_8, -16);
4193        c0 = tcg_constant_vec(type, MO_8, 0);
4194
4195        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
4196                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
4197        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
4198                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4199        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
4200                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
4201        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
4202                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
4203        tcg_gen_add_vec(MO_32, v0, t1, t2);
4204        break;
4205
4206    default:
4207        g_assert_not_reached();
4208    }
4209    tcg_temp_free_vec(t1);
4210    tcg_temp_free_vec(t2);
4211}
4212
4213void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4214                       TCGArg a0, ...)
4215{
4216    va_list va;
4217    TCGv_vec v0, v1, v2, t0;
4218    TCGArg a2;
4219
4220    va_start(va, a0);
4221    v0 = temp_tcgv_vec(arg_temp(a0));
4222    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
4223    a2 = va_arg(va, TCGArg);
4224
4225    switch (opc) {
4226    case INDEX_op_shli_vec:
4227        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
4228        break;
4229    case INDEX_op_shri_vec:
4230        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
4231        break;
4232    case INDEX_op_sari_vec:
4233        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
4234        break;
4235    case INDEX_op_rotli_vec:
4236        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
4237        break;
4238    case INDEX_op_mul_vec:
4239        v2 = temp_tcgv_vec(arg_temp(a2));
4240        expand_vec_mul(type, vece, v0, v1, v2);
4241        break;
4242    case INDEX_op_rotlv_vec:
4243        v2 = temp_tcgv_vec(arg_temp(a2));
4244        t0 = tcg_temp_new_vec(type);
4245        tcg_gen_neg_vec(vece, t0, v2);
4246        tcg_gen_rotlv_vec(vece, v0, v1, t0);
4247        tcg_temp_free_vec(t0);
4248        break;
4249    default:
4250        g_assert_not_reached();
4251    }
4252    va_end(va);
4253}
4254
4255static TCGConstraintSetIndex
4256tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
4257{
4258    switch (op) {
4259    case INDEX_op_goto_ptr:
4260        return C_O0_I1(r);
4261
4262    case INDEX_op_ld8u_i32:
4263    case INDEX_op_ld8s_i32:
4264    case INDEX_op_ld16u_i32:
4265    case INDEX_op_ld16s_i32:
4266    case INDEX_op_ld_i32:
4267    case INDEX_op_bswap16_i32:
4268    case INDEX_op_bswap32_i32:
4269    case INDEX_op_extract_i32:
4270    case INDEX_op_sextract_i32:
4271    case INDEX_op_ld8u_i64:
4272    case INDEX_op_ld8s_i64:
4273    case INDEX_op_ld16u_i64:
4274    case INDEX_op_ld16s_i64:
4275    case INDEX_op_ld32u_i64:
4276    case INDEX_op_ld32s_i64:
4277    case INDEX_op_ld_i64:
4278    case INDEX_op_ext_i32_i64:
4279    case INDEX_op_extu_i32_i64:
4280    case INDEX_op_bswap16_i64:
4281    case INDEX_op_bswap32_i64:
4282    case INDEX_op_bswap64_i64:
4283    case INDEX_op_extract_i64:
4284    case INDEX_op_sextract_i64:
4285        return C_O1_I1(r, r);
4286
4287    case INDEX_op_st8_i32:
4288    case INDEX_op_st16_i32:
4289    case INDEX_op_st_i32:
4290    case INDEX_op_st8_i64:
4291    case INDEX_op_st16_i64:
4292    case INDEX_op_st32_i64:
4293    case INDEX_op_st_i64:
4294        return C_O0_I2(r, r);
4295
4296    case INDEX_op_deposit_i32:
4297    case INDEX_op_deposit_i64:
4298        return C_O1_I2(r, 0, rZ);
4299    case INDEX_op_brcond2_i32:
4300        return C_O0_I4(r, r, ri, ri);
4301    case INDEX_op_setcond2_i32:
4302        return C_O1_I4(r, r, r, ri, ri);
4303    case INDEX_op_add2_i64:
4304    case INDEX_op_add2_i32:
4305        return C_O2_I4(r, r, r, r, rI, rZM);
4306    case INDEX_op_sub2_i64:
4307    case INDEX_op_sub2_i32:
4308        return C_O2_I4(r, r, rI, rZM, r, r);
4309
4310    case INDEX_op_qemu_ld_i32:
4311        return C_O1_I1(r, r);
4312    case INDEX_op_qemu_ld_i64:
4313        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
4314
4315    case INDEX_op_qemu_st_i32:
4316        return C_O0_I2(r, r);
4317    case INDEX_op_qemu_st_i64:
4318        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
4319
4320    case INDEX_op_qemu_ld_i128:
4321        return C_N1O1_I1(o, m, r);
4322    case INDEX_op_qemu_st_i128:
4323        return C_O0_I3(o, m, r);
4324
4325    case INDEX_op_add_vec:
4326    case INDEX_op_sub_vec:
4327    case INDEX_op_mul_vec:
4328    case INDEX_op_and_vec:
4329    case INDEX_op_or_vec:
4330    case INDEX_op_xor_vec:
4331    case INDEX_op_andc_vec:
4332    case INDEX_op_orc_vec:
4333    case INDEX_op_nor_vec:
4334    case INDEX_op_eqv_vec:
4335    case INDEX_op_nand_vec:
4336    case INDEX_op_cmp_vec:
4337    case INDEX_op_ssadd_vec:
4338    case INDEX_op_sssub_vec:
4339    case INDEX_op_usadd_vec:
4340    case INDEX_op_ussub_vec:
4341    case INDEX_op_smax_vec:
4342    case INDEX_op_smin_vec:
4343    case INDEX_op_umax_vec:
4344    case INDEX_op_umin_vec:
4345    case INDEX_op_shlv_vec:
4346    case INDEX_op_shrv_vec:
4347    case INDEX_op_sarv_vec:
4348    case INDEX_op_rotlv_vec:
4349    case INDEX_op_rotrv_vec:
4350    case INDEX_op_ppc_mrgh_vec:
4351    case INDEX_op_ppc_mrgl_vec:
4352    case INDEX_op_ppc_muleu_vec:
4353    case INDEX_op_ppc_mulou_vec:
4354    case INDEX_op_ppc_pkum_vec:
4355    case INDEX_op_dup2_vec:
4356        return C_O1_I2(v, v, v);
4357
4358    case INDEX_op_not_vec:
4359    case INDEX_op_neg_vec:
4360        return C_O1_I1(v, v);
4361
4362    case INDEX_op_dup_vec:
4363        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
4364
4365    case INDEX_op_ld_vec:
4366    case INDEX_op_dupm_vec:
4367        return C_O1_I1(v, r);
4368
4369    case INDEX_op_st_vec:
4370        return C_O0_I2(v, r);
4371
4372    case INDEX_op_bitsel_vec:
4373    case INDEX_op_ppc_msum_vec:
4374        return C_O1_I3(v, v, v, v);
4375    case INDEX_op_cmpsel_vec:
4376        return C_O1_I4(v, v, v, vZM, v);
4377
4378    default:
4379        return C_NotImplemented;
4380    }
4381}
4382
4383static void tcg_target_init(TCGContext *s)
4384{
4385    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
4386    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
4387    if (have_altivec) {
4388        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
4389        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
4390    }
4391
4392    tcg_target_call_clobber_regs = 0;
4393    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
4394    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
4395    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
4396    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
4397    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
4398    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
4399    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
4400    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
4401    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
4402    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
4403    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
4404    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
4405
4406    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
4407    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
4408    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
4409    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
4410    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
4411    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
4412    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
4413    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
4414    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
4415    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
4416    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
4417    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
4418    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
4419    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
4420    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
4421    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4422    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
4423    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
4424    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
4425    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
4426
4427    s->reserved_regs = 0;
4428    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
4429    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
4430#if defined(_CALL_SYSV)
4431    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
4432#endif
4433#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
4434    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
4435#endif
4436    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
4437    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
4438    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
4439    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
4440    if (USE_REG_TB) {
4441        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
4442    }
4443}
4444
4445#ifdef __ELF__
4446typedef struct {
4447    DebugFrameCIE cie;
4448    DebugFrameFDEHeader fde;
4449    uint8_t fde_def_cfa[4];
4450    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
4451} DebugFrame;
4452
4453/* We're expecting a 2 byte uleb128 encoded value.  */
4454QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
4455
4456#if TCG_TARGET_REG_BITS == 64
4457# define ELF_HOST_MACHINE EM_PPC64
4458#else
4459# define ELF_HOST_MACHINE EM_PPC
4460#endif
4461
4462static DebugFrame debug_frame = {
4463    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4464    .cie.id = -1,
4465    .cie.version = 1,
4466    .cie.code_align = 1,
4467    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
4468    .cie.return_column = 65,
4469
4470    /* Total FDE size does not include the "len" member.  */
4471    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
4472
4473    .fde_def_cfa = {
4474        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
4475        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
4476        (FRAME_SIZE >> 7)
4477    },
4478    .fde_reg_ofs = {
4479        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
4480        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
4481    }
4482};
4483
4484void tcg_register_jit(const void *buf, size_t buf_size)
4485{
4486    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4487    int i;
4488
4489    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4490        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4491        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4492    }
4493
4494    debug_frame.fde.func_start = (uintptr_t)buf;
4495    debug_frame.fde.func_len = buf_size;
4496
4497    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4498}
4499#endif /* __ELF__ */
4500#undef VMULEUB
4501#undef VMULEUH
4502#undef VMULEUW
4503#undef VMULOUB
4504#undef VMULOUH
4505#undef VMULOUW
4506#undef VMSUMUHM
4507