/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple Xcode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# ifdef _CALL_AIX
    /* ok */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* ok */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* ok */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16     0x00100
#define TCG_CT_CONST_U16     0x00200
#define TCG_CT_CONST_N16     0x00400
#define TCG_CT_CONST_S32     0x00800
#define TCG_CT_CONST_U32     0x01000
#define TCG_CT_CONST_ZERO    0x02000
#define TCG_CT_CONST_MONE    0x04000
#define TCG_CT_CONST_WSZ     0x08000
#define TCG_CT_CONST_CMP     0x10000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifndef R_PPC64_PCREL34
#define R_PPC64_PCREL34  132
#endif

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

/* For PPC, we use TB+4 instead of TB as the base. */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
{
    return tcg_tbrel_diff(s, target) - 4;
}

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}
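
/*
 * Informative note: the I-form branch (B) has a 24-bit displacement field
 * that is shifted left 2 bits, i.e. a 26-bit signed byte offset, so a
 * direct branch reaches +/- 32 MiB of the current insn.  in_range_b()
 * checks exactly that via sextract64(target, 0, 26).
 */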

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
        return true;
    }
    return false;
}
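
/*
 * Informative note: reloc_pc34() patches a prefixed (8-byte) insn pair.
 * The 34-bit displacement is split as the prefixed D-form encoding
 * requires: the high 18 bits land in the low 18 bits of the prefix word
 * (src_rw[0]) and the low 16 bits in the D field of the suffix word
 * (src_rw[1]).
 */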

static bool mask_operand(uint32_t c, int *mb, int *me);
static bool mask64_operand(uint64_t c, int *mb, int *me);

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = sval;
    int mb, me;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    if (ct & TCG_CT_CONST_CMP) {
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            ct |= TCG_CT_CONST_S16;
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            ct |= TCG_CT_CONST_U16;
            break;
        case TCG_COND_TSTEQ:
        case TCG_COND_TSTNE:
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
                return 1;
            }
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
                return 1;
            }
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
                return 1;
            }
            return 0;
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_N16) && -sval == (int16_t)-sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))
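
/*
 * Worked example of these format macros (informative): ADD below is
 * XO31(266), i.e. primary opcode 31 in the top six bits and extended
 * opcode 266 shifted into bits 10..1:
 * (31 << 26) | (266 << 1) = 0x7c000214, the base encoding of "add".
 */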

#define B      OPCD( 18)
#define BC     OPCD( 16)

#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)
#define LQ     OPCD( 56)

#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)
#define STQ    XO62(  2)

#define PLWA   OPCD( 41)
#define PLD    OPCD( 57)
#define PLXSD  OPCD( 42)
#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */

#define PSTD   OPCD( 61)
#define PSTXSD OPCD( 46)
#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)
#define ADDPCIS XO19( 2)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define SETBC    XO31(384)  /* v3.10 */
#define SETBCR   XO31(416)  /* v3.10 */
#define SETNBC   XO31(448)  /* v3.10 */
#define SETNBCR  XO31(480)  /* v3.10 */

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
};
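
/*
 * Informative example: the TCG_COND_EQ entry above expands to
 * BC | BI(0, CR_EQ) | BO_COND_TRUE, i.e. "bc 12, 2, target":
 * BO=12 branches when the selected CR bit is set, and BI=2 selects the
 * EQ bit of CR0 -- the familiar "beq" extended mnemonic.
 */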

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
{
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
}
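
/*
 * Informative note: prefixed insns are 8 bytes, and ISA v3.1 does not
 * allow them to cross a 64-byte boundary.  The only problematic position
 * is offset 0x3c within a 64-byte block, where the prefix word would end
 * the block and the suffix word start the next one; the check above
 * detects exactly that case so a NOP can be emitted first.
 */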

static void tcg_out_prefix_align(TCGContext *s)
{
    if (tcg_out_need_prefix_align(s)) {
        tcg_out32(s, NOP);
    }
}

static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}

/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}
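
/*
 * Worked example (informative): "pli r5, 0x12345" is paddi r5, 0, 0x12345
 * with R=0, i.e. tcg_out_mls_d(s, ADDI, 5, 0, 0x12345, 0).  That emits
 * prefix 0x06000001 (OPCD(1) | (2 << 24) | high imm bits 0x1) followed by
 * suffix 0x38a02345 (the addi opcode carrying the low 16 bits 0x2345).
 */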

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, bool rc)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
}

static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb)
{
    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
}

static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, int me, bool rc)
{
    tcg_debug_assert((mb & 0x1f) == mb);
    tcg_debug_assert((me & 0x1f) == me);
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
}

static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb, int me)
{
    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}
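
/*
 * Examples (informative): 0x7fff satisfies the first test and becomes
 * "addi ret, 0, 0x7fff" (li); 0x12340000 has a zero low half and becomes
 * "addis ret, 0, 0x1234" (lis).  Anything else needs more than one insn
 * and falls through to tcg_out_movi_int() below.
 */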

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
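
/*
 * Worked example of the final fallback (informative), assuming none of
 * the pc-relative, prefixed, or constant-pool shortcuts above apply
 * (e.g. in the prologue on a pre-v3.0 cpu): arg = 0x123456789abcdef0
 * loads the high half with lis 0x1234 + ori 0x5678, shifts it left 32,
 * then merges the low half with oris 0x9abc + ori 0xdef0 -- five insns
 * in all.
 */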

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}
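
/*
 * Examples (informative): c = 0xffff000000000000 is 1..10..0, so
 * c == -lsb gives mb = 0, me = clz64(lsb) = 15; c = 0x00000000ffffffff
 * is 0..01..1, giving mb = clz64(c + 1) + 1 = 32, me = 63.  Anything
 * with more than one run of ones is rejected.
 */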

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
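
/*
 * Worked example of the addis splitting above (informative): for
 * offset = 0x7fff8000, l0 = (int16_t)0x8000 = -0x8000 and the remaining
 * high part 0x8000 is itself negative as an addis immediate, so with a
 * non-negative original offset the "extra" 0x4000 path emits
 *     addis rs, base, 0x4000
 *     addis rs, rs, 0x4000
 *     op    rt, -0x8000(rs)
 * i.e. 0x40000000 + 0x40000000 - 0x8000 = 0x7fff8000.
 */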

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/*
 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
 * If RC, then also set CR0.
 */
static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
                         bool const_arg2, TCGType type, bool rc)
{
    int mb, me;

    if (!const_arg2) {
        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
        return;
    }

    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    if ((arg2 & ~0xffff) == 0) {
        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
        return;
    }
    if ((arg2 & ~0xffff0000ull) == 0) {
        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
        return;
    }
    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
        return;
    }
    if (TCG_TARGET_REG_BITS == 64) {
        int sh = clz64(arg2);
        if (mask64_operand(arg2 << sh, &mb, &me)) {
            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
            return;
        }
    }
    /* Constraints should satisfy this. */
    g_assert_not_reached();
}
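
/*
 * Examples (informative): arg2 = 0xff00 fits the first constant case and
 * uses "andi. dest, arg1, 0xff00"; arg2 = 0xff000000 on I32 is a
 * contiguous mask accepted by mask_operand (mb = 0, me = 7) and becomes
 * "rlwinm(.) dest, arg1, 0, 0, 7".
 */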

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        bool const_arg2, int cr, TCGType type)
{
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /*
     * Simplify the comparisons below wrt CMPI.
     * All of the tests are 16-bit, so a 32-bit sign extend always works.
     */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t)arg2 == arg2) {
                op = CMPI;
                break;
            }
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        tcg_debug_assert(cr == 0);
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
        return;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            tcg_debug_assert((int16_t)arg2 == arg2);
            op = CMPI;
            break;
        }
        op = CMP;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    default:
        g_assert_not_reached();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
    op |= RA(arg1);
    op |= const_arg2 ? arg2 & 0xffff : RB(arg2);
    tcg_out32(s, op);
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, src, src));
        return;
    }

    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
    if (neg) {
        tcg_out32(s, NEG | RT(dst) | RA(dst));
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.  Extra addition
         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
        return;
    }
    tcg_out_setcond_eq0(s, type, dst, src, false);
    if (neg) {
        tcg_out32(s, ADDI | TAI(dst, dst, -1));
    } else {
        tcg_out_xori32(s, dst, dst, 1);
    }
}
1793
1794static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1795                                  bool const_arg2)
1796{
1797    if (const_arg2) {
1798        if ((uint32_t)arg2 == arg2) {
1799            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1800        } else {
1801            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1802            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1803        }
1804    } else {
1805        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1806    }
1807    return TCG_REG_R0;
1808}
1809
1810static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1811                            TCGReg arg0, TCGReg arg1, TCGArg arg2,
1812                            bool const_arg2, bool neg)
1813{
1814    int sh;
1815    bool inv;
1816
1817    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1818
1819    /* Ignore high bits of a potential constant arg2.  */
1820    if (type == TCG_TYPE_I32) {
1821        arg2 = (uint32_t)arg2;
1822    }
1823
1824    /* With SETBC/SETBCR, we can always implement with 2 insns. */
1825    if (have_isa_3_10) {
1826        tcg_insn_unit bi, opc;
1827
1828        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
1829
1830        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
1831        bi = tcg_to_bc[cond] & (0x1f << 16);
1832        if (tcg_to_bc[cond] & BO(8)) {
1833            opc = neg ? SETNBC : SETBC;
1834        } else {
1835            opc = neg ? SETNBCR : SETBCR;
1836        }
1837        tcg_out32(s, opc | RT(arg0) | bi);
1838        return;
1839    }
1840
1841    /* Handle common and trivial cases before handling anything else.  */
1842    if (arg2 == 0) {
1843        switch (cond) {
1844        case TCG_COND_EQ:
1845            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1846            return;
1847        case TCG_COND_NE:
1848            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1849            return;
1850        case TCG_COND_GE:
1851            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1852            arg1 = arg0;
1853            /* FALLTHRU */
1854        case TCG_COND_LT:
1855            /* Extract the sign bit.  */
1856            if (type == TCG_TYPE_I32) {
1857                if (neg) {
1858                    tcg_out_sari32(s, arg0, arg1, 31);
1859                } else {
1860                    tcg_out_shri32(s, arg0, arg1, 31);
1861                }
1862            } else {
1863                if (neg) {
1864                    tcg_out_sari64(s, arg0, arg1, 63);
1865                } else {
1866                    tcg_out_shri64(s, arg0, arg1, 63);
1867                }
1868            }
1869            return;
1870        default:
1871            break;
1872        }
1873    }
1874
1875    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1876       All other cases below are also at least 3 insns, so speed up the
1877       code generator by not considering them and always using ISEL.  */
1878    if (have_isel) {
1879        int isel, tab;
1880
1881        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
1882
1883        isel = tcg_to_isel[cond];
1884
1885        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
1886        if (isel & 1) {
1887            /* arg0 = (bc ? 0 : 1) */
1888            tab = TAB(arg0, 0, arg0);
1889            isel &= ~1;
1890        } else {
1891            /* arg0 = (bc ? 1 : 0) */
1892            tcg_out_movi(s, type, TCG_REG_R0, 0);
1893            tab = TAB(arg0, arg0, TCG_REG_R0);
1894        }
1895        tcg_out32(s, isel | tab);
1896        return;
1897    }
1898
1899    inv = false;
1900    switch (cond) {
1901    case TCG_COND_EQ:
1902        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1903        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1904        break;
1905
1906    case TCG_COND_NE:
1907        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1908        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1909        break;
1910
1911    case TCG_COND_TSTEQ:
1912        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
1913        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
1914        break;
1915
1916    case TCG_COND_TSTNE:
1917        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
1918        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
1919        break;
1920
1921    case TCG_COND_LE:
1922    case TCG_COND_LEU:
1923        inv = true;
1924        /* fall through */
1925    case TCG_COND_GT:
1926    case TCG_COND_GTU:
1927        sh = 30; /* CR7 CR_GT */
1928        goto crtest;
1929
1930    case TCG_COND_GE:
1931    case TCG_COND_GEU:
1932        inv = true;
1933        /* fall through */
1934    case TCG_COND_LT:
1935    case TCG_COND_LTU:
1936        sh = 29; /* CR7 CR_LT */
1937        goto crtest;
1938
1939    crtest:
1940        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1941        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1942        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1943        if (neg && inv) {
1944            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
1945        } else if (neg) {
1946            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
1947        } else if (inv) {
1948            tcg_out_xori32(s, arg0, arg0, 1);
1949        }
1950        break;
1951
1952    default:
1953        g_assert_not_reached();
1954    }
1955}
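
/*
 * In the crtest path above, mfocrf copies the CR into r0 with CR7 in the
 * low nibble; rlwinm rotates the wanted bit (CR bit 28 for LT, 29 for GT)
 * down to bit 31 and masks it, producing a 0/1 value that the tail then
 * negates and/or inverts as requested.
 */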
1956
1957static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1958                         TCGReg dest, TCGReg arg1, TCGReg arg2)
1959{
1960    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false);
1961}
1962
1963static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
1964                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
1965{
1966    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false);
1967}
1968
1969static const TCGOutOpSetcond outop_setcond = {
1970    .base.static_constraint = C_O1_I2(r, r, rC),
1971    .out_rrr = tgen_setcond,
1972    .out_rri = tgen_setcondi,
1973};
1974
1975static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
1976                            TCGReg dest, TCGReg arg1, TCGReg arg2)
1977{
1978    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true);
1979}
1980
1981static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
1982                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
1983{
1984    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true);
1985}
1986
1987static const TCGOutOpSetcond outop_negsetcond = {
1988    .base.static_constraint = C_O1_I2(r, r, rC),
1989    .out_rrr = tgen_negsetcond,
1990    .out_rri = tgen_negsetcondi,
1991};
1992
1993static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
1994{
1995    tcg_out32(s, tcg_to_bc[cond] | bd);
1996}
1997
1998static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
1999{
2000    int bd = 0;
2001    if (l->has_value) {
2002        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
2003    } else {
2004        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
2005    }
2006    tcg_out_bc(s, cond, bd);
2007}
2008
2009static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
2010                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
2011{
2012    tcg_out_cmp(s, cond, arg1, arg2, false, 0, type);
2013    tcg_out_bc_lab(s, cond, l);
2014}
2015
2016static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond,
2017                         TCGReg arg1, tcg_target_long arg2, TCGLabel *l)
2018{
2019    tcg_out_cmp(s, cond, arg1, arg2, true, 0, type);
2020    tcg_out_bc_lab(s, cond, l);
2021}
2022
2023static const TCGOutOpBrcond outop_brcond = {
2024    .base.static_constraint = C_O0_I2(r, rC),
2025    .out_rr = tgen_brcond,
2026    .out_ri = tgen_brcondi,
2027};
2028
2029static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
2030                         TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2,
2031                         TCGArg v1, bool const_v1, TCGArg v2, bool const_v2)
2032{
2033    /* If for some reason both inputs are zero, don't produce bad code.  */
2034    if (v1 == 0 && v2 == 0) {
2035        tcg_out_movi(s, type, dest, 0);
2036        return;
2037    }
2038
2039    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);
2040
2041    if (have_isel) {
2042        int isel = tcg_to_isel[cond];
2043
2044        /* Swap the V operands if the operation indicates inversion.  */
2045        if (isel & 1) {
2046            int t = v1;
2047            v1 = v2;
2048            v2 = t;
2049            isel &= ~1;
2050        }
2051        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
2052        if (v2 == 0) {
2053            tcg_out_movi(s, type, TCG_REG_R0, 0);
2054        }
2055        tcg_out32(s, isel | TAB(dest, v1, v2));
2056    } else {
2057        if (dest == v2) {
2058            cond = tcg_invert_cond(cond);
2059            v2 = v1;
2060        } else if (dest != v1) {
2061            if (v1 == 0) {
2062                tcg_out_movi(s, type, dest, 0);
2063            } else {
2064                tcg_out_mov(s, type, dest, v1);
2065            }
2066        }
2067        /* Branch forward over one insn */
2068        tcg_out_bc(s, cond, 8);
2069        if (v2 == 0) {
2070            tcg_out_movi(s, type, dest, 0);
2071        } else {
2072            tcg_out_mov(s, type, dest, v2);
2073        }
2074    }
2075}
2076
2077static const TCGOutOpMovcond outop_movcond = {
2078    .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ),
2079    .out = tgen_movcond,
2080};
2081
2082static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
2083                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
2084{
2085    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2086        tcg_out32(s, opc | RA(a0) | RS(a1));
2087    } else {
2088        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
2089        /* Note that the only other valid constant for a2 is 0.  */
2090        if (have_isel) {
2091            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
2092            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
2093        } else if (!const_a2 && a0 == a2) {
2094            tcg_out_bc(s, TCG_COND_EQ, 8);
2095            tcg_out32(s, opc | RA(a0) | RS(a1));
2096        } else {
2097            tcg_out32(s, opc | RA(a0) | RS(a1));
2098            tcg_out_bc(s, TCG_COND_NE, 8);
2099            if (const_a2) {
2100                tcg_out_movi(s, type, a0, 0);
2101            } else {
2102                tcg_out_mov(s, type, a0, a2);
2103            }
2104        }
2105    }
2106}
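
/*
 * Rough shape of the non-isel fallback above, e.g. for ctz with a
 * register default value:
 *     cmpwi  a1, 0
 *     cnttzw a0, a1
 *     bne    +8
 *     mr     a0, a2
 * When a0 == a2, the branch instead skips the count so the default
 * already present in a0 is preserved.
 */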
2107
2108static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
2109                         TCGArg bl, bool blconst, TCGArg bh, bool bhconst)
2110{
2111    static const struct { uint8_t bit1, bit2; } bits[] = {
2112        [TCG_COND_LT ] = { CR_LT, CR_LT },
2113        [TCG_COND_LE ] = { CR_LT, CR_GT },
2114        [TCG_COND_GT ] = { CR_GT, CR_GT },
2115        [TCG_COND_GE ] = { CR_GT, CR_LT },
2116        [TCG_COND_LTU] = { CR_LT, CR_LT },
2117        [TCG_COND_LEU] = { CR_LT, CR_GT },
2118        [TCG_COND_GTU] = { CR_GT, CR_GT },
2119        [TCG_COND_GEU] = { CR_GT, CR_LT },
2120    };
2121
2122    TCGCond cond2;
2123    int op, bit1, bit2;
2124
2125    switch (cond) {
2126    case TCG_COND_EQ:
2127        op = CRAND;
2128        goto do_equality;
2129    case TCG_COND_NE:
2130        op = CRNAND;
2131    do_equality:
2132        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
2133        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
2134        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2135        break;
2136
2137    case TCG_COND_TSTEQ:
2138    case TCG_COND_TSTNE:
2139        if (blconst) {
2140            tcg_out_andi32(s, TCG_REG_R0, al, bl);
2141        } else {
2142            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
2143        }
2144        if (bhconst) {
2145            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
2146        } else {
2147            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
2148        }
2149        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
2150        break;
2151
2152    case TCG_COND_LT:
2153    case TCG_COND_LE:
2154    case TCG_COND_GT:
2155    case TCG_COND_GE:
2156    case TCG_COND_LTU:
2157    case TCG_COND_LEU:
2158    case TCG_COND_GTU:
2159    case TCG_COND_GEU:
2160        bit1 = bits[cond].bit1;
2161        bit2 = bits[cond].bit2;
2162        op = (bit1 != bit2 ? CRANDC : CRAND);
2163        cond2 = tcg_unsigned_cond(cond);
2164
2165        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
2166        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
2167        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
2168        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
2169        break;
2170
2171    default:
2172        g_assert_not_reached();
2173    }
2174}
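
/*
 * The ordered cases above decompose the double-word compare in the
 * usual way, e.g. LT as (ah < bh) || (ah == bh && al <u bl): the high
 * parts keep the signedness of the original condition, the low parts
 * are always compared unsigned, and crand/crandc plus cror fold the
 * two CR fields into CR0[EQ] for the caller to test.
 */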
2175
2176static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
2177                          TCGReg al, TCGReg ah,
2178                          TCGArg bl, bool const_bl,
2179                          TCGArg bh, bool const_bh)
2180{
2181    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
2182    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
2183    tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
2184}
2185
2186#if TCG_TARGET_REG_BITS != 32
2187__attribute__((unused))
2188#endif
2189static const TCGOutOpSetcond2 outop_setcond2 = {
2190    .base.static_constraint = C_O1_I4(r, r, r, rU, rC),
2191    .out = tgen_setcond2,
2192};
2193
2194static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
2195                         TCGArg bl, bool const_bl,
2196                         TCGArg bh, bool const_bh, TCGLabel *l)
2197{
2198    assert(TCG_TARGET_REG_BITS == 32);
2199    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
2200    tcg_out_bc_lab(s, TCG_COND_EQ, l);
2201}
2202
2203#if TCG_TARGET_REG_BITS != 32
2204__attribute__((unused))
2205#endif
2206static const TCGOutOpBrcond2 outop_brcond2 = {
2207    .base.static_constraint = C_O0_I4(r, r, rU, rC),
2208    .out = tgen_brcond2,
2209};
2210
2211static void tcg_out_mb(TCGContext *s, TCGArg a0)
2212{
2213    uint32_t insn;
2214
2215    if (a0 & TCG_MO_ST_LD) {
2216        insn = HWSYNC;
2217    } else {
2218        insn = LWSYNC;
2219    }
2220
2221    tcg_out32(s, insn);
2222}
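
/*
 * lwsync orders load-load, load-store and store-store; only a
 * store-load barrier requires the heavier hwsync.
 */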
2223
2224static void tcg_out_call_int(TCGContext *s, int lk,
2225                             const tcg_insn_unit *target)
2226{
2227#ifdef _CALL_AIX
2228    /* Look through the descriptor.  Use a direct branch when the target
2229       is in range and the toc value is cheap to build.  */
2230    const void *tgt = ((const void * const *)target)[0];
2231    uintptr_t toc = ((const uintptr_t *)target)[1];
2232    intptr_t diff = tcg_pcrel_diff(s, tgt);
2233
2234    if (in_range_b(diff) && toc == (uint32_t)toc) {
2235        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
2236        tcg_out_b(s, lk, tgt);
2237    } else {
2238        /* Fold the low bits of the constant into the addresses below.  */
2239        intptr_t arg = (intptr_t)target;
2240        int ofs = (int16_t)arg;
2241
2242        if (ofs + 8 < 0x8000) {
2243            arg -= ofs;
2244        } else {
2245            ofs = 0;
2246        }
2247        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
2248        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
2249        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
2250        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
2251        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2252    }
2253#elif defined(_CALL_ELF) && _CALL_ELF == 2
2254    intptr_t diff;
2255
2256    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
2257       address, which the callee uses to compute its TOC address.  */
2258    /* FIXME: when the branch is in range, we could avoid r12 load if we
2259       knew that the destination uses the same TOC, and what its local
2260       entry point offset is.  */
2261    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
2262
2263    diff = tcg_pcrel_diff(s, target);
2264    if (in_range_b(diff)) {
2265        tcg_out_b(s, lk, target);
2266    } else {
2267        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
2268        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2269    }
2270#else
2271    tcg_out_b(s, lk, target);
2272#endif
2273}
2274
2275static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
2276                         const TCGHelperInfo *info)
2277{
2278    tcg_out_call_int(s, LK, target);
2279}
2280
2281static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
2282    [MO_UB] = LBZX,
2283    [MO_UW] = LHZX,
2284    [MO_UL] = LWZX,
2285    [MO_UQ] = LDX,
2286    [MO_SW] = LHAX,
2287    [MO_SL] = LWAX,
2288    [MO_BSWAP | MO_UB] = LBZX,
2289    [MO_BSWAP | MO_UW] = LHBRX,
2290    [MO_BSWAP | MO_UL] = LWBRX,
2291    [MO_BSWAP | MO_UQ] = LDBRX,
2292};
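
/*
 * There is no sign-extending byte load, nor any byte-reversed
 * sign-extending load; the zero entries above are synthesized in
 * tcg_out_qemu_ld with a zero-extending load plus tcg_out_movext.
 */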
2293
2294static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
2295    [MO_UB] = STBX,
2296    [MO_UW] = STHX,
2297    [MO_UL] = STWX,
2298    [MO_UQ] = STDX,
2299    [MO_BSWAP | MO_UB] = STBX,
2300    [MO_BSWAP | MO_UW] = STHBRX,
2301    [MO_BSWAP | MO_UL] = STWBRX,
2302    [MO_BSWAP | MO_UQ] = STDBRX,
2303};
2304
2305static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2306{
2307    if (arg < 0) {
2308        arg = TCG_REG_TMP1;
2309    }
2310    tcg_out32(s, MFSPR | RT(arg) | LR);
2311    return arg;
2312}
2313
2314/*
2315 * For the purposes of ppc32 sorting 4 input registers into 4 argument
2316 * registers, there is an outside chance we would require 3 temps.
2317 */
2318static const TCGLdstHelperParam ldst_helper_param = {
2319    .ra_gen = ldst_ra_gen,
2320    .ntmp = 3,
2321    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
2322};
2323
2324static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2325{
2326    MemOp opc = get_memop(lb->oi);
2327
2328    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2329        return false;
2330    }
2331
2332    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
2333    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
2334    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
2335
2336    tcg_out_b(s, 0, lb->raddr);
2337    return true;
2338}
2339
2340static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2341{
2342    MemOp opc = get_memop(lb->oi);
2343
2344    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2345        return false;
2346    }
2347
2348    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
2349    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
2350
2351    tcg_out_b(s, 0, lb->raddr);
2352    return true;
2353}
2354
2355typedef struct {
2356    TCGReg base;
2357    TCGReg index;
2358    TCGAtomAlign aa;
2359} HostAddress;
2360
2361bool tcg_target_has_memory_bswap(MemOp memop)
2362{
2363    TCGAtomAlign aa;
2364
2365    if ((memop & MO_SIZE) <= MO_64) {
2366        return true;
2367    }
2368
2369    /*
2370     * Reject 16-byte memop with 16-byte atomicity,
2371     * but do allow a pair of 64-bit operations.
2372     */
2373    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2374    return aa.atom <= MO_64;
2375}
2376
2377/* We expect to use a 16-bit negative offset from ENV.  */
2378#define MIN_TLB_MASK_TABLE_OFS  -32768
2379
2380/*
2381 * For system-mode, perform the TLB load and compare.
2382 * For user-mode, perform any required alignment tests.
2383 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2384 * is required and fill in @h with the host address for the fast path.
2385 */
2386static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2387                                           TCGReg addr, MemOpIdx oi, bool is_ld)
2388{
2389    TCGType addr_type = s->addr_type;
2390    TCGLabelQemuLdst *ldst = NULL;
2391    MemOp opc = get_memop(oi);
2392    MemOp a_bits, s_bits;
2393
2394    /*
2395     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2396     *
2397     * Before 3.0, "An access that is not atomic is performed as a set of
2398     * smaller disjoint atomic accesses. In general, the number and alignment
2399     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
2400     *
2401     * As of 3.0, "the non-atomic access is performed as described in
2402     * the corresponding list", which matches MO_ATOM_SUBALIGN.
2403     */
2404    s_bits = opc & MO_SIZE;
2405    h->aa = atom_and_align_for_opc(s, opc,
2406                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
2407                                                 : MO_ATOM_IFALIGN,
2408                                   s_bits == MO_128);
2409    a_bits = h->aa.align;
2410
2411    if (tcg_use_softmmu) {
2412        int mem_index = get_mmuidx(oi);
2413        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2414                            : offsetof(CPUTLBEntry, addr_write);
2415        int fast_off = tlb_mask_table_ofs(s, mem_index);
2416        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2417        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2418
2419        ldst = new_ldst_label(s);
2420        ldst->is_ld = is_ld;
2421        ldst->oi = oi;
2422        ldst->addr_reg = addr;
2423
2424        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2425        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2426        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2427
2428        /* Extract the page index, shifted into place for tlb index.  */
2429        if (TCG_TARGET_REG_BITS == 32) {
2430            tcg_out_shri32(s, TCG_REG_R0, addr,
2431                           s->page_bits - CPU_TLB_ENTRY_BITS);
2432        } else {
2433            tcg_out_shri64(s, TCG_REG_R0, addr,
2434                           s->page_bits - CPU_TLB_ENTRY_BITS);
2435        }
2436        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2437
2438        /*
2439         * Load the TLB comparator into TMP2.
2440         * For 64-bit host, always load the entire 64-bit slot for simplicity.
2441         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2442         */
2443        if (cmp_off == 0) {
2444            tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX)
2445                      | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
2446        } else {
2447            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2448            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
2449        }
2450
2451        /*
2452         * Load the TLB addend for use on the fast path.
2453         * Do this asap to minimize any load use delay.
2454         */
2455        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2456            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2457                       offsetof(CPUTLBEntry, addend));
2458        }
2459
2460        /* Clear the non-page, non-alignment bits from the address in R0. */
2461        if (TCG_TARGET_REG_BITS == 32) {
2462            /*
2463             * We don't support unaligned accesses on 32-bit hosts.
2464             * Preserve the bottom bits and thus trigger a comparison
2465             * failure on unaligned accesses.
2466             */
2467            if (a_bits < s_bits) {
2468                a_bits = s_bits;
2469            }
2470            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
2471                        (32 - a_bits) & 31, 31 - s->page_bits);
2472        } else {
2473            TCGReg t = addr;
2474
2475            /*
2476             * If the access is unaligned, we need to make sure we fail if we
2477             * cross a page boundary.  The trick is to add the access size-1
2478             * to the address before masking the low bits.  That will make the
2479             * address overflow to the next page if we cross a page boundary,
2480             * which will then force a mismatch of the TLB compare.
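             *
             * For example, an 8-byte access (s_mask = 7) with byte
             * alignment (a_mask = 0) at 0x...fffd becomes 0x...10004
             * after the addition, so the masked page number no longer
             * matches the TLB comparator.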
2481             */
2482            if (a_bits < s_bits) {
2483                unsigned a_mask = (1 << a_bits) - 1;
2484                unsigned s_mask = (1 << s_bits) - 1;
2485                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2486                t = TCG_REG_R0;
2487            }
2488
2489            /* Mask the address for the requested alignment.  */
2490            if (addr_type == TCG_TYPE_I32) {
2491                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2492                            (32 - a_bits) & 31, 31 - s->page_bits);
2493            } else if (a_bits == 0) {
2494                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2495            } else {
2496                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2497                            64 - s->page_bits, s->page_bits - a_bits);
2498                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
2499            }
2500        }
2501
2502        /* Full comparison into cr0. */
2503        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type);
2504
2505        /* Remember the bcl insn below so its target can be patched to the slow path. */
2506        ldst->label_ptr[0] = s->code_ptr;
2507        tcg_out_bc(s, TCG_COND_NE, LK);
2508
2509        h->base = TCG_REG_TMP1;
2510    } else {
2511        if (a_bits) {
2512            ldst = new_ldst_label(s);
2513            ldst->is_ld = is_ld;
2514            ldst->oi = oi;
2515            ldst->addr_reg = addr;
2516
2517            /* We expect a_bits to max out at 7, well within ANDI's 16-bit immediate. */
2518            tcg_debug_assert(a_bits < 16);
2519            tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1));
2520
2521            ldst->label_ptr[0] = s->code_ptr;
2522            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2523        }
2524
2525        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2526    }
2527
2528    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2529        /* Zero-extend the guest address for use in the host address. */
2530        tcg_out_ext32u(s, TCG_REG_TMP2, addr);
2531        h->index = TCG_REG_TMP2;
2532    } else {
2533        h->index = addr;
2534    }
2535
2536    return ldst;
2537}
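
/*
 * Sketch of the 64-bit softmmu fast path built above: load the mask and
 * table pointers, shift the page index into place and AND with the mask,
 * fetch the comparator and the addend, mask the address down to its page
 * and alignment bits, then cmp plus conditional branch-and-link to the
 * slow path.  The addend left in TMP1 becomes h->base.
 */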
2538
2539static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2540                            TCGReg addr, MemOpIdx oi, TCGType data_type)
2541{
2542    MemOp opc = get_memop(oi);
2543    TCGLabelQemuLdst *ldst;
2544    HostAddress h;
2545
2546    ldst = prepare_host_addr(s, &h, addr, oi, true);
2547
2548    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2549        if (opc & MO_BSWAP) {
2550            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2551            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2552            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
2553        } else if (h.base != 0) {
2554            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2555            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
2556            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
2557        } else if (h.index == datahi) {
2558            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2559            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2560        } else {
2561            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2562            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2563        }
2564    } else {
2565        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2566        if (!have_isa_2_06 && insn == LDBRX) {
2567            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2568            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2569            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
2570            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2571        } else if (insn) {
2572            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2573        } else {
2574            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2575            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2576            tcg_out_movext(s, TCG_TYPE_REG, datalo,
2577                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
2578        }
2579    }
2580
2581    if (ldst) {
2582        ldst->type = data_type;
2583        ldst->datalo_reg = datalo;
2584        ldst->datahi_reg = datahi;
2585        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2586    }
2587}
2588
2589static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2590                            TCGReg addr, MemOpIdx oi, TCGType data_type)
2591{
2592    MemOp opc = get_memop(oi);
2593    TCGLabelQemuLdst *ldst;
2594    HostAddress h;
2595
2596    ldst = prepare_host_addr(s, &h, addr, oi, false);
2597
2598    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2599        if (opc & MO_BSWAP) {
2600            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2601            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2602            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
2603        } else if (h.base != 0) {
2604            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2605            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
2606            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
2607        } else {
2608            tcg_out32(s, STW | TAI(datahi, h.index, 0));
2609            tcg_out32(s, STW | TAI(datalo, h.index, 4));
2610        }
2611    } else {
2612        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2613        if (!have_isa_2_06 && insn == STDBRX) {
2614            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2615            tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4));
2616            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2617            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2));
2618        } else {
2619            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
2620        }
2621    }
2622
2623    if (ldst) {
2624        ldst->type = data_type;
2625        ldst->datalo_reg = datalo;
2626        ldst->datahi_reg = datahi;
2627        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2628    }
2629}
2630
2631static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
2632                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
2633{
2634    TCGLabelQemuLdst *ldst;
2635    HostAddress h;
2636    bool need_bswap;
2637    uint32_t insn;
2638    TCGReg index;
2639
2640    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
2641
2642    /* Compose the final address, as LQ/STQ have no indexing. */
2643    index = h.index;
2644    if (h.base != 0) {
2645        index = TCG_REG_TMP1;
2646        tcg_out32(s, ADD | TAB(index, h.base, h.index));
2647    }
2648    need_bswap = get_memop(oi) & MO_BSWAP;
2649
2650    if (h.aa.atom == MO_128) {
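        /*
         * lq/stq address an even/odd register pair through the even
         * register; the register constraints arranged the pairing
         * verified below.
         */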
2651        tcg_debug_assert(!need_bswap);
2652        tcg_debug_assert(datalo & 1);
2653        tcg_debug_assert(datahi == datalo - 1);
2654        tcg_debug_assert(!is_ld || datahi != index);
2655        insn = is_ld ? LQ : STQ;
2656        tcg_out32(s, insn | TAI(datahi, index, 0));
2657    } else {
2658        TCGReg d1, d2;
2659
2660        if (HOST_BIG_ENDIAN ^ need_bswap) {
2661            d1 = datahi, d2 = datalo;
2662        } else {
2663            d1 = datalo, d2 = datahi;
2664        }
2665
2666        if (need_bswap) {
2667            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
2668            insn = is_ld ? LDBRX : STDBRX;
2669            tcg_out32(s, insn | TAB(d1, 0, index));
2670            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
2671        } else {
2672            insn = is_ld ? LD : STD;
2673            tcg_out32(s, insn | TAI(d1, index, 0));
2674            tcg_out32(s, insn | TAI(d2, index, 8));
2675        }
2676    }
2677
2678    if (ldst) {
2679        ldst->type = TCG_TYPE_I128;
2680        ldst->datalo_reg = datalo;
2681        ldst->datahi_reg = datahi;
2682        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2683    }
2684}
2685
2686static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2687{
2688    int i;
2689    for (i = 0; i < count; ++i) {
2690        p[i] = NOP;
2691    }
2692}
2693
2694/* Parameters for function call generation, used in tcg.c.  */
2695#define TCG_TARGET_STACK_ALIGN       16
2696
2697#ifdef _CALL_AIX
2698# define LINK_AREA_SIZE                (6 * SZR)
2699# define LR_OFFSET                     (1 * SZR)
2700# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2701#elif defined(_CALL_DARWIN)
2702# define LINK_AREA_SIZE                (6 * SZR)
2703# define LR_OFFSET                     (2 * SZR)
2704#elif TCG_TARGET_REG_BITS == 64
2705# if defined(_CALL_ELF) && _CALL_ELF == 2
2706#  define LINK_AREA_SIZE               (4 * SZR)
2707#  define LR_OFFSET                    (1 * SZR)
2708# endif
2709#else /* TCG_TARGET_REG_BITS == 32 */
2710# if defined(_CALL_SYSV)
2711#  define LINK_AREA_SIZE               (2 * SZR)
2712#  define LR_OFFSET                    (1 * SZR)
2713# endif
2714#endif
2715#ifndef LR_OFFSET
2716# error "Unhandled abi"
2717#endif
2718#ifndef TCG_TARGET_CALL_STACK_OFFSET
2719# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2720#endif
2721
2722#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2723#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2724
2725#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2726                     + TCG_STATIC_CALL_ARGS_SIZE    \
2727                     + CPU_TEMP_BUF_SIZE            \
2728                     + REG_SAVE_SIZE                \
2729                     + TCG_TARGET_STACK_ALIGN - 1)  \
2730                    & -TCG_TARGET_STACK_ALIGN)
2731
2732#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
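
/*
 * Resulting frame layout, low addresses first (alignment padding
 * ignored):
 *     [0, TCG_TARGET_CALL_STACK_OFFSET)   back chain and link area
 *     outgoing call arguments             TCG_STATIC_CALL_ARGS_SIZE
 *     TCG temporary buffer                CPU_TEMP_BUF_SIZE
 *     [REG_SAVE_BOT, FRAME_SIZE)          callee-saved registers
 */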
2733
2734static void tcg_target_qemu_prologue(TCGContext *s)
2735{
2736    int i;
2737
2738#ifdef _CALL_AIX
2739    const void **desc = (const void **)s->code_ptr;
2740    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2741    desc[1] = 0;                            /* environment pointer */
2742    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2743#endif
2744
2745    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2746                  CPU_TEMP_BUF_SIZE);
2747
2748    /* Prologue */
2749    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2750    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2751              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2752
2753    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2754        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2755                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2756    }
2757    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2758
2759    if (!tcg_use_softmmu && guest_base) {
2760        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2761        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2762    }
2763
2764    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2765    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2766    tcg_out32(s, BCCTR | BO_ALWAYS);
2767
2768    /* Epilogue */
2769    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2770
2771    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2772    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2773        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2774                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2775    }
2776    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2777    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2778    tcg_out32(s, BCLR | BO_ALWAYS);
2779}
2780
2781static void tcg_out_tb_start(TCGContext *s)
2782{
2783    /* Load TCG_REG_TB. */
2784    if (USE_REG_TB) {
2785        if (have_isa_3_00) {
2786            /* lnia REG_TB */
2787            tcg_out_addpcis(s, TCG_REG_TB, 0);
2788        } else {
2789            /* bcl 20,31,$+4 (preferred form for getting nia) */
2790            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
2791            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
2792        }
2793    }
2794}
2795
2796static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2797{
2798    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2799    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2800}
2801
2802static void tcg_out_goto_tb(TCGContext *s, int which)
2803{
2804    uintptr_t ptr = get_jmp_target_addr(s, which);
2805    int16_t lo;
2806
2807    /* Direct branch will be patched by tb_target_set_jmp_target. */
2808    set_jmp_insn_offset(s, which);
2809    tcg_out32(s, NOP);
2810
2811    /* When the branch is out of range, fall through to the indirect path. */
2812    if (USE_REG_TB) {
2813        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
2814        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
2815    } else if (have_isa_3_10) {
2816        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
2817        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
2818    } else if (have_isa_3_00) {
2819        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
2820        lo = offset;
2821        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
2822        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2823    } else {
2824        lo = ptr;
2825        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
2826        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2827    }
2828
2829    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2830    tcg_out32(s, BCCTR | BO_ALWAYS);
2831    set_jmp_reset_offset(s, which);
2832}
2833
2834void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2835                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2836{
2837    uintptr_t addr = tb->jmp_target_addr[n];
2838    intptr_t diff = addr - jmp_rx;
2839    tcg_insn_unit insn;
2840
2841    if (in_range_b(diff)) {
2842        insn = B | (diff & 0x3fffffc);
2843    } else {
2844        insn = NOP;
2845    }
2846
2847    qatomic_set((uint32_t *)jmp_rw, insn);
2848    flush_idcache_range(jmp_rx, jmp_rw, 4);
2849}
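
/*
 * Taken together with tcg_out_goto_tb: while the chain target is within
 * direct-branch range, the nop at the jump site is rewritten into a
 * branch; otherwise the nop remains and execution falls through to the
 * indirect mtctr/bcctr sequence.  The 4-byte qatomic_set keeps the
 * rewrite atomic with respect to concurrently executing cpus.
 */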
2850
2851
2852static void tgen_add(TCGContext *s, TCGType type,
2853                     TCGReg a0, TCGReg a1, TCGReg a2)
2854{
2855    tcg_out32(s, ADD | TAB(a0, a1, a2));
2856}
2857
2858static void tgen_addi(TCGContext *s, TCGType type,
2859                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2860{
2861    tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2862}
2863
2864static const TCGOutOpBinary outop_add = {
2865    .base.static_constraint = C_O1_I2(r, r, rT),
2866    .out_rrr = tgen_add,
2867    .out_rri = tgen_addi,
2868};
2869
2870static void tgen_addco_rrr(TCGContext *s, TCGType type,
2871                           TCGReg a0, TCGReg a1, TCGReg a2)
2872{
2873    tcg_out32(s, ADDC | TAB(a0, a1, a2));
2874}
2875
2876static void tgen_addco_rri(TCGContext *s, TCGType type,
2877                           TCGReg a0, TCGReg a1, tcg_target_long a2)
2878{
2879    tcg_out32(s, ADDIC | TAI(a0, a1, a2));
2880}
2881
2882static TCGConstraintSetIndex cset_addco(TCGType type, unsigned flags)
2883{
2884    /*
2885     * Note that the CA bit is defined based on the word size of the
2886     * environment.  So in 64-bit mode it's always carry-out of bit 63.
2887     * The fallback code using deposit works just as well for TCG_TYPE_I32.
2888     */
2889    return type == TCG_TYPE_REG ? C_O1_I2(r, r, rI) : C_NotImplemented;
2890}
2891
2892static const TCGOutOpBinary outop_addco = {
2893    .base.static_constraint = C_Dynamic,
2894    .base.dynamic_constraint = cset_addco,
2895    .out_rrr = tgen_addco_rrr,
2896    .out_rri = tgen_addco_rri,
2897};
2898
2899static void tgen_addcio_rrr(TCGContext *s, TCGType type,
2900                            TCGReg a0, TCGReg a1, TCGReg a2)
2901{
2902    tcg_out32(s, ADDE | TAB(a0, a1, a2));
2903}
2904
2905static void tgen_addcio_rri(TCGContext *s, TCGType type,
2906                            TCGReg a0, TCGReg a1, tcg_target_long a2)
2907{
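    /* The rZM constraint limits a2 to 0 or -1: addze adds CA alone,
       addme adds CA - 1. */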
2908    tcg_out32(s, (a2 ? ADDME : ADDZE) | RT(a0) | RA(a1));
2909}
2910
2911static TCGConstraintSetIndex cset_addcio(TCGType type, unsigned flags)
2912{
2913    return type == TCG_TYPE_REG ? C_O1_I2(r, r, rZM) : C_NotImplemented;
2914}
2915
2916static const TCGOutOpBinary outop_addcio = {
2917    .base.static_constraint = C_Dynamic,
2918    .base.dynamic_constraint = cset_addcio,
2919    .out_rrr = tgen_addcio_rrr,
2920    .out_rri = tgen_addcio_rri,
2921};
2922
2923static const TCGOutOpAddSubCarry outop_addci = {
2924    .base.static_constraint = C_Dynamic,
2925    .base.dynamic_constraint = cset_addcio,
2926    .out_rrr = tgen_addcio_rrr,
2927    .out_rri = tgen_addcio_rri,
2928};
2929
2930static void tcg_out_set_carry(TCGContext *s)
2931{
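    /* x - x never borrows, so subfc leaves CA set (and r0 zeroed). */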
2932    tcg_out32(s, SUBFC | TAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_R0));
2933}
2934
2935static void tgen_and(TCGContext *s, TCGType type,
2936                     TCGReg a0, TCGReg a1, TCGReg a2)
2937{
2938    tcg_out32(s, AND | SAB(a1, a0, a2));
2939}
2940
2941static void tgen_andi(TCGContext *s, TCGType type,
2942                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2943{
2944    if (type == TCG_TYPE_I32) {
2945        tcg_out_andi32(s, a0, a1, a2);
2946    } else {
2947        tcg_out_andi64(s, a0, a1, a2);
2948    }
2949}
2950
2951static const TCGOutOpBinary outop_and = {
2952    .base.static_constraint = C_O1_I2(r, r, ri),
2953    .out_rrr = tgen_and,
2954    .out_rri = tgen_andi,
2955};
2956
2957static void tgen_andc(TCGContext *s, TCGType type,
2958                      TCGReg a0, TCGReg a1, TCGReg a2)
2959{
2960    tcg_out32(s, ANDC | SAB(a1, a0, a2));
2961}
2962
2963static const TCGOutOpBinary outop_andc = {
2964    .base.static_constraint = C_O1_I2(r, r, r),
2965    .out_rrr = tgen_andc,
2966};
2967
2968static void tgen_clz(TCGContext *s, TCGType type,
2969                     TCGReg a0, TCGReg a1, TCGReg a2)
2970{
2971    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
2972    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
2973}
2974
2975static void tgen_clzi(TCGContext *s, TCGType type,
2976                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2977{
2978    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
2979    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
2980}
2981
2982static const TCGOutOpBinary outop_clz = {
2983    .base.static_constraint = C_O1_I2(r, r, rZW),
2984    .out_rrr = tgen_clz,
2985    .out_rri = tgen_clzi,
2986};
2987
2988static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2989{
2990    uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD;
2991    tcg_out32(s, insn | SAB(a1, a0, 0));
2992}
2993
2994static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
2995{
2996    return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented;
2997}
2998
2999static const TCGOutOpUnary outop_ctpop = {
3000    .base.static_constraint = C_Dynamic,
3001    .base.dynamic_constraint = cset_ctpop,
3002    .out_rr = tgen_ctpop,
3003};
3004
3005static void tgen_ctz(TCGContext *s, TCGType type,
3006                     TCGReg a0, TCGReg a1, TCGReg a2)
3007{
3008    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
3009    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
3010}
3011
3012static void tgen_ctzi(TCGContext *s, TCGType type,
3013                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3014{
3015    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
3016    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
3017}
3018
3019static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags)
3020{
3021    return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented;
3022}
3023
3024static const TCGOutOpBinary outop_ctz = {
3025    .base.static_constraint = C_Dynamic,
3026    .base.dynamic_constraint = cset_ctz,
3027    .out_rrr = tgen_ctz,
3028    .out_rri = tgen_ctzi,
3029};
3030
3031static void tgen_eqv(TCGContext *s, TCGType type,
3032                     TCGReg a0, TCGReg a1, TCGReg a2)
3033{
3034    tcg_out32(s, EQV | SAB(a1, a0, a2));
3035}
3036
3037#if TCG_TARGET_REG_BITS == 64
3038static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
3039{
3040    tcg_out_shri64(s, a0, a1, 32);
3041}
3042
3043static const TCGOutOpUnary outop_extrh_i64_i32 = {
3044    .base.static_constraint = C_O1_I1(r, r),
3045    .out_rr = tgen_extrh_i64_i32,
3046};
3047#endif
3048
3049static void tgen_divs(TCGContext *s, TCGType type,
3050                      TCGReg a0, TCGReg a1, TCGReg a2)
3051{
3052    uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD;
3053    tcg_out32(s, insn | TAB(a0, a1, a2));
3054}
3055
3056static const TCGOutOpBinary outop_divs = {
3057    .base.static_constraint = C_O1_I2(r, r, r),
3058    .out_rrr = tgen_divs,
3059};
3060
3061static const TCGOutOpDivRem outop_divs2 = {
3062    .base.static_constraint = C_NotImplemented,
3063};
3064
3065static void tgen_divu(TCGContext *s, TCGType type,
3066                      TCGReg a0, TCGReg a1, TCGReg a2)
3067{
3068    uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU;
3069    tcg_out32(s, insn | TAB(a0, a1, a2));
3070}
3071
3072static const TCGOutOpBinary outop_divu = {
3073    .base.static_constraint = C_O1_I2(r, r, r),
3074    .out_rrr = tgen_divu,
3075};
3076
3077static const TCGOutOpDivRem outop_divu2 = {
3078    .base.static_constraint = C_NotImplemented,
3079};
3080
3081static const TCGOutOpBinary outop_eqv = {
3082    .base.static_constraint = C_O1_I2(r, r, r),
3083    .out_rrr = tgen_eqv,
3084};
3085
3086static void tgen_mul(TCGContext *s, TCGType type,
3087                    TCGReg a0, TCGReg a1, TCGReg a2)
3088{
3089    uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD;
3090    tcg_out32(s, insn | TAB(a0, a1, a2));
3091}
3092
3093static void tgen_muli(TCGContext *s, TCGType type,
3094                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3095{
3096    tcg_out32(s, MULLI | TAI(a0, a1, a2));
3097}
3098
3099static const TCGOutOpBinary outop_mul = {
3100    .base.static_constraint = C_O1_I2(r, r, rI),
3101    .out_rrr = tgen_mul,
3102    .out_rri = tgen_muli,
3103};
3104
3105static const TCGOutOpMul2 outop_muls2 = {
3106    .base.static_constraint = C_NotImplemented,
3107};
3108
3109static void tgen_mulsh(TCGContext *s, TCGType type,
3110                       TCGReg a0, TCGReg a1, TCGReg a2)
3111{
3112    uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD;
3113    tcg_out32(s, insn | TAB(a0, a1, a2));
3114}
3115
3116static const TCGOutOpBinary outop_mulsh = {
3117    .base.static_constraint = C_O1_I2(r, r, r),
3118    .out_rrr = tgen_mulsh,
3119};
3120
3121static const TCGOutOpMul2 outop_mulu2 = {
3122    .base.static_constraint = C_NotImplemented,
3123};
3124
3125static void tgen_muluh(TCGContext *s, TCGType type,
3126                       TCGReg a0, TCGReg a1, TCGReg a2)
3127{
3128    uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU;
3129    tcg_out32(s, insn | TAB(a0, a1, a2));
3130}
3131
3132static const TCGOutOpBinary outop_muluh = {
3133    .base.static_constraint = C_O1_I2(r, r, r),
3134    .out_rrr = tgen_muluh,
3135};
3136
3137static void tgen_nand(TCGContext *s, TCGType type,
3138                     TCGReg a0, TCGReg a1, TCGReg a2)
3139{
3140    tcg_out32(s, NAND | SAB(a1, a0, a2));
3141}
3142
3143static const TCGOutOpBinary outop_nand = {
3144    .base.static_constraint = C_O1_I2(r, r, r),
3145    .out_rrr = tgen_nand,
3146};
3147
3148static void tgen_nor(TCGContext *s, TCGType type,
3149                     TCGReg a0, TCGReg a1, TCGReg a2)
3150{
3151    tcg_out32(s, NOR | SAB(a1, a0, a2));
3152}
3153
3154static const TCGOutOpBinary outop_nor = {
3155    .base.static_constraint = C_O1_I2(r, r, r),
3156    .out_rrr = tgen_nor,
3157};
3158
3159static void tgen_or(TCGContext *s, TCGType type,
3160                    TCGReg a0, TCGReg a1, TCGReg a2)
3161{
3162    tcg_out32(s, OR | SAB(a1, a0, a2));
3163}
3164
3165static void tgen_ori(TCGContext *s, TCGType type,
3166                     TCGReg a0, TCGReg a1, tcg_target_long a2)
3167{
3168    tcg_out_ori32(s, a0, a1, a2);
3169}
3170
3171static const TCGOutOpBinary outop_or = {
3172    .base.static_constraint = C_O1_I2(r, r, rU),
3173    .out_rrr = tgen_or,
3174    .out_rri = tgen_ori,
3175};
3176
3177static void tgen_orc(TCGContext *s, TCGType type,
3178                     TCGReg a0, TCGReg a1, TCGReg a2)
3179{
3180    tcg_out32(s, ORC | SAB(a1, a0, a2));
3181}
3182
3183static const TCGOutOpBinary outop_orc = {
3184    .base.static_constraint = C_O1_I2(r, r, r),
3185    .out_rrr = tgen_orc,
3186};
3187
3188static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags)
3189{
3190    return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented;
3191}
3192
3193static void tgen_rems(TCGContext *s, TCGType type,
3194                      TCGReg a0, TCGReg a1, TCGReg a2)
3195{
3196    uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD;
3197    tcg_out32(s, insn | TAB(a0, a1, a2));
3198}
3199
3200static const TCGOutOpBinary outop_rems = {
3201    .base.static_constraint = C_Dynamic,
3202    .base.dynamic_constraint = cset_mod,
3203    .out_rrr = tgen_rems,
3204};
3205
3206static void tgen_remu(TCGContext *s, TCGType type,
3207                      TCGReg a0, TCGReg a1, TCGReg a2)
3208{
3209    uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD;
3210    tcg_out32(s, insn | TAB(a0, a1, a2));
3211}
3212
3213static const TCGOutOpBinary outop_remu = {
3214    .base.static_constraint = C_Dynamic,
3215    .base.dynamic_constraint = cset_mod,
3216    .out_rrr = tgen_remu,
3217};
3218
3219static void tgen_rotl(TCGContext *s, TCGType type,
3220                     TCGReg a0, TCGReg a1, TCGReg a2)
3221{
3222    if (type == TCG_TYPE_I32) {
3223        tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31));
3224    } else {
3225        tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0));
3226    }
3227}
3228
3229static void tgen_rotli(TCGContext *s, TCGType type,
3230                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3231{
3232    if (type == TCG_TYPE_I32) {
3233        tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
3234    } else {
3235        tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
3236    }
3237}
3238
3239static const TCGOutOpBinary outop_rotl = {
3240    .base.static_constraint = C_O1_I2(r, r, ri),
3241    .out_rrr = tgen_rotl,
3242    .out_rri = tgen_rotli,
3243};
3244
3245static const TCGOutOpBinary outop_rotr = {
3246    .base.static_constraint = C_NotImplemented,
3247};
3248
3249static void tgen_sar(TCGContext *s, TCGType type,
3250                     TCGReg a0, TCGReg a1, TCGReg a2)
3251{
3252    uint32_t insn = type == TCG_TYPE_I32 ? SRAW : SRAD;
3253    tcg_out32(s, insn | SAB(a1, a0, a2));
3254}
3255
3256static void tgen_sari(TCGContext *s, TCGType type,
3257                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3258{
3259    /* Limit immediate shift count lest we create an illegal insn.  */
3260    if (type == TCG_TYPE_I32) {
3261        tcg_out_sari32(s, a0, a1, a2 & 31);
3262    } else {
3263        tcg_out_sari64(s, a0, a1, a2 & 63);
3264    }
3265}
3266
3267static const TCGOutOpBinary outop_sar = {
3268    .base.static_constraint = C_O1_I2(r, r, ri),
3269    .out_rrr = tgen_sar,
3270    .out_rri = tgen_sari,
3271};
3272
3273static void tgen_shl(TCGContext *s, TCGType type,
3274                     TCGReg a0, TCGReg a1, TCGReg a2)
3275{
3276    uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD;
3277    tcg_out32(s, insn | SAB(a1, a0, a2));
3278}
3279
3280static void tgen_shli(TCGContext *s, TCGType type,
3281                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3282{
3283    /* Limit immediate shift count lest we create an illegal insn.  */
3284    if (type == TCG_TYPE_I32) {
3285        tcg_out_shli32(s, a0, a1, a2 & 31);
3286    } else {
3287        tcg_out_shli64(s, a0, a1, a2 & 63);
3288    }
3289}
3290
3291static const TCGOutOpBinary outop_shl = {
3292    .base.static_constraint = C_O1_I2(r, r, ri),
3293    .out_rrr = tgen_shl,
3294    .out_rri = tgen_shli,
3295};
3296
3297static void tgen_shr(TCGContext *s, TCGType type,
3298                     TCGReg a0, TCGReg a1, TCGReg a2)
3299{
3300    uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD;
3301    tcg_out32(s, insn | SAB(a1, a0, a2));
3302}
3303
3304static void tgen_shri(TCGContext *s, TCGType type,
3305                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3306{
3307    /* Limit immediate shift count lest we create an illegal insn.  */
3308    if (type == TCG_TYPE_I32) {
3309        tcg_out_shri32(s, a0, a1, a2 & 31);
3310    } else {
3311        tcg_out_shri64(s, a0, a1, a2 & 63);
3312    }
3313}
3314
3315static const TCGOutOpBinary outop_shr = {
3316    .base.static_constraint = C_O1_I2(r, r, ri),
3317    .out_rrr = tgen_shr,
3318    .out_rri = tgen_shri,
3319};
3320
3321static void tgen_sub(TCGContext *s, TCGType type,
3322                     TCGReg a0, TCGReg a1, TCGReg a2)
3323{
3324    tcg_out32(s, SUBF | TAB(a0, a2, a1));
3325}
3326
3327static void tgen_subfi(TCGContext *s, TCGType type,
3328                       TCGReg a0, tcg_target_long a1, TCGReg a2)
3329{
3330    tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3331}
3332
3333static const TCGOutOpSubtract outop_sub = {
3334    .base.static_constraint = C_O1_I2(r, rI, r),
3335    .out_rrr = tgen_sub,
3336    .out_rir = tgen_subfi,
3337};
3338
3339static void tgen_subbo_rrr(TCGContext *s, TCGType type,
3340                           TCGReg a0, TCGReg a1, TCGReg a2)
3341{
3342    tcg_out32(s, SUBFC | TAB(a0, a2, a1));
3343}
3344
3345static void tgen_subbo_rri(TCGContext *s, TCGType type,
3346                           TCGReg a0, TCGReg a1, tcg_target_long a2)
3347{
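    /*
     * Subtracting 0 must still set CA (no borrow), but addic with
     * immediate 0 would clear it, as a1 + 0 never carries.  Subtract
     * a zero register instead to get the correct carry-out.
     */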
3348    if (a2 == 0) {
3349        tcg_out_movi(s, type, TCG_REG_R0, 0);
3350        tgen_subbo_rrr(s, type, a0, a1, TCG_REG_R0);
3351    } else {
3352        tgen_addco_rri(s, type, a0, a1, -a2);
3353    }
3354}
3355
3356/* The underlying insn for subfi is subfic. */
3357#define tgen_subbo_rir  tgen_subfi
3358
3359static void tgen_subbo_rii(TCGContext *s, TCGType type,
3360                           TCGReg a0, tcg_target_long a1, tcg_target_long a2)
3361{
3362    tcg_out_movi(s, type, TCG_REG_R0, a2);
3363    tgen_subbo_rir(s, type, a0, a1, TCG_REG_R0);
3364}
3365
3366static TCGConstraintSetIndex cset_subbo(TCGType type, unsigned flags)
3367{
3368    /* Recall that the CA bit is defined based on the host word size. */
3369    return type == TCG_TYPE_REG ? C_O1_I2(r, rI, rN) : C_NotImplemented;
3370}
3371
3372static const TCGOutOpAddSubCarry outop_subbo = {
3373    .base.static_constraint = C_Dynamic,
3374    .base.dynamic_constraint = cset_subbo,
3375    .out_rrr = tgen_subbo_rrr,
3376    .out_rri = tgen_subbo_rri,
3377    .out_rir = tgen_subbo_rir,
3378    .out_rii = tgen_subbo_rii,
3379};
3380
3381static void tgen_subbio_rrr(TCGContext *s, TCGType type,
3382                            TCGReg a0, TCGReg a1, TCGReg a2)
3383{
3384    tcg_out32(s, SUBFE | TAB(a0, a2, a1));
3385}
3386
3387static void tgen_subbio_rri(TCGContext *s, TCGType type,
3388                            TCGReg a0, TCGReg a1, tcg_target_long a2)
3389{
3390    tgen_addcio_rri(s, type, a0, a1, ~a2);
3391}
3392
3393static void tgen_subbio_rir(TCGContext *s, TCGType type,
3394                            TCGReg a0, tcg_target_long a1, TCGReg a2)
3395{
3396    tcg_debug_assert(a1 == 0 || a1 == -1);
3397    tcg_out32(s, (a1 ? SUBFME : SUBFZE) | RT(a0) | RA(a2));
3398}
3399
3400static void tgen_subbio_rii(TCGContext *s, TCGType type,
3401                            TCGReg a0, tcg_target_long a1, tcg_target_long a2)
3402{
3403    tcg_out_movi(s, type, TCG_REG_R0, a2);
3404    tgen_subbio_rir(s, type, a0, a1, TCG_REG_R0);
3405}
3406
3407static TCGConstraintSetIndex cset_subbio(TCGType type, unsigned flags)
3408{
3409    return type == TCG_TYPE_REG ? C_O1_I2(r, rZM, rZM) : C_NotImplemented;
3410}
3411
3412static const TCGOutOpAddSubCarry outop_subbio = {
3413    .base.static_constraint = C_Dynamic,
3414    .base.dynamic_constraint = cset_subbio,
3415    .out_rrr = tgen_subbio_rrr,
3416    .out_rri = tgen_subbio_rri,
3417    .out_rir = tgen_subbio_rir,
3418    .out_rii = tgen_subbio_rii,
3419};
3420
3421#define outop_subbi  outop_subbio
3422
3423static void tcg_out_set_borrow(TCGContext *s)
3424{
3425    /* borrow = !carry: adding 0 never carries out, so ADDIC clears CA. */
3426    tcg_out32(s, ADDIC | TAI(TCG_REG_R0, TCG_REG_R0, 0));
3427}
3428
3429static void tgen_xor(TCGContext *s, TCGType type,
3430                     TCGReg a0, TCGReg a1, TCGReg a2)
3431{
3432    tcg_out32(s, XOR | SAB(a1, a0, a2));
3433}
3434
3435static void tgen_xori(TCGContext *s, TCGType type,
3436                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3437{
3438    tcg_out_xori32(s, a0, a1, a2);
3439}
3440
3441static const TCGOutOpBinary outop_xor = {
3442    .base.static_constraint = C_O1_I2(r, r, rU),
3443    .out_rrr = tgen_xor,
3444    .out_rri = tgen_xori,
3445};
3446
3447static void tgen_bswap16(TCGContext *s, TCGType type,
3448                         TCGReg dst, TCGReg src, unsigned flags)
3449{
3450    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
3451
3452    if (have_isa_3_10) {
3453        tcg_out32(s, BRH | RA(dst) | RS(src));
3454        if (flags & TCG_BSWAP_OS) {
3455            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
3456        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
3457            tcg_out_ext16u(s, dst, dst);
3458        }
3459        return;
3460    }
3461
3462    /*
3463     * In the following,
3464     *   dep(a, b, m) -> (a & ~m) | (b & m)
3465     *
3466     * Begin with:                              src = xxxxabcd
3467     */
3468    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
3469    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
3470    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
3471    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
3472
3473    if (flags & TCG_BSWAP_OS) {
3474        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
3475    } else {
3476        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
3477    }
3478}
3479
3480static const TCGOutOpBswap outop_bswap16 = {
3481    .base.static_constraint = C_O1_I1(r, r),
3482    .out_rr = tgen_bswap16,
3483};
3484
3485static void tgen_bswap32(TCGContext *s, TCGType type,
3486                         TCGReg dst, TCGReg src, unsigned flags)
3487{
3488    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
3489
3490    if (have_isa_3_10) {
3491        tcg_out32(s, BRW | RA(dst) | RS(src));
3492        if (flags & TCG_BSWAP_OS) {
3493            tcg_out_ext32s(s, dst, dst);
3494        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
3495            tcg_out_ext32u(s, dst, dst);
3496        }
3497        return;
3498    }
3499
3500    /*
3501     * Stolen from gcc's builtin_bswap32.
3502     * In the following,
3503     *   dep(a, b, m) -> (a & ~m) | (b & m)
3504     *
3505     * Begin with:                              src = xxxxabcd
3506     */
3507    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
3508    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
3509    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
3510    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
3511    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
3512    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
3513
3514    if (flags & TCG_BSWAP_OS) {
3515        tcg_out_ext32s(s, dst, tmp);
3516    } else {
3517        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
3518    }
3519}
3520
3521static const TCGOutOpBswap outop_bswap32 = {
3522    .base.static_constraint = C_O1_I1(r, r),
3523    .out_rr = tgen_bswap32,
3524};
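
/*
 * For reference, a C model of the rlwinm/rlwimi sequence above
 * (purely illustrative; the generator emits the insns directly):
 *
 *   static uint32_t rol32(uint32_t v, unsigned n)
 *   {
 *       return (v << n) | (v >> (32 - n));    // n is 8 or 24 here
 *   }
 *   static uint32_t dep(uint32_t a, uint32_t b, uint32_t m)
 *   {
 *       return (a & ~m) | (b & m);
 *   }
 *   static uint32_t bswap32_model(uint32_t src)       // src = abcd
 *   {
 *       uint32_t tmp = rol32(src, 8);                 // bcda
 *       tmp = dep(tmp, rol32(src, 24), 0xff000000);   // dcda
 *       return dep(tmp, rol32(src, 24), 0x0000ff00);  // dcba
 *   }
 */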
3525
3526#if TCG_TARGET_REG_BITS == 64
3527static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
3528{
3529    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
3530    TCGReg t1 = dst == src ? dst : TCG_REG_R0;
3531
3532    if (have_isa_3_10) {
3533        tcg_out32(s, BRD | RA(dst) | RS(src));
3534        return;
3535    }
3536
3537    /*
3538     * In the following,
3539     *   dep(a, b, m) -> (a & ~m) | (b & m)
3540     *
3541     * Begin with:                              src = abcdefgh
3542     */
3543    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
3544    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
3545    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
3546    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
3547    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
3548    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);
3549
3550    /* t0 = rol64(t0, 32)                           = hgfe0000 */
3551    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
3552    /* t1 = rol64(src, 32)                          = efghabcd */
3553    tcg_out_rld(s, RLDICL, t1, src, 32, 0);
3554
3555    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
3556    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
3557    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
3558    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
3559    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
3560    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);
3561
3562    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
3563}
3564
3565static const TCGOutOpUnary outop_bswap64 = {
3566    .base.static_constraint = C_O1_I1(r, r),
3567    .out_rr = tgen_bswap64,
3568};
3569#endif /* TCG_TARGET_REG_BITS == 64 */
3570
3571static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3572{
3573    tcg_out32(s, NEG | RT(a0) | RA(a1));
3574}
3575
3576static const TCGOutOpUnary outop_neg = {
3577    .base.static_constraint = C_O1_I1(r, r),
3578    .out_rr = tgen_neg,
3579};
3580
3581static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3582{
3583    tgen_nor(s, type, a0, a1, a1);
3584}
3585
3586static const TCGOutOpUnary outop_not = {
3587    .base.static_constraint = C_O1_I1(r, r),
3588    .out_rr = tgen_not,
3589};
3590
3591static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
3592                         TCGReg a2, unsigned ofs, unsigned len)
3593{
3594    if (type == TCG_TYPE_I32) {
3595        tcg_out_rlw(s, RLWIMI, a0, a2, ofs, 32 - ofs - len, 31 - ofs);
3596    } else {
3597        tcg_out_rld(s, RLDIMI, a0, a2, ofs, 64 - ofs - len);
3598    }
3599}
3600
3601static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
3602                          tcg_target_long a2, unsigned ofs, unsigned len)
3603{
3604    tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len));
3605}
3606
3607static const TCGOutOpDeposit outop_deposit = {
3608    .base.static_constraint = C_O1_I2(r, 0, rZ),
3609    .out_rrr = tgen_deposit,
3610    .out_rri = tgen_depositi,
3611};
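
/*
 * Worked example of the deposit field math (illustrative): for
 * TCG_TYPE_I32 with ofs = 8, len = 16 we emit
 *     rlwimi a0, a2, 8, 8, 23
 * i.e. rotate a2 left by ofs so its low 16 bits land at [23:8], then
 * insert under the MB..ME = 8..23 mask (IBM bit numbering, bit 0 is
 * the MSB), which is 0x00ffff00: exactly deposit(a0, a2, 8, 16).
 */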
3612
3613static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
3614                         unsigned ofs, unsigned len)
3615{
3616    if (ofs == 0 && len <= 16) {
3617        tgen_andi(s, TCG_TYPE_I32, a0, a1, (1 << len) - 1);
3618    } else if (type == TCG_TYPE_I32) {
3619        tcg_out_rlw(s, RLWINM, a0, a1, 32 - ofs, 32 - len, 31);
3620    } else {
3621        tcg_out_rld(s, RLDICL, a0, a1, 64 - ofs, 64 - len);
3622    }
3623}
3624
3625static const TCGOutOpExtract outop_extract = {
3626    .base.static_constraint = C_O1_I1(r, r),
3627    .out_rr = tgen_extract,
3628};
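
/*
 * Illustrative instance of the extract math above: ofs = 8, len = 8 on
 * TCG_TYPE_I32 emits rlwinm a0, a1, 24, 24, 31, a left rotate by
 * 32 - ofs (i.e. a right rotate by 8) keeping only the low len bits,
 * which computes (a1 >> 8) & 0xff.
 */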
3629
3630static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
3631                          unsigned ofs, unsigned len)
3632{
3633    if (ofs == 0) {
3634        switch (len) {
3635        case 8:
3636            tcg_out_ext8s(s, type, a0, a1);
3637            return;
3638        case 16:
3639            tcg_out_ext16s(s, type, a0, a1);
3640            return;
3641        case 32:
3642            tcg_out_ext32s(s, a0, a1);
3643            return;
3644        }
3645    } else if (ofs + len == 32) {
3646        tcg_out_sari32(s, a0, a1, ofs);
3647        return;
3648    }
3649    g_assert_not_reached();
3650}
3651
3652static const TCGOutOpExtract outop_sextract = {
3653    .base.static_constraint = C_O1_I1(r, r),
3654    .out_rr = tgen_sextract,
3655};
3656
3657static const TCGOutOpExtract2 outop_extract2 = {
3658    .base.static_constraint = C_NotImplemented,
3659};
3660
3661
3662static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
3663                       const TCGArg args[TCG_MAX_OP_ARGS],
3664                       const int const_args[TCG_MAX_OP_ARGS])
3665{
3666    switch (opc) {
3667    case INDEX_op_goto_ptr:
3668        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
3669        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
3670        tcg_out32(s, BCCTR | BO_ALWAYS);
3671        break;
3672    case INDEX_op_br:
3673        {
3674            TCGLabel *l = arg_label(args[0]);
3675            uint32_t insn = B;
3676
3677            if (l->has_value) {
3678                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
3679                                       l->u.value_ptr);
3680            } else {
3681                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
3682            }
3683            tcg_out32(s, insn);
3684        }
3685        break;
3686    case INDEX_op_ld8u_i32:
3687    case INDEX_op_ld8u_i64:
3688        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
3689        break;
3690    case INDEX_op_ld8s_i32:
3691    case INDEX_op_ld8s_i64:
3692        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
3693        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
3694        break;
3695    case INDEX_op_ld16u_i32:
3696    case INDEX_op_ld16u_i64:
3697        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
3698        break;
3699    case INDEX_op_ld16s_i32:
3700    case INDEX_op_ld16s_i64:
3701        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
3702        break;
3703    case INDEX_op_ld_i32:
3704    case INDEX_op_ld32u_i64:
3705        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
3706        break;
3707    case INDEX_op_ld32s_i64:
3708        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
3709        break;
3710    case INDEX_op_ld_i64:
3711        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
3712        break;
3713    case INDEX_op_st8_i32:
3714    case INDEX_op_st8_i64:
3715        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
3716        break;
3717    case INDEX_op_st16_i32:
3718    case INDEX_op_st16_i64:
3719        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
3720        break;
3721    case INDEX_op_st_i32:
3722    case INDEX_op_st32_i64:
3723        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
3724        break;
3725    case INDEX_op_st_i64:
3726        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
3727        break;
3728
3729    case INDEX_op_qemu_ld_i32:
3730        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
3731        break;
3732    case INDEX_op_qemu_ld_i64:
3733        if (TCG_TARGET_REG_BITS == 64) {
3734            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
3735        } else {
3736            tcg_out_qemu_ld(s, args[0], args[1], args[2],
3737                            args[3], TCG_TYPE_I64);
3738        }
3739        break;
3740    case INDEX_op_qemu_ld_i128:
3741        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3742        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
3743        break;
3744
3745    case INDEX_op_qemu_st_i32:
3746        tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
3747        break;
3748    case INDEX_op_qemu_st_i64:
3749        if (TCG_TARGET_REG_BITS == 64) {
3750            tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
3751        } else {
3752            tcg_out_qemu_st(s, args[0], args[1], args[2],
3753                            args[3], TCG_TYPE_I64);
3754        }
3755        break;
3756    case INDEX_op_qemu_st_i128:
3757        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3758        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
3759        break;
3760
3761    case INDEX_op_mb:
3762        tcg_out_mb(s, args[0]);
3763        break;
3764
3765    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3766    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3767    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3768    default:
3769        g_assert_not_reached();
3770    }
3771}
3772
3773int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3774{
3775    switch (opc) {
3776    case INDEX_op_and_vec:
3777    case INDEX_op_or_vec:
3778    case INDEX_op_xor_vec:
3779    case INDEX_op_andc_vec:
3780    case INDEX_op_not_vec:
3781    case INDEX_op_nor_vec:
3782    case INDEX_op_eqv_vec:
3783    case INDEX_op_nand_vec:
3784        return 1;
3785    case INDEX_op_orc_vec:
3786        return have_isa_2_07;
3787    case INDEX_op_add_vec:
3788    case INDEX_op_sub_vec:
3789    case INDEX_op_smax_vec:
3790    case INDEX_op_smin_vec:
3791    case INDEX_op_umax_vec:
3792    case INDEX_op_umin_vec:
3793    case INDEX_op_shlv_vec:
3794    case INDEX_op_shrv_vec:
3795    case INDEX_op_sarv_vec:
3796    case INDEX_op_rotlv_vec:
3797        return vece <= MO_32 || have_isa_2_07;
3798    case INDEX_op_ssadd_vec:
3799    case INDEX_op_sssub_vec:
3800    case INDEX_op_usadd_vec:
3801    case INDEX_op_ussub_vec:
3802        return vece <= MO_32;
3803    case INDEX_op_shli_vec:
3804    case INDEX_op_shri_vec:
3805    case INDEX_op_sari_vec:
3806    case INDEX_op_rotli_vec:
3807        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3808    case INDEX_op_cmp_vec:
3809    case INDEX_op_cmpsel_vec:
3810        return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
3811    case INDEX_op_neg_vec:
3812        return vece >= MO_32 && have_isa_3_00;
3813    case INDEX_op_mul_vec:
3814        switch (vece) {
3815        case MO_8:
3816        case MO_16:
3817            return -1;
3818        case MO_32:
3819            return have_isa_2_07 ? 1 : -1;
3820        case MO_64:
3821            return have_isa_3_10;
3822        }
3823        return 0;
3824    case INDEX_op_bitsel_vec:
3825        return have_vsx;
3826    case INDEX_op_rotrv_vec:
3827        return -1;
3828    default:
3829        return 0;
3830    }
3831}
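
/*
 * Recall the tri-state contract here: 1 means the opcode is supported
 * directly, 0 means unsupported, and -1 means TCG should synthesize it
 * by calling tcg_expand_vec_op below.
 */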
3832
3833static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3834                            TCGReg dst, TCGReg src)
3835{
3836    tcg_debug_assert(dst >= TCG_REG_V0);
3837
3838    /* Splat from integer reg allowed via constraints for v3.00.  */
3839    if (src < TCG_REG_V0) {
3840        tcg_debug_assert(have_isa_3_00);
3841        switch (vece) {
3842        case MO_64:
3843            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3844            return true;
3845        case MO_32:
3846            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3847            return true;
3848        default:
3849            /* Fail, so that we fall back on either dupm or mov+dup.  */
3850            return false;
3851        }
3852    }
3853
3854    /*
3855     * Recall we use (or emulate) VSX integer loads, so the integer is
3856     * right justified within the left (zero-index) double-word.
3857     */
3858    switch (vece) {
3859    case MO_8:
3860        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3861        break;
3862    case MO_16:
3863        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3864        break;
3865    case MO_32:
3866        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3867        break;
3868    case MO_64:
3869        if (have_vsx) {
3870            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3871            break;
3872        }
3873        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3874        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3875        break;
3876    default:
3877        g_assert_not_reached();
3878    }
3879    return true;
3880}
3881
3882static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3883                             TCGReg out, TCGReg base, intptr_t offset)
3884{
3885    int elt;
3886
3887    tcg_debug_assert(out >= TCG_REG_V0);
3888    switch (vece) {
3889    case MO_8:
3890        if (have_isa_3_00) {
3891            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3892        } else {
3893            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3894        }
3895        elt = extract32(offset, 0, 4);
3896#if !HOST_BIG_ENDIAN
3897        elt ^= 15;
3898#endif
3899        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3900        break;
3901    case MO_16:
3902        tcg_debug_assert((offset & 1) == 0);
3903        if (have_isa_3_00) {
3904            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3905        } else {
3906            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3907        }
3908        elt = extract32(offset, 1, 3);
3909#if !HOST_BIG_ENDIAN
3910        elt ^= 7;
3911#endif
3912        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3913        break;
3914    case MO_32:
3915        if (have_isa_3_00) {
3916            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3917            break;
3918        }
3919        tcg_debug_assert((offset & 3) == 0);
3920        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3921        elt = extract32(offset, 2, 2);
3922#if !HOST_BIG_ENDIAN
3923        elt ^= 3;
3924#endif
3925        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3926        break;
3927    case MO_64:
3928        if (have_vsx) {
3929            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3930            break;
3931        }
3932        tcg_debug_assert((offset & 7) == 0);
3933        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3934        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3935        elt = extract32(offset, 3, 1);
3936#if !HOST_BIG_ENDIAN
3937        elt = !elt;
3938#endif
3939        if (elt) {
3940            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3941        } else {
3942            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3943        }
3944        break;
3945    default:
3946        g_assert_not_reached();
3947    }
3948    return true;
3949}
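
/*
 * Note on the elt fixups above: VSPLTB/H/W number elements from the
 * most-significant end of the vector register, while the byte offset
 * within the aligned 16-byte quadword counts from the low address.
 * On a little-endian host the two run in opposite directions, e.g.
 * for MO_8 the byte at offset % 16 == 3 is element 3 ^ 15 == 12.
 */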
3950
3951static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
3952{
3953    tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
3954}
3955
3956static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3957{
3958    tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
3959}
3960
3961static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3962{
3963    tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
3964}
3965
3966static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3967{
3968    tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
3969}
3970
3971static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3972{
3973    tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
3974}
3975
3976static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
3977                               TCGReg c, TCGReg t, TCGReg f)
3978{
3979    if (TCG_TARGET_HAS_bitsel_vec) {
3980        tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
3981    } else {
3982        tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
3983        tcg_out_andc_vec(s, d, f, c);
3984        tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
3985    }
3986}
3987
3988static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
3989                                  TCGReg a1, TCGReg a2, TCGCond cond)
3990{
3991    static const uint32_t
3992        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3993        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3994        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3995        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
3996    uint32_t insn;
3997
3998    bool need_swap = false, need_inv = false;
3999
4000    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
4001
4002    switch (cond) {
4003    case TCG_COND_EQ:
4004    case TCG_COND_GT:
4005    case TCG_COND_GTU:
4006        break;
4007    case TCG_COND_NE:
4008        if (have_isa_3_00 && vece <= MO_32) {
4009            break;
4010        }
4011        /* fall through */
4012    case TCG_COND_LE:
4013    case TCG_COND_LEU:
4014        need_inv = true;
4015        break;
4016    case TCG_COND_LT:
4017    case TCG_COND_LTU:
4018        need_swap = true;
4019        break;
4020    case TCG_COND_GE:
4021    case TCG_COND_GEU:
4022        need_swap = need_inv = true;
4023        break;
4024    default:
4025        g_assert_not_reached();
4026    }
4027
4028    if (need_inv) {
4029        cond = tcg_invert_cond(cond);
4030    }
4031    if (need_swap) {
4032        TCGReg swap = a1;
4033        a1 = a2;
4034        a2 = swap;
4035        cond = tcg_swap_cond(cond);
4036    }
4037
4038    switch (cond) {
4039    case TCG_COND_EQ:
4040        insn = eq_op[vece];
4041        break;
4042    case TCG_COND_NE:
4043        insn = ne_op[vece];
4044        break;
4045    case TCG_COND_GT:
4046        insn = gts_op[vece];
4047        break;
4048    case TCG_COND_GTU:
4049        insn = gtu_op[vece];
4050        break;
4051    default:
4052        g_assert_not_reached();
4053    }
4054    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
4055
4056    return need_inv;
4057}
4058
4059static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
4060                            TCGReg a1, TCGReg a2, TCGCond cond)
4061{
4062    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
4063        tcg_out_not_vec(s, a0, a0);
4064    }
4065}
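
/*
 * The swap/invert reductions above use the usual identities:
 *   LT(a, b) == GT(b, a)           (swap)
 *   LE(a, b) == !GT(a, b)          (invert)
 *   GE(a, b) == !GT(b, a)          (swap + invert)
 * e.g. TCG_COND_LTU at MO_32 becomes vcmpgtuw with the operands
 * exchanged, while TCG_COND_LEU becomes vcmpgtuw plus the vnor above.
 * NE likewise inverts to EQ unless v3.00's vcmpne* is available.
 */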
4066
4067static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
4068                               TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
4069                               TCGReg v4, TCGCond cond)
4070{
4071    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);
4072
4073    if (!const_v3) {
4074        if (inv) {
4075            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
4076        } else {
4077            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
4078        }
4079    } else if (v3) {
4080        if (inv) {
4081            tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
4082        } else {
4083            tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
4084        }
4085    } else {
4086        if (inv) {
4087            tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
4088        } else {
4089            tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
4090        }
4091    }
4092}
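
/*
 * The const_v3 arms above rely on bitsel identities, with the compare
 * result as the mask:
 *   sel(mask, -1, v4) == v4 | mask    -> vor   (vorc when inverted)
 *   sel(mask,  0, v4) == v4 & ~mask   -> vandc (vand when inverted)
 */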
4093
4094static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
4095                           unsigned vecl, unsigned vece,
4096                           const TCGArg args[TCG_MAX_OP_ARGS],
4097                           const int const_args[TCG_MAX_OP_ARGS])
4098{
4099    static const uint32_t
4100        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
4101        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
4102        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
4103        neg_op[4] = { 0, 0, VNEGW, VNEGD },
4104        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
4105        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
4106        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
4107        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
4108        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
4109        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
4110        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
4111        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
4112        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
4113        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
4114        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
4115        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
4116        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
4117        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
4118        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
4119        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
4120        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
4121
4122    TCGType type = vecl + TCG_TYPE_V64;
4123    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
4124    uint32_t insn;
4125
4126    switch (opc) {
4127    case INDEX_op_ld_vec:
4128        tcg_out_ld(s, type, a0, a1, a2);
4129        return;
4130    case INDEX_op_st_vec:
4131        tcg_out_st(s, type, a0, a1, a2);
4132        return;
4133    case INDEX_op_dupm_vec:
4134        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
4135        return;
4136
4137    case INDEX_op_add_vec:
4138        insn = add_op[vece];
4139        break;
4140    case INDEX_op_sub_vec:
4141        insn = sub_op[vece];
4142        break;
4143    case INDEX_op_neg_vec:
4144        insn = neg_op[vece];
4145        a2 = a1;
4146        a1 = 0;
4147        break;
4148    case INDEX_op_mul_vec:
4149        insn = mul_op[vece];
4150        break;
4151    case INDEX_op_ssadd_vec:
4152        insn = ssadd_op[vece];
4153        break;
4154    case INDEX_op_sssub_vec:
4155        insn = sssub_op[vece];
4156        break;
4157    case INDEX_op_usadd_vec:
4158        insn = usadd_op[vece];
4159        break;
4160    case INDEX_op_ussub_vec:
4161        insn = ussub_op[vece];
4162        break;
4163    case INDEX_op_smin_vec:
4164        insn = smin_op[vece];
4165        break;
4166    case INDEX_op_umin_vec:
4167        insn = umin_op[vece];
4168        break;
4169    case INDEX_op_smax_vec:
4170        insn = smax_op[vece];
4171        break;
4172    case INDEX_op_umax_vec:
4173        insn = umax_op[vece];
4174        break;
4175    case INDEX_op_shlv_vec:
4176        insn = shlv_op[vece];
4177        break;
4178    case INDEX_op_shrv_vec:
4179        insn = shrv_op[vece];
4180        break;
4181    case INDEX_op_sarv_vec:
4182        insn = sarv_op[vece];
4183        break;
4184    case INDEX_op_and_vec:
4185        tcg_out_and_vec(s, a0, a1, a2);
4186        return;
4187    case INDEX_op_or_vec:
4188        tcg_out_or_vec(s, a0, a1, a2);
4189        return;
4190    case INDEX_op_xor_vec:
4191        insn = VXOR;
4192        break;
4193    case INDEX_op_andc_vec:
4194        tcg_out_andc_vec(s, a0, a1, a2);
4195        return;
4196    case INDEX_op_not_vec:
4197        tcg_out_not_vec(s, a0, a1);
4198        return;
4199    case INDEX_op_orc_vec:
4200        tcg_out_orc_vec(s, a0, a1, a2);
4201        return;
4202    case INDEX_op_nand_vec:
4203        insn = VNAND;
4204        break;
4205    case INDEX_op_nor_vec:
4206        insn = VNOR;
4207        break;
4208    case INDEX_op_eqv_vec:
4209        insn = VEQV;
4210        break;
4211
4212    case INDEX_op_cmp_vec:
4213        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
4214        return;
4215    case INDEX_op_cmpsel_vec:
4216        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
4217                           args[3], const_args[3], args[4], args[5]);
4218        return;
4219    case INDEX_op_bitsel_vec:
4220        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
4221        return;
4222
4223    case INDEX_op_dup2_vec:
4224        assert(TCG_TARGET_REG_BITS == 32);
4225        /* With inputs a1 = xLxx, a2 = xHxx  */
4226        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
4227        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
4228        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
4229        return;
4230
4231    case INDEX_op_ppc_mrgh_vec:
4232        insn = mrgh_op[vece];
4233        break;
4234    case INDEX_op_ppc_mrgl_vec:
4235        insn = mrgl_op[vece];
4236        break;
4237    case INDEX_op_ppc_muleu_vec:
4238        insn = muleu_op[vece];
4239        break;
4240    case INDEX_op_ppc_mulou_vec:
4241        insn = mulou_op[vece];
4242        break;
4243    case INDEX_op_ppc_pkum_vec:
4244        insn = pkum_op[vece];
4245        break;
4246    case INDEX_op_rotlv_vec:
4247        insn = rotl_op[vece];
4248        break;
4249    case INDEX_op_ppc_msum_vec:
4250        tcg_debug_assert(vece == MO_16);
4251        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
4252        return;
4253
4254    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
4255    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
4256    default:
4257        g_assert_not_reached();
4258    }
4259
4260    tcg_debug_assert(insn != 0);
4261    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
4262}
4263
4264static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
4265                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
4266{
4267    TCGv_vec t1;
4268
4269    if (vece == MO_32) {
4270        /*
4271         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
4272         * So using negative numbers gets us the 4th bit easily.
4273         */
4274        imm = sextract32(imm, 0, 5);
4275    } else {
4276        imm &= (8 << vece) - 1;
4277    }
4278
4279    /* Splat the count as bytes: shifts use only the low bits of each element, so a byte splat (xxspltib) works even once 2.07 allows MO_64. */
4280    t1 = tcg_constant_vec(type, MO_8, imm);
4281    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
4282              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
4283}
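
/*
 * Example (illustrative): shri_vec at MO_16 by 3 becomes a splat of
 * the byte constant 3 followed by vsrh; the per-element shifters look
 * only at the low bits, so the replicated bytes are harmless.
 */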
4284
4285static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
4286                           TCGv_vec v1, TCGv_vec v2)
4287{
4288    TCGv_vec t1 = tcg_temp_new_vec(type);
4289    TCGv_vec t2 = tcg_temp_new_vec(type);
4290    TCGv_vec c0, c16;
4291
4292    switch (vece) {
4293    case MO_8:
4294    case MO_16:
4295        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
4296                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4297        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
4298                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4299        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
4300                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
4301        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
4302                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
4303        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
4304                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
4305        break;
4306
4307    case MO_32:
4308        tcg_debug_assert(!have_isa_2_07);
4309        /*
4310         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
4311         * So using -16 is a quick way to represent 16.
4312         */
4313        c16 = tcg_constant_vec(type, MO_8, -16);
4314        c0 = tcg_constant_vec(type, MO_8, 0);
4315
4316        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
4317                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
4318        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
4319                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4320        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
4321                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
4322        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
4323                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
4324        tcg_gen_add_vec(MO_32, v0, t1, t2);
4325        break;
4326
4327    default:
4328        g_assert_not_reached();
4329    }
4330    tcg_temp_free_vec(t1);
4331    tcg_temp_free_vec(t2);
4332}
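
/*
 * Derivation of the MO_32 path above: per 32-bit lane, write
 * a = ah * 2^16 + al and b = bh * 2^16 + bl, so that
 *   a * b mod 2^32 == al*bl + ((ah*bl + al*bh) << 16)
 * t1 = rotl(b, 16) swaps the halfwords of each lane; vmulouh yields
 * t2 = al*bl; vmsumuhm of a against the swapped b sums ah*bl + al*bh;
 * shifting that left by 16 and adding t2 completes the product.
 */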
4333
4334void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4335                       TCGArg a0, ...)
4336{
4337    va_list va;
4338    TCGv_vec v0, v1, v2, t0;
4339    TCGArg a2;
4340
4341    va_start(va, a0);
4342    v0 = temp_tcgv_vec(arg_temp(a0));
4343    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
4344    a2 = va_arg(va, TCGArg);
4345
4346    switch (opc) {
4347    case INDEX_op_shli_vec:
4348        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
4349        break;
4350    case INDEX_op_shri_vec:
4351        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
4352        break;
4353    case INDEX_op_sari_vec:
4354        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
4355        break;
4356    case INDEX_op_rotli_vec:
4357        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
4358        break;
4359    case INDEX_op_mul_vec:
4360        v2 = temp_tcgv_vec(arg_temp(a2));
4361        expand_vec_mul(type, vece, v0, v1, v2);
4362        break;
4363    case INDEX_op_rotlv_vec:
4364        v2 = temp_tcgv_vec(arg_temp(a2));
4365        t0 = tcg_temp_new_vec(type);
4366        tcg_gen_neg_vec(vece, t0, v2);
4367        tcg_gen_rotlv_vec(vece, v0, v1, t0);
4368        tcg_temp_free_vec(t0);
4369        break;
4370    default:
4371        g_assert_not_reached();
4372    }
4373    va_end(va);
4374}
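
/*
 * The rotrv expansion above uses rotr(x, s) == rotl(x, -s): the rotate
 * insns consume the count modulo the element width, so negating the
 * count vector is sufficient.
 */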
4375
4376static TCGConstraintSetIndex
4377tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
4378{
4379    switch (op) {
4380    case INDEX_op_goto_ptr:
4381        return C_O0_I1(r);
4382
4383    case INDEX_op_ld8u_i32:
4384    case INDEX_op_ld8s_i32:
4385    case INDEX_op_ld16u_i32:
4386    case INDEX_op_ld16s_i32:
4387    case INDEX_op_ld_i32:
4388    case INDEX_op_ld8u_i64:
4389    case INDEX_op_ld8s_i64:
4390    case INDEX_op_ld16u_i64:
4391    case INDEX_op_ld16s_i64:
4392    case INDEX_op_ld32u_i64:
4393    case INDEX_op_ld32s_i64:
4394    case INDEX_op_ld_i64:
4395        return C_O1_I1(r, r);
4396
4397    case INDEX_op_st8_i32:
4398    case INDEX_op_st16_i32:
4399    case INDEX_op_st_i32:
4400    case INDEX_op_st8_i64:
4401    case INDEX_op_st16_i64:
4402    case INDEX_op_st32_i64:
4403    case INDEX_op_st_i64:
4404        return C_O0_I2(r, r);
4405
4406    case INDEX_op_qemu_ld_i32:
4407        return C_O1_I1(r, r);
4408    case INDEX_op_qemu_ld_i64:
4409        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
4410
4411    case INDEX_op_qemu_st_i32:
4412        return C_O0_I2(r, r);
4413    case INDEX_op_qemu_st_i64:
4414        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
4415
4416    case INDEX_op_qemu_ld_i128:
4417        return C_N1O1_I1(o, m, r);
4418    case INDEX_op_qemu_st_i128:
4419        return C_O0_I3(o, m, r);
4420
4421    case INDEX_op_add_vec:
4422    case INDEX_op_sub_vec:
4423    case INDEX_op_mul_vec:
4424    case INDEX_op_and_vec:
4425    case INDEX_op_or_vec:
4426    case INDEX_op_xor_vec:
4427    case INDEX_op_andc_vec:
4428    case INDEX_op_orc_vec:
4429    case INDEX_op_nor_vec:
4430    case INDEX_op_eqv_vec:
4431    case INDEX_op_nand_vec:
4432    case INDEX_op_cmp_vec:
4433    case INDEX_op_ssadd_vec:
4434    case INDEX_op_sssub_vec:
4435    case INDEX_op_usadd_vec:
4436    case INDEX_op_ussub_vec:
4437    case INDEX_op_smax_vec:
4438    case INDEX_op_smin_vec:
4439    case INDEX_op_umax_vec:
4440    case INDEX_op_umin_vec:
4441    case INDEX_op_shlv_vec:
4442    case INDEX_op_shrv_vec:
4443    case INDEX_op_sarv_vec:
4444    case INDEX_op_rotlv_vec:
4445    case INDEX_op_rotrv_vec:
4446    case INDEX_op_ppc_mrgh_vec:
4447    case INDEX_op_ppc_mrgl_vec:
4448    case INDEX_op_ppc_muleu_vec:
4449    case INDEX_op_ppc_mulou_vec:
4450    case INDEX_op_ppc_pkum_vec:
4451    case INDEX_op_dup2_vec:
4452        return C_O1_I2(v, v, v);
4453
4454    case INDEX_op_not_vec:
4455    case INDEX_op_neg_vec:
4456        return C_O1_I1(v, v);
4457
4458    case INDEX_op_dup_vec:
4459        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
4460
4461    case INDEX_op_ld_vec:
4462    case INDEX_op_dupm_vec:
4463        return C_O1_I1(v, r);
4464
4465    case INDEX_op_st_vec:
4466        return C_O0_I2(v, r);
4467
4468    case INDEX_op_bitsel_vec:
4469    case INDEX_op_ppc_msum_vec:
4470        return C_O1_I3(v, v, v, v);
4471    case INDEX_op_cmpsel_vec:
4472        return C_O1_I4(v, v, v, vZM, v);
4473
4474    default:
4475        return C_NotImplemented;
4476    }
4477}
4478
4479static void tcg_target_init(TCGContext *s)
4480{
4481    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
4482    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
4483    if (have_altivec) {
4484        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
4485        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
4486    }
4487
4488    tcg_target_call_clobber_regs = 0;
4489    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
4490    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
4491    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
4492    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
4493    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
4494    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
4495    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
4496    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
4497    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
4498    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
4499    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
4500    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
4501
4502    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
4503    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
4504    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
4505    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
4506    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
4507    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
4508    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
4509    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
4510    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
4511    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
4512    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
4513    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
4514    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
4515    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
4516    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
4517    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4518    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
4519    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
4520    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
4521    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
4522
4523    s->reserved_regs = 0;
4524    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
4525    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
4526#if defined(_CALL_SYSV)
4527    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
4528#endif
4529#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
4530    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
4531#endif
4532    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
4533    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
4534    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
4535    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
4536    if (USE_REG_TB) {
4537        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
4538    }
4539}
4540
4541#ifdef __ELF__
4542typedef struct {
4543    DebugFrameCIE cie;
4544    DebugFrameFDEHeader fde;
4545    uint8_t fde_def_cfa[4];
4546    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
4547} DebugFrame;
4548
4549/* We're expecting a 2 byte uleb128 encoded value.  */
4550QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
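
/*
 * Example of that encoding (illustrative): FRAME_SIZE == 0x1a0 encodes
 * as 0xa0 (low 7 bits with the continuation bit set) then 0x03
 * (0x1a0 >> 7), matching fde_def_cfa below.
 */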
4551
4552#if TCG_TARGET_REG_BITS == 64
4553# define ELF_HOST_MACHINE EM_PPC64
4554#else
4555# define ELF_HOST_MACHINE EM_PPC
4556#endif
4557
4558static DebugFrame debug_frame = {
4559    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4560    .cie.id = -1,
4561    .cie.version = 1,
4562    .cie.code_align = 1,
4563    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
4564    .cie.return_column = 65,
4565
4566    /* Total FDE size does not include the "len" member.  */
4567    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
4568
4569    .fde_def_cfa = {
4570        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
4571        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
4572        (FRAME_SIZE >> 7)
4573    },
4574    .fde_reg_ofs = {
4575        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
4576        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
4577    }
4578};
4579
4580void tcg_register_jit(const void *buf, size_t buf_size)
4581{
4582    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4583    int i;
4584
4585    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4586        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4587        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4588    }
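
    /*
     * Each pair written above is a DW_CFA_offset rule: opcode
     * 0x80 + regno, then the save slot's distance from the CFA as a
     * uleb128, factored by .cie.data_align (-SZR).
     */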
4589
4590    debug_frame.fde.func_start = (uintptr_t)buf;
4591    debug_frame.fde.func_len = buf_size;
4592
4593    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4594}
4595#endif /* __ELF__ */
4596#undef VMULEUB
4597#undef VMULEUH
4598#undef VMULEUW
4599#undef VMULOUB
4600#undef VMULOUH
4601#undef VMULOUW
4602#undef VMSUMUHM
4603