xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision 560375cff3ccedabf1fe5ca1bc7a31b13fdc68e5)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26
27/*
28 * Standardize on the _CALL_FOO symbols used by GCC:
29 * Apple XCode does not define _CALL_DARWIN.
30 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
31 */
32#if TCG_TARGET_REG_BITS == 64
33# ifdef _CALL_AIX
34    /* ok */
35# elif defined(_CALL_ELF) && _CALL_ELF == 1
36#  define _CALL_AIX
37# elif defined(_CALL_ELF) && _CALL_ELF == 2
38    /* ok */
39# else
40#  error "Unknown ABI"
41# endif
42#else
43# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
44    /* ok */
45# elif defined(__APPLE__)
46#  define _CALL_DARWIN
47# elif defined(__ELF__)
48#  define _CALL_SYSV
49# else
50#  error "Unknown ABI"
51# endif
52#endif
53
54#if TCG_TARGET_REG_BITS == 64
55# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
56# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
57#else
58# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
59# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
60#endif
61#ifdef _CALL_SYSV
62# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
63# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
64#else
65# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
66# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
67#endif
68
69/* For some memory operations, we need a scratch that isn't R0.  For the AIX
70   calling convention, we can re-use the TOC register since we'll be reloading
71   it at every call.  Otherwise R12 will do nicely as neither a call-saved
72   register nor a parameter register.  */
73#ifdef _CALL_AIX
74# define TCG_REG_TMP1   TCG_REG_R2
75#else
76# define TCG_REG_TMP1   TCG_REG_R12
77#endif
78#define TCG_REG_TMP2    TCG_REG_R11
79
80#define TCG_VEC_TMP1    TCG_REG_V0
81#define TCG_VEC_TMP2    TCG_REG_V1
82
83#define TCG_REG_TB     TCG_REG_R31
84#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
85
86/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
87#define SZP  ((int)sizeof(void *))
88
89/* Shorthand for size of a register.  */
90#define SZR  (TCG_TARGET_REG_BITS / 8)
91
92#define TCG_CT_CONST_S16     0x00100
93#define TCG_CT_CONST_U16     0x00200
94#define TCG_CT_CONST_N16     0x00400
95#define TCG_CT_CONST_S32     0x00800
96#define TCG_CT_CONST_U32     0x01000
97#define TCG_CT_CONST_ZERO    0x02000
98#define TCG_CT_CONST_MONE    0x04000
99#define TCG_CT_CONST_WSZ     0x08000
100#define TCG_CT_CONST_CMP     0x10000
101
102#define ALL_GENERAL_REGS  0xffffffffu
103#define ALL_VECTOR_REGS   0xffffffff00000000ull
104
105#ifndef R_PPC64_PCREL34
106#define R_PPC64_PCREL34  132
107#endif
108
109#define have_isel  (cpuinfo & CPUINFO_ISEL)
110
111#define TCG_GUEST_BASE_REG  TCG_REG_R30
112
#ifdef CONFIG_DEBUG_TCG
/*
 * Register names for debug output: the 32 general-purpose registers
 * followed by the 32 Altivec/VSX vector registers.
 */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif
125
/*
 * Register allocation preference order.  Call-saved GPRs come first so
 * that values survive calls; call-clobbered non-argument registers are
 * preferred over argument registers, which are listed last (r3 last of
 * all, since it also carries return values).
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};
178
/* Integer argument registers, r3..r10, in argument order. */
static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};
189
190static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
191{
192    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
193    tcg_debug_assert(slot >= 0 && slot <= 1);
194    return TCG_REG_R3 + slot;
195}
196
/* Registers that the prologue must save and the epilogue restore. */
static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
220
221/* For PPC, we use TB+4 instead of TB as the base. */
222static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
223{
224    return tcg_tbrel_diff(s, target) - 4;
225}
226
227static inline bool in_range_b(tcg_target_long target)
228{
229    return target == sextract64(target, 0, 26);
230}
231
232static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
233                               const tcg_insn_unit *target)
234{
235    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
236    tcg_debug_assert(in_range_b(disp));
237    return disp & 0x3fffffc;
238}
239
240static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
241{
242    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
243    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
244
245    if (in_range_b(disp)) {
246        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
247        return true;
248    }
249    return false;
250}
251
252static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
253                               const tcg_insn_unit *target)
254{
255    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
256    tcg_debug_assert(disp == (int16_t) disp);
257    return disp & 0xfffc;
258}
259
260static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
261{
262    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
263    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
264
265    if (disp == (int16_t) disp) {
266        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
267        return true;
268    }
269    return false;
270}
271
272static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
273{
274    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
275    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
276
277    if (disp == sextract64(disp, 0, 34)) {
278        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
279        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
280        return true;
281    }
282    return false;
283}
284
285static bool mask_operand(uint32_t c, int *mb, int *me);
286static bool mask64_operand(uint64_t c, int *mb, int *me);
287
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = sval;
    int mb, me;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* For 32-bit operations, only the low 32 bits of the value matter. */
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    if (ct & TCG_CT_CONST_CMP) {
        /*
         * Translate the generic "comparison immediate" constraint into
         * the 16-bit signed/unsigned constraints as appropriate for the
         * condition (presumably matching cmpi vs cmpli — the compare
         * emission elsewhere in this file is authoritative).
         */
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            ct |= TCG_CT_CONST_S16;
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            ct |= TCG_CT_CONST_U16;
            break;
        case TCG_COND_TSTEQ:
        case TCG_COND_TSTNE:
            /* Mask fits entirely in the low or high 16 bits. */
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
                return 1;
            }
            /* Otherwise, accept masks expressible by a 32-bit rotate-mask. */
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
                return 1;
            }
            /* Or a 64-bit mask, normalized by shifting out leading zeros. */
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
                return 1;
            }
            return 0;
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
        return 1;
    }
    /* N16: the negation of the value fits in a signed 16-bit immediate. */
    if ((ct & TCG_CT_CONST_N16) && -sval == (int16_t)-sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
        return 1;
    }
    /* WSZ: the value equals the operation's word size in bits. */
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}
366
367#define OPCD(opc) ((opc)<<26)
368#define XO19(opc) (OPCD(19)|((opc)<<1))
369#define MD30(opc) (OPCD(30)|((opc)<<2))
370#define MDS30(opc) (OPCD(30)|((opc)<<1))
371#define XO31(opc) (OPCD(31)|((opc)<<1))
372#define XO58(opc) (OPCD(58)|(opc))
373#define XO62(opc) (OPCD(62)|(opc))
374#define VX4(opc)  (OPCD(4)|(opc))
375
376#define B      OPCD( 18)
377#define BC     OPCD( 16)
378
379#define LBZ    OPCD( 34)
380#define LHZ    OPCD( 40)
381#define LHA    OPCD( 42)
382#define LWZ    OPCD( 32)
383#define LWZUX  XO31( 55)
384#define LD     XO58(  0)
385#define LDX    XO31( 21)
386#define LDU    XO58(  1)
387#define LDUX   XO31( 53)
388#define LWA    XO58(  2)
389#define LWAX   XO31(341)
390#define LQ     OPCD( 56)
391
392#define STB    OPCD( 38)
393#define STH    OPCD( 44)
394#define STW    OPCD( 36)
395#define STD    XO62(  0)
396#define STDU   XO62(  1)
397#define STDX   XO31(149)
398#define STQ    XO62(  2)
399
400#define PLWA   OPCD( 41)
401#define PLD    OPCD( 57)
402#define PLXSD  OPCD( 42)
403#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */
404
405#define PSTD   OPCD( 61)
406#define PSTXSD OPCD( 46)
407#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */
408
409#define ADDIC  OPCD( 12)
410#define ADDI   OPCD( 14)
411#define ADDIS  OPCD( 15)
412#define ORI    OPCD( 24)
413#define ORIS   OPCD( 25)
414#define XORI   OPCD( 26)
415#define XORIS  OPCD( 27)
416#define ANDI   OPCD( 28)
417#define ANDIS  OPCD( 29)
418#define MULLI  OPCD(  7)
419#define CMPLI  OPCD( 10)
420#define CMPI   OPCD( 11)
421#define SUBFIC OPCD( 8)
422
423#define LWZU   OPCD( 33)
424#define STWU   OPCD( 37)
425
426#define RLWIMI OPCD( 20)
427#define RLWINM OPCD( 21)
428#define RLWNM  OPCD( 23)
429
430#define RLDICL MD30(  0)
431#define RLDICR MD30(  1)
432#define RLDIMI MD30(  3)
433#define RLDCL  MDS30( 8)
434
435#define BCLR   XO19( 16)
436#define BCCTR  XO19(528)
437#define CRAND  XO19(257)
438#define CRANDC XO19(129)
439#define CRNAND XO19(225)
440#define CROR   XO19(449)
441#define CRNOR  XO19( 33)
442#define ADDPCIS XO19( 2)
443
444#define EXTSB  XO31(954)
445#define EXTSH  XO31(922)
446#define EXTSW  XO31(986)
447#define ADD    XO31(266)
448#define ADDE   XO31(138)
449#define ADDME  XO31(234)
450#define ADDZE  XO31(202)
451#define ADDC   XO31( 10)
452#define AND    XO31( 28)
453#define SUBF   XO31( 40)
454#define SUBFC  XO31(  8)
455#define SUBFE  XO31(136)
456#define SUBFME XO31(232)
457#define SUBFZE XO31(200)
458#define OR     XO31(444)
459#define XOR    XO31(316)
460#define MULLW  XO31(235)
461#define MULHW  XO31( 75)
462#define MULHWU XO31( 11)
463#define DIVW   XO31(491)
464#define DIVWU  XO31(459)
465#define MODSW  XO31(779)
466#define MODUW  XO31(267)
467#define CMP    XO31(  0)
468#define CMPL   XO31( 32)
469#define LHBRX  XO31(790)
470#define LWBRX  XO31(534)
471#define LDBRX  XO31(532)
472#define STHBRX XO31(918)
473#define STWBRX XO31(662)
474#define STDBRX XO31(660)
475#define MFSPR  XO31(339)
476#define MTSPR  XO31(467)
477#define SRAWI  XO31(824)
478#define NEG    XO31(104)
479#define MFCR   XO31( 19)
480#define MFOCRF (MFCR | (1u << 20))
481#define NOR    XO31(124)
482#define CNTLZW XO31( 26)
483#define CNTLZD XO31( 58)
484#define CNTTZW XO31(538)
485#define CNTTZD XO31(570)
486#define CNTPOPW XO31(378)
487#define CNTPOPD XO31(506)
488#define ANDC   XO31( 60)
489#define ORC    XO31(412)
490#define EQV    XO31(284)
491#define NAND   XO31(476)
492#define ISEL   XO31( 15)
493
494#define MULLD  XO31(233)
495#define MULHD  XO31( 73)
496#define MULHDU XO31(  9)
497#define DIVD   XO31(489)
498#define DIVDU  XO31(457)
499#define MODSD  XO31(777)
500#define MODUD  XO31(265)
501
502#define LBZX   XO31( 87)
503#define LHZX   XO31(279)
504#define LHAX   XO31(343)
505#define LWZX   XO31( 23)
506#define STBX   XO31(215)
507#define STHX   XO31(407)
508#define STWX   XO31(151)
509
510#define EIEIO  XO31(854)
511#define HWSYNC XO31(598)
512#define LWSYNC (HWSYNC | (1u << 21))
513
514#define SPR(a, b) ((((a)<<5)|(b))<<11)
515#define LR     SPR(8, 0)
516#define CTR    SPR(9, 0)
517
518#define SLW    XO31( 24)
519#define SRW    XO31(536)
520#define SRAW   XO31(792)
521
522#define SLD    XO31( 27)
523#define SRD    XO31(539)
524#define SRAD   XO31(794)
525#define SRADI  XO31(413<<1)
526
527#define BRH    XO31(219)
528#define BRW    XO31(155)
529#define BRD    XO31(187)
530
531#define TW     XO31( 4)
532#define TRAP   (TW | TO(31))
533
534#define SETBC    XO31(384)  /* v3.10 */
535#define SETBCR   XO31(416)  /* v3.10 */
536#define SETNBC   XO31(448)  /* v3.10 */
537#define SETNBCR  XO31(480)  /* v3.10 */
538
539#define NOP    ORI  /* ori 0,0,0 */
540
541#define LVX        XO31(103)
542#define LVEBX      XO31(7)
543#define LVEHX      XO31(39)
544#define LVEWX      XO31(71)
545#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
546#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
547#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
548#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
549#define LXSD       (OPCD(57) | 2)   /* v3.00 */
550#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
551
552#define STVX       XO31(231)
553#define STVEWX     XO31(199)
554#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
555#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
556#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
557#define STXSD      (OPCD(61) | 2)   /* v3.00 */
558
559#define VADDSBS    VX4(768)
560#define VADDUBS    VX4(512)
561#define VADDUBM    VX4(0)
562#define VADDSHS    VX4(832)
563#define VADDUHS    VX4(576)
564#define VADDUHM    VX4(64)
565#define VADDSWS    VX4(896)
566#define VADDUWS    VX4(640)
567#define VADDUWM    VX4(128)
568#define VADDUDM    VX4(192)       /* v2.07 */
569
570#define VSUBSBS    VX4(1792)
571#define VSUBUBS    VX4(1536)
572#define VSUBUBM    VX4(1024)
573#define VSUBSHS    VX4(1856)
574#define VSUBUHS    VX4(1600)
575#define VSUBUHM    VX4(1088)
576#define VSUBSWS    VX4(1920)
577#define VSUBUWS    VX4(1664)
578#define VSUBUWM    VX4(1152)
579#define VSUBUDM    VX4(1216)      /* v2.07 */
580
581#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
582#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
583
584#define VMAXSB     VX4(258)
585#define VMAXSH     VX4(322)
586#define VMAXSW     VX4(386)
587#define VMAXSD     VX4(450)       /* v2.07 */
588#define VMAXUB     VX4(2)
589#define VMAXUH     VX4(66)
590#define VMAXUW     VX4(130)
591#define VMAXUD     VX4(194)       /* v2.07 */
592#define VMINSB     VX4(770)
593#define VMINSH     VX4(834)
594#define VMINSW     VX4(898)
595#define VMINSD     VX4(962)       /* v2.07 */
596#define VMINUB     VX4(514)
597#define VMINUH     VX4(578)
598#define VMINUW     VX4(642)
599#define VMINUD     VX4(706)       /* v2.07 */
600
601#define VCMPEQUB   VX4(6)
602#define VCMPEQUH   VX4(70)
603#define VCMPEQUW   VX4(134)
604#define VCMPEQUD   VX4(199)       /* v2.07 */
605#define VCMPGTSB   VX4(774)
606#define VCMPGTSH   VX4(838)
607#define VCMPGTSW   VX4(902)
608#define VCMPGTSD   VX4(967)       /* v2.07 */
609#define VCMPGTUB   VX4(518)
610#define VCMPGTUH   VX4(582)
611#define VCMPGTUW   VX4(646)
612#define VCMPGTUD   VX4(711)       /* v2.07 */
613#define VCMPNEB    VX4(7)         /* v3.00 */
614#define VCMPNEH    VX4(71)        /* v3.00 */
615#define VCMPNEW    VX4(135)       /* v3.00 */
616
617#define VSLB       VX4(260)
618#define VSLH       VX4(324)
619#define VSLW       VX4(388)
620#define VSLD       VX4(1476)      /* v2.07 */
621#define VSRB       VX4(516)
622#define VSRH       VX4(580)
623#define VSRW       VX4(644)
624#define VSRD       VX4(1732)      /* v2.07 */
625#define VSRAB      VX4(772)
626#define VSRAH      VX4(836)
627#define VSRAW      VX4(900)
628#define VSRAD      VX4(964)       /* v2.07 */
629#define VRLB       VX4(4)
630#define VRLH       VX4(68)
631#define VRLW       VX4(132)
632#define VRLD       VX4(196)       /* v2.07 */
633
634#define VMULEUB    VX4(520)
635#define VMULEUH    VX4(584)
636#define VMULEUW    VX4(648)       /* v2.07 */
637#define VMULOUB    VX4(8)
638#define VMULOUH    VX4(72)
639#define VMULOUW    VX4(136)       /* v2.07 */
640#define VMULUWM    VX4(137)       /* v2.07 */
641#define VMULLD     VX4(457)       /* v3.10 */
642#define VMSUMUHM   VX4(38)
643
644#define VMRGHB     VX4(12)
645#define VMRGHH     VX4(76)
646#define VMRGHW     VX4(140)
647#define VMRGLB     VX4(268)
648#define VMRGLH     VX4(332)
649#define VMRGLW     VX4(396)
650
651#define VPKUHUM    VX4(14)
652#define VPKUWUM    VX4(78)
653
654#define VAND       VX4(1028)
655#define VANDC      VX4(1092)
656#define VNOR       VX4(1284)
657#define VOR        VX4(1156)
658#define VXOR       VX4(1220)
659#define VEQV       VX4(1668)      /* v2.07 */
660#define VNAND      VX4(1412)      /* v2.07 */
661#define VORC       VX4(1348)      /* v2.07 */
662
663#define VSPLTB     VX4(524)
664#define VSPLTH     VX4(588)
665#define VSPLTW     VX4(652)
666#define VSPLTISB   VX4(780)
667#define VSPLTISH   VX4(844)
668#define VSPLTISW   VX4(908)
669
670#define VSLDOI     VX4(44)
671
672#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
673#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
674#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
675
676#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
677#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
678#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
679#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
680#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
681#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
682
683#define RT(r) ((r)<<21)
684#define RS(r) ((r)<<21)
685#define RA(r) ((r)<<16)
686#define RB(r) ((r)<<11)
687#define TO(t) ((t)<<21)
688#define SH(s) ((s)<<11)
689#define MB(b) ((b)<<6)
690#define ME(e) ((e)<<1)
691#define BO(o) ((o)<<21)
692#define MB64(b) ((b)<<5)
693#define FXM(b) (1 << (19 - (b)))
694
695#define VRT(r)  (((r) & 31) << 21)
696#define VRA(r)  (((r) & 31) << 16)
697#define VRB(r)  (((r) & 31) << 11)
698#define VRC(r)  (((r) & 31) <<  6)
699
700#define LK    1
701
702#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
703#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
704#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
705#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
706
707#define BF(n)    ((n)<<23)
708#define BI(n, c) (((c)+((n)*4))<<16)
709#define BT(n, c) (((c)+((n)*4))<<21)
710#define BA(n, c) (((c)+((n)*4))<<16)
711#define BB(n, c) (((c)+((n)*4))<<11)
712#define BC_(n, c) (((c)+((n)*4))<<6)
713
714#define BO_COND_TRUE  BO(12)
715#define BO_COND_FALSE BO( 4)
716#define BO_ALWAYS     BO(20)
717
/* Bit positions within a CR field, in the order used by the BI operand. */
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
724
/*
 * Conditional-branch encoding for each TCG condition, testing CR field 0.
 * TSTEQ/TSTNE test CR_EQ like EQ/NE; the test presumably records into
 * CR0 via a Rc-form logical op — see the compare emission for details.
 */
static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
};
739
/*
 * isel encoding for each TCG condition, testing CR field 0.
 * The low bit here is set if the RA and RB fields must be inverted,
 * i.e. when the condition is realized by selecting on its negation.
 */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
};
755
/*
 * Apply relocation TYPE to the insn(s) at CODE_PTR so they reference
 * VALUE + ADDEND.  Returns false when the target is out of range for
 * the relocation, in which case the caller must use a longer sequence.
 */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;   /* lo is sign-extended, so hi compensates */
        if (hi + lo != value) {
            /* value does not fit in 32 bits */
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
804
805/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
806static bool tcg_out_need_prefix_align(TCGContext *s)
807{
808    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
809}
810
811static void tcg_out_prefix_align(TCGContext *s)
812{
813    if (tcg_out_need_prefix_align(s)) {
814        tcg_out32(s, NOP);
815    }
816}
817
818static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
819{
820    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
821}
822
823/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
824static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
825                          unsigned ra, tcg_target_long imm, bool r)
826{
827    tcg_insn_unit p, i;
828
829    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
830    i = opc | TAI(rt, ra, imm);
831
832    tcg_out_prefix_align(s);
833    tcg_out32(s, p);
834    tcg_out32(s, i);
835}
836
837/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
838static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
839                          unsigned ra, tcg_target_long imm, bool r)
840{
841    tcg_insn_unit p, i;
842
843    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
844    i = opc | TAI(rt, ra, imm);
845
846    tcg_out_prefix_align(s);
847    tcg_out32(s, p);
848    tcg_out32(s, i);
849}
850
851static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
852                             TCGReg base, tcg_target_long offset);
853
/*
 * Emit a register-to-register move of TYPE from ARG to RET.
 * Handles GPR<->GPR and VR<->VR moves always; GPR<->VR moves require
 * ISA 2.07 (mfvsr*/mtvsr*).  Returns false when the move cannot be
 * performed (pre-2.07 cross-bank moves).
 */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                /* GPR to GPR: mr ret,arg (encoded as or ret,arg,arg). */
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                /* Vector to GPR: mfvsrwz (32-bit) / mfvsrd (64-bit). */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                /* GPR to vector: mtvsrwz (32-bit) / mtvsrd (64-bit). */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        /* Vector to vector: vor ret,arg,arg. */
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
897
898static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
899                           int sh, int mb, bool rc)
900{
901    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
902    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
903    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
904    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
905}
906
/* Emit a 64-bit rotate-and-mask insn without setting CR0. */
static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb)
{
    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
}
912
913static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
914                           int sh, int mb, int me, bool rc)
915{
916    tcg_debug_assert((mb & 0x1f) == mb);
917    tcg_debug_assert((me & 0x1f) == me);
918    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
919}
920
/* Emit a 32-bit rotate-and-mask insn without setting CR0. */
static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb, int me)
{
    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
}
926
/* Sign-extend the low 8 bits of SRC into DST: extsb. */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}
931
/* Zero-extend the low 8 bits of SRC into DST: andi dst,src,0xff. */
static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}
936
/* Sign-extend the low 16 bits of SRC into DST: extsh. */
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}
941
/* Zero-extend the low 16 bits of SRC into DST: andi dst,src,0xffff. */
static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}
946
/* Sign-extend the low 32 bits of SRC into DST: extsw (64-bit only). */
static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}
952
/* Zero-extend the low 32 bits of SRC into DST: rldicl dst,src,0,32. */
static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}
958
/* Widen an i32 value in SRC to a sign-extended i64 in DST. */
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}
963
/* Widen an i32 value in SRC to a zero-extended i64 in DST. */
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}
968
/* Extract the low 32 bits of an i64: a plain register move suffices. */
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}
974
/* 32-bit shift left by constant C: rlwinm dst,src,c,0,31-c. */
static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}
979
/* 64-bit shift left by constant C: rldicr dst,src,c,63-c. */
static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}
984
/* 32-bit arithmetic shift right by constant C: srawi. */
static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}
990
/* 32-bit logical shift right by constant C: rlwinm dst,src,32-c,c,31. */
static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}
995
/* 64-bit logical shift right by constant C: rldicl dst,src,64-c,c. */
static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}
1000
/*
 * 64-bit arithmetic shift right by constant C: sradi.
 * The 6-bit count is split: low 5 bits in SH, bit 5 in insn bit 1.
 */
static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}
1005
/*
 * Emit addpcis: DST = address of next insn + IMM, where IMM must be a
 * multiple of 0x10000 within the signed 32-bit range.  The 16 useful
 * bits of IMM are scattered across the d0/d1/d2 fields of the insn.
 */
static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}
1018
/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    /* li ret,arg for 16-bit signed immediates. */
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    /* lis ret,arg>>16 for 32-bit values with a clear low halfword. */
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}
1032
/*
 * Load the constant ARG of TYPE into register RET, trying a cascade of
 * progressively more expensive encodings.  IN_PROLOGUE disables use of
 * TCG_REG_TB and the constant pool, which are not valid while the
 * prologue itself is being generated.
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Canonicalize 32-bit values as sign-extended. */
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    /* Zero-extended 32-bit value: ADDI (no sign-extend surprise) + ORIS. */
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            /* li sign-extends; rldicl clears the unwanted high ones. */
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        /* 16-bit value shifted left: li + sldi. */
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        /* One-insn value shifted right: movi + srdi. */
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        /* TB-relative LD; displacement patched via the pool relocation. */
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        /* pc-relative PLD from the pool. */
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        /* addpcis + LD; displacements patched via the pool relocation. */
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    /* Worst case: build the 64-bit constant piecewise. */
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
1165
/*
 * Load the constant VAL, replicated per VECE, into vector register RET.
 * Small values use a splat-immediate insn; anything else is loaded from
 * the constant pool, with the addressing sequence depending on ISA level.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            /* vspltisb: 5-bit signed splat immediate. */
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            /* xxspltib: full 8-bit splat immediate. */
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        /* Prefixed pc-relative loads need no address computation. */
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    /* Choose the load insn and queue the pool entry (duplicated for LVX). */
    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    /*
     * Materialize the pool address in TMP1.  The zero displacements
     * below are filled in later via the relocation recorded above.
     */
    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}
1256
1257static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1258                         tcg_target_long arg)
1259{
1260    switch (type) {
1261    case TCG_TYPE_I32:
1262    case TCG_TYPE_I64:
1263        tcg_debug_assert(ret < TCG_REG_V0);
1264        tcg_out_movi_int(s, type, ret, arg, false);
1265        break;
1266
1267    default:
1268        g_assert_not_reached();
1269    }
1270}
1271
/* Register exchange is not supported by this backend. */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
1276
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}
1283
1284static bool mask_operand(uint32_t c, int *mb, int *me)
1285{
1286    uint32_t lsb, test;
1287
1288    /* Accept a bit pattern like:
1289           0....01....1
1290           1....10....0
1291           0..01..10..0
1292       Keep track of the transitions.  */
1293    if (c == 0 || c == -1) {
1294        return false;
1295    }
1296    test = c;
1297    lsb = test & -test;
1298    test += lsb;
1299    if (test & (test - 1)) {
1300        return false;
1301    }
1302
1303    *me = clz32(lsb);
1304    *mb = test ? clz32(test & -test) + 1 : 0;
1305    return true;
1306}
1307
1308static bool mask64_operand(uint64_t c, int *mb, int *me)
1309{
1310    uint64_t lsb;
1311
1312    if (c == 0) {
1313        return false;
1314    }
1315
1316    lsb = c & -c;
1317    /* Accept 1..10..0.  */
1318    if (c == -lsb) {
1319        *mb = 0;
1320        *me = clz64(lsb);
1321        return true;
1322    }
1323    /* Accept 0..01..1.  */
1324    if (lsb == 1 && (c & (c + 1)) == 0) {
1325        *mb = clz64(c + 1) + 1;
1326        *me = 63;
1327        return true;
1328    }
1329    return false;
1330}
1331
1332static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1333{
1334    int mb, me;
1335
1336    if (mask_operand(c, &mb, &me)) {
1337        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1338    } else if ((c & 0xffff) == c) {
1339        tcg_out32(s, ANDI | SAI(src, dst, c));
1340        return;
1341    } else if ((c & 0xffff0000) == c) {
1342        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1343        return;
1344    } else {
1345        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1346        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1347    }
1348}
1349
1350static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1351{
1352    int mb, me;
1353
1354    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1355    if (mask64_operand(c, &mb, &me)) {
1356        if (mb == 0) {
1357            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1358        } else {
1359            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1360        }
1361    } else if ((c & 0xffff) == c) {
1362        tcg_out32(s, ANDI | SAI(src, dst, c));
1363        return;
1364    } else if ((c & 0xffff0000) == c) {
1365        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1366        return;
1367    } else {
1368        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1369        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1370    }
1371}
1372
/*
 * Emit dst = src OP c using a low/high halfword immediate insn pair
 * (e.g. ORI/ORIS, XORI/XORIS), at most two insns.  If C is zero no insn
 * is emitted and DST is left untouched.
 */
static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;   /* chain the second insn off the partial result */
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}
1385
/* Emit dst = src | c (32-bit) with up to two immediate insns. */
static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}
1390
/* Emit dst = src ^ c (32-bit) with up to two immediate insns. */
static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}
1395
/*
 * Emit an unconditional branch to TARGET.  MASK supplies extra opcode
 * bits (such as LK) or'ed into the branch insn.  Out-of-range targets
 * go indirect via CTR.
 */
static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}
1407
/*
 * Emit a load/store/add of RT at BASE+OFFSET for an arbitrary offset.
 * OPI is the D-form (immediate) opcode, OPX the X-form (indexed)
 * opcode; OPI == 0 means no D-form exists and the indexed form must be
 * used.  Depending on offset size and alignment this expands to the
 * D-form, a prefixed form (ISA 3.10), or ADDIS/indexed sequences.
 */
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    /* ALIGN is the DS/DQ-form alignment requirement of the opcode. */
    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        /* For loads, the destination can double as the scratch reg. */
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        /* Stores must not clobber RT with the scratch address. */
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    /* Split the 32-bit offset into ADDIS high part + D-form low part. */
    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    /* Compensate when the high part would wrap negative. */
    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    /* Skip a no-op "addi rt,rt,0". */
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
1515
/* Emit vsldoi: ret = (va:vb) shifted left by SHB bytes. */
static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}
1521
/*
 * Load a value of TYPE from BASE+OFFSET into RET, which may be either a
 * general or a vector register; vector loads fall back from VSX to
 * Altivec forms as the ISA allows.
 */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        /* Rotate the loaded word into element 0; see tcg_out_st. */
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        /* LVX ignores the low address bits; rotate the odd half in. */
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
1574
1575static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1576                              TCGReg base, intptr_t offset)
1577{
1578    int shift;
1579
1580    switch (type) {
1581    case TCG_TYPE_I32:
1582        if (arg < TCG_REG_V0) {
1583            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1584            break;
1585        }
1586        if (have_isa_2_07 && have_vsx) {
1587            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1588            break;
1589        }
1590        assert((offset & 3) == 0);
1591        tcg_debug_assert((offset & 3) == 0);
1592        shift = (offset - 4) & 0xc;
1593        if (shift) {
1594            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1595            arg = TCG_VEC_TMP1;
1596        }
1597        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1598        break;
1599    case TCG_TYPE_I64:
1600        if (arg < TCG_REG_V0) {
1601            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1602            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1603            break;
1604        }
1605        /* fallthru */
1606    case TCG_TYPE_V64:
1607        tcg_debug_assert(arg >= TCG_REG_V0);
1608        if (have_vsx) {
1609            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1610                             STXSDX, arg, base, offset);
1611            break;
1612        }
1613        tcg_debug_assert((offset & 7) == 0);
1614        if (offset & 8) {
1615            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1616            arg = TCG_VEC_TMP1;
1617        }
1618        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1619        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1620        break;
1621    case TCG_TYPE_V128:
1622        tcg_debug_assert(arg >= TCG_REG_V0);
1623        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1624                         STVX, arg, base, offset);
1625        break;
1626    default:
1627        g_assert_not_reached();
1628    }
1629}
1630
/* Direct store of a constant is not supported; caller must load first. */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}
1636
1637/*
1638 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
1639 * If RC, then also set RC0.
1640 */
1641static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
1642                         bool const_arg2, TCGType type, bool rc)
1643{
1644    int mb, me;
1645
1646    if (!const_arg2) {
1647        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
1648        return;
1649    }
1650
1651    if (type == TCG_TYPE_I32) {
1652        arg2 = (uint32_t)arg2;
1653    }
1654
1655    if ((arg2 & ~0xffff) == 0) {
1656        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
1657        return;
1658    }
1659    if ((arg2 & ~0xffff0000ull) == 0) {
1660        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
1661        return;
1662    }
1663    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
1664        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
1665        return;
1666    }
1667    if (TCG_TARGET_REG_BITS == 64) {
1668        int sh = clz64(arg2);
1669        if (mask64_operand(arg2 << sh, &mb, &me)) {
1670            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
1671            return;
1672        }
1673    }
1674    /* Constraints should satisfy this. */
1675    g_assert_not_reached();
1676}
1677
/*
 * Emit a comparison of ARG1 against ARG2 (immediate if CONST_ARG2) for
 * condition COND, targeting condition-register field CR.  TST conditions
 * are routed through tcg_out_test with RC set, and thus require CR == 0.
 */
static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        bool const_arg2, int cr, TCGType type)
{
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /*
     * Simplify the comparisons below wrt CMPI.
     * All of the tests are 16-bit, so a 32-bit sign extend always works.
     */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* Equality can use either signed or unsigned compare. */
        if (const_arg2) {
            if ((int16_t)arg2 == arg2) {
                op = CMPI;
                break;
            }
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        tcg_debug_assert(cr == 0);
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
        return;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            tcg_debug_assert((int16_t)arg2 == arg2);
            op = CMPI;
            break;
        }
        op = CMP;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    default:
        g_assert_not_reached();
    }
    /* Bit 21 is the L field: 1 selects a 64-bit comparison. */
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
    op |= RA(arg1);
    op |= const_arg2 ? arg2 & 0xffff : RB(arg2);
    tcg_out32(s, op);
}
1746
/*
 * Emit dst = (src == 0), or dst = -(src == 0) when NEG, without using a
 * condition register.
 */
static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, src, src));
        return;
    }

    /* cntlz yields the type width exactly when src == 0; shift down. */
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
    if (neg) {
        tcg_out32(s, NEG | RT(dst) | RA(dst));
    }
}
1773
/*
 * Emit dst = (src != 0), or dst = -(src != 0) when NEG, without using a
 * condition register.
 */
static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.  Extra addition
         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
        return;
    }
    /* Otherwise compute (src == 0) and invert the result. */
    tcg_out_setcond_eq0(s, type, dst, src, false);
    if (neg) {
        /* 1 -> 0, 0 -> -1. */
        tcg_out32(s, ADDI | TAI(dst, dst, -1));
    } else {
        tcg_out_xori32(s, dst, dst, 1);
    }
}
1793
/*
 * Compute R0 = arg1 ^ arg2, reducing an equality comparison to a
 * test against zero.  Returns the register holding the result (R0).
 */
static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            /* Constant fits the xori/xoris immediate pair. */
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}
1809
/*
 * Emit arg0 = (arg1 COND arg2), or the negated form (0/-1) when NEG.
 * Strategy, in order of preference: SETBC family (ISA 3.10), special
 * cases against zero, ISEL, then compare + MFOCRF bit extraction.
 */
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg arg0, TCGReg arg1, TCGArg arg2,
                            bool const_arg2, bool neg)
{
    int sh;
    bool inv;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* With SETBC/SETBCR, we can always implement with 2 insns. */
    if (have_isa_3_10) {
        tcg_insn_unit bi, opc;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
        bi = tcg_to_bc[cond] & (0x1f << 16);
        if (tcg_to_bc[cond] & BO(8)) {
            opc = neg ? SETNBC : SETBC;
        } else {
            opc = neg ? SETNBCR : SETBCR;
        }
        tcg_out32(s, opc | RT(arg0) | bi);
        return;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_NE:
            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_GE:
            /* GE 0 is the complement of LT 0; invert and fall through. */
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                if (neg) {
                    tcg_out_sari32(s, arg0, arg1, 31);
                } else {
                    tcg_out_shri32(s, arg0, arg1, 31);
                }
            } else {
                if (neg) {
                    tcg_out_sari64(s, arg0, arg1, 63);
                } else {
                    tcg_out_shri64(s, arg0, arg1, 63);
                }
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
        /* Bit 0 of the isel entry flags an inverted sense. */
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    inv = false;
    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_TSTEQ:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_TSTNE:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        inv = true;
        /* fall through */
    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30; /* CR7 CR_GT */
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        inv = true;
        /* fall through */
    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29; /* CR7 CR_LT */
        goto crtest;

    crtest:
        /* Compare into CR7, copy the CR bit into arg0, then fix up. */
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        if (neg && inv) {
            /* (1 - bit) - 1 == -(bit == 0). */
            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
        } else if (neg) {
            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
        } else if (inv) {
            tcg_out_xori32(s, arg0, arg0, 1);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
1956
/* setcond with register arg2: dest = (arg1 cond arg2). */
static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false);
}
1962
/* setcond with constant arg2: dest = (arg1 cond arg2). */
static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false);
}
1968
/* Constraints and emitters for the setcond opcode. */
static const TCGOutOpSetcond outop_setcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_setcond,
    .out_rri = tgen_setcondi,
};
1974
/* negsetcond with register arg2: dest = -(arg1 cond arg2). */
static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true);
}
1980
/* negsetcond with constant arg2: dest = -(arg1 cond arg2). */
static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true);
}
1986
/* Constraints and emitters for the negsetcond opcode. */
static const TCGOutOpSetcond outop_negsetcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_negsetcond,
    .out_rri = tgen_negsetcondi,
};
1992
1993void tcg_out_br(TCGContext *s, TCGLabel *l)
1994{
1995    uint32_t insn = B;
1996
1997    if (l->has_value) {
1998        insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1999    } else {
2000        tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2001    }
2002    tcg_out32(s, insn);
2003}
2004
/* Emit a conditional branch for COND with displacement field BD. */
static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
{
    tcg_out32(s, tcg_to_bc[cond] | bd);
}
2009
/*
 * Emit a conditional branch to label @l.  Mirrors tcg_out_br: resolve
 * the 14-bit displacement now if the label has a value, otherwise
 * record an R_PPC_REL14 relocation for later patching.
 */
static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
{
    int bd = 0;
    if (l->has_value) {
        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out_bc(s, cond, bd);
}
2020
/*
 * Emit brcond: compare two registers (result into CR field 0), then
 * conditionally branch to label @l.
 */
static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, false, 0, type);
    tcg_out_bc_lab(s, cond, l);
}

/* As tgen_brcond, with arg2 supplied as an immediate constant. */
static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg arg1, tcg_target_long arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, true, 0, type);
    tcg_out_bc_lab(s, cond, l);
}

/* Backend descriptor for the brcond opcode. */
static const TCGOutOpBrcond outop_brcond = {
    .base.static_constraint = C_O0_I2(r, rC),
    .out_rr = tgen_brcond,
    .out_ri = tgen_brcondi,
};
2040
/*
 * Emit movcond: dest = (c1 cond c2 ? v1 : v2).
 * Uses the ISEL instruction when available, otherwise a compare plus a
 * conditional branch over one move instruction.  Per the rZ constraints
 * below, a constant v1/v2 can only be 0, so the v == 0 tests here cover
 * the constant-operand cases (const_v1/const_v2 need no separate check).
 */
static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2,
                         TCGArg v1, bool const_v1, TCGArg v2, bool const_v2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            /* Materialize zero in R0 so it can serve as the B operand. */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        /* No isel: move one value into dest, branch over the other move. */
        if (dest == v2) {
            /* dest would be clobbered before use; invert and swap roles. */
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out_bc(s, cond, 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

/* Backend descriptor for the movcond opcode. */
static const TCGOutOpMovcond outop_movcond = {
    .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ),
    .out = tgen_movcond,
};
2093
/*
 * Emit a count-leading/trailing-zeros operation (the specific count
 * instruction is supplied by the caller in @opc), with @a2 as the
 * result to substitute when the input @a1 is zero.  When a2 equals the
 * type width (32/64), the count instruction already produces that value
 * for a zero input, so no fixup is required.
 */
static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            /* Count into R0, then select between a2 and the count. */
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            /* Output overlaps fallback: skip the count when input is zero. */
            tcg_out_bc(s, TCG_COND_EQ, 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            /* Count first, then branch over the fallback when non-zero. */
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out_bc(s, TCG_COND_NE, 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}
2119
/*
 * Emit a double-word comparison of (ah:al) against (bh:bl) for a
 * 32-bit host, leaving the boolean result in CR bit 0*4+CR_EQ so that
 * callers can test it with TCG_COND_EQ.  The sub-comparisons use CR
 * fields 6 and 7 and are combined with CR-logical instructions.
 */
static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                         TCGArg bl, bool blconst, TCGArg bh, bool bhconst)
{
    /* For each ordering condition: the CR bit of the high-part compare
       that decides immediately (bit1), and the bit combined with the
       low-part unsigned compare when the high parts are equal (bit2). */
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond2;
    int op, bit1, bit2;

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        /* Equal iff both halves compare equal (NE uses the NAND). */
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* AND each half with its mask, then OR the halves together with
           the record bit set so CR0 reflects the combined result. */
        if (blconst) {
            tcg_out_andi32(s, TCG_REG_R0, al, bl);
        } else {
            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
        }
        if (bhconst) {
            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
        } else {
            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
        }
        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        /* CRANDC when the low-compare bit must be complemented. */
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        /* High parts compared with the original (possibly signed)
           condition; low parts always compared unsigned. */
        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
        break;

    default:
        g_assert_not_reached();
    }
}
2187
/*
 * Emit setcond on a 64-bit value split across two 32-bit registers
 * (32-bit host): perform the double-word compare, copy CR field 0 to
 * R0 via MFOCRF, then isolate the result bit into @ret with RLWINM.
 */
static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
                          TCGReg al, TCGReg ah,
                          TCGArg bl, bool const_bl,
                          TCGArg bh, bool const_bh)
{
    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
    tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
}

/* Only referenced by 32-bit hosts; silence -Wunused elsewhere. */
#if TCG_TARGET_REG_BITS != 32
__attribute__((unused))
#endif
static const TCGOutOpSetcond2 outop_setcond2 = {
    .base.static_constraint = C_O1_I4(r, r, r, rU, rC),
    .out = tgen_setcond2,
};
2205
2206static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
2207                         TCGArg bl, bool const_bl,
2208                         TCGArg bh, bool const_bh, TCGLabel *l)
2209{
2210    assert(TCG_TARGET_REG_BITS == 32);
2211    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
2212    tcg_out_bc_lab(s, TCG_COND_EQ, l);
2213}
2214
2215#if TCG_TARGET_REG_BITS != 32
2216__attribute__((unused))
2217#endif
2218static const TCGOutOpBrcond2 outop_brcond2 = {
2219    .base.static_constraint = C_O0_I4(r, r, rU, rC),
2220    .out = tgen_brcond2,
2221};
2222
2223static void tcg_out_mb(TCGContext *s, unsigned a0)
2224{
2225    uint32_t insn;
2226
2227    if (a0 & TCG_MO_ST_LD) {
2228        insn = HWSYNC;
2229    } else {
2230        insn = LWSYNC;
2231    }
2232
2233    tcg_out32(s, insn);
2234}
2235
/*
 * Emit a call (or tail jump, when @lk is 0 rather than LK) to @target,
 * handling the ABI-specific calling sequence: function descriptors for
 * AIX-style ABIs, the r12 entry-point convention for ELFv2, and a plain
 * branch otherwise.
 */
static void tcg_out_call_int(TCGContext *s, int lk,
                             const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the descriptor.  If the branch is in range, and we
       don't have to spend too much effort on building the toc.  */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, lk, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        /* Load entry point into CTR and the new TOC into R2, then
           branch through CTR. */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid r12 load if we
       knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, lk, target);
    } else {
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#else
    /* SysV/Darwin: a direct branch suffices. */
    tcg_out_b(s, lk, target);
#endif
}
2286
/* Emit a call to @target, setting the link register (@info is unused). */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, LK, target);
}
2292
/*
 * Indexed-load opcodes for guest memory accesses, indexed by
 * MemOp size/sign/bswap bits.  Entries left zero (e.g. byte-swapped
 * sign-extending loads) have no single-instruction form and are
 * handled by the callers.
 */
static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_UQ] = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_UQ] = LDBRX,
};

/* Indexed-store opcodes, indexed by MemOp size/bswap bits. */
static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_UQ] = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_UQ] = STDBRX,
};
2316
/*
 * Materialize the fast-path return address (currently in the link
 * register) for the slow-path helper call.  @arg < 0 means no specific
 * register was assigned; use TMP1.  Returns the register used.
 */
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
    if (arg < 0) {
        arg = TCG_REG_TMP1;
    }
    tcg_out32(s, MFSPR | RT(arg) | LR);
    return arg;
}

/*
 * For the purposes of ppc32 sorting 4 input registers into 4 argument
 * registers, there is an outside chance we would require 3 temps.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ra_gen = ldst_ra_gen,
    .ntmp = 3,
    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
};
2335
/*
 * Emit the out-of-line slow path for a guest load: patch the fast
 * path's conditional branch to land here, marshal the helper args,
 * call the size-appropriate load helper, move its result into place,
 * and branch back to the fast-path continuation.
 * Returns false if the branch relocation is out of range.
 */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}
2351
/*
 * Emit the out-of-line slow path for a guest store; like the load
 * variant above but with no result to move back.
 * Returns false if the branch relocation is out of range.
 */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}
2366
/* Host address for a guest access, as base + index register pair
   (base may be 0) plus the atomicity/alignment requirements. */
typedef struct {
    TCGReg base;
    TCGReg index;
    TCGAtomAlign aa;
} HostAddress;

/*
 * Report whether byte-swapped memory operations are supported for
 * @memop.  All sizes up to 64 bits are; 128-bit operations are only
 * accepted when they may be implemented as two 64-bit accesses.
 */
bool tcg_target_has_memory_bswap(MemOp memop)
{
    TCGAtomAlign aa;

    if ((memop & MO_SIZE) <= MO_64) {
        return true;
    }

    /*
     * Reject 16-byte memop with 16-byte atomicity,
     * but do allow a pair of 64-bit operations.
     */
    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
    return aa.atom <= MO_64;
}
2388
/* We expect to use a 16-bit negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -32768

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr, MemOpIdx oi, bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp a_bits, s_bits;

    /*
     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
     *
     * Before 3.0, "An access that is not atomic is performed as a set of
     * smaller disjoint atomic accesses. In general, the number and alignment
     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
     *
     * As of 3.0, "the non-atomic access is performed as described in
     * the corresponding list", which matches MO_ATOM_SUBALIGN.
     */
    s_bits = opc & MO_SIZE;
    h->aa = atom_and_align_for_opc(s, opc,
                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
                                                 : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_bits = h->aa.align;

    if (tcg_use_softmmu) {
        /* System mode: TLB lookup with slow-path fallback on miss. */
        int mem_index = get_mmuidx(oi);
        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
                            : offsetof(CPUTLBEntry, addr_write);
        int fast_off = tlb_mask_table_ofs(s, mem_index);
        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
        int table_off = fast_off + offsetof(CPUTLBDescFast, table);

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addr_reg = addr;

        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);

        /* Extract the page index, shifted into place for tlb index.  */
        if (TCG_TARGET_REG_BITS == 32) {
            tcg_out_shri32(s, TCG_REG_R0, addr,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        } else {
            tcg_out_shri64(s, TCG_REG_R0, addr,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        }
        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));

        /*
         * Load the TLB comparator into TMP2.
         * For 64-bit host, always load the entire 64-bit slot for simplicity.
         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
         */
        if (cmp_off == 0) {
            /* Update-form load: adds TMP1+TMP2 into TMP1 as a side effect. */
            tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX)
                      | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
        } else {
            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
        }

        /*
         * Load the TLB addend for use on the fast path.
         * Do this asap to minimize any load use delay.
         */
        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                       offsetof(CPUTLBEntry, addend));
        }

        /* Clear the non-page, non-alignment bits from the address in R0. */
        if (TCG_TARGET_REG_BITS == 32) {
            /*
             * We don't support unaligned accesses on 32-bits.
             * Preserve the bottom bits and thus trigger a comparison
             * failure on unaligned accesses.
             */
            if (a_bits < s_bits) {
                a_bits = s_bits;
            }
            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
                        (32 - a_bits) & 31, 31 - s->page_bits);
        } else {
            TCGReg t = addr;

            /*
             * If the access is unaligned, we need to make sure we fail if we
             * cross a page boundary.  The trick is to add the access size-1
             * to the address before masking the low bits.  That will make the
             * address overflow to the next page if we cross a page boundary,
             * which will then force a mismatch of the TLB compare.
             */
            if (a_bits < s_bits) {
                unsigned a_mask = (1 << a_bits) - 1;
                unsigned s_mask = (1 << s_bits) - 1;
                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
                t = TCG_REG_R0;
            }

            /* Mask the address for the requested alignment.  */
            if (addr_type == TCG_TYPE_I32) {
                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
                            (32 - a_bits) & 31, 31 - s->page_bits);
            } else if (a_bits == 0) {
                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
            } else {
                /* Two rotates: clear the low alignment bits, then the
                   offset-within-page bits. */
                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
                            64 - s->page_bits, s->page_bits - a_bits);
                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
            }
        }

        /* Full comparison into cr0. */
        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type);

        /* Load a pointer into the current opcode w/conditional branch-link. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_bc(s, TCG_COND_NE, LK);

        h->base = TCG_REG_TMP1;
    } else {
        /* User mode: only an alignment check may be required. */
        if (a_bits) {
            ldst = new_ldst_label(s);
            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addr_reg = addr;

            /* We are expecting a_bits to max out at 7, much lower than ANDI. */
            tcg_debug_assert(a_bits < 16);
            tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1));

            ldst->label_ptr[0] = s->code_ptr;
            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
        }

        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
    }

    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
        /* Zero-extend the guest address for use in the host address. */
        tcg_out_ext32u(s, TCG_REG_TMP2, addr);
        h->index = TCG_REG_TMP2;
    } else {
        h->index = addr;
    }

    return ldst;
}
2550
/*
 * Emit the fast path for a guest load of up to 64 bits.  @datahi is
 * only used by the 32-bit host MO_64 case, where the value occupies a
 * register pair.  Fills in the slow-path label (if any) afterwards.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addr, MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr, oi, true);

    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        /* 64-bit value on a 32-bit host: load as two 32-bit words. */
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
        } else if (h.index == datahi) {
            /* Order the loads so the address is not clobbered early. */
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
        } else {
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
        if (!have_isa_2_06 && insn == LDBRX) {
            /* No LDBRX before ISA 2.06: two LWBRX plus a merge. */
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
        } else if (insn) {
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
        } else {
            /* No direct encoding (e.g. byte-swapped sign-extending load):
               load unsigned, then extend in a separate step. */
            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
            tcg_out_movext(s, TCG_TYPE_REG, datalo,
                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2600
/*
 * Emit the fast path for a guest store of up to 64 bits; the mirror
 * image of tcg_out_qemu_ld above.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addr, MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr, oi, false);

    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        /* 64-bit value on a 32-bit host: store as two 32-bit words. */
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
        } else {
            tcg_out32(s, STW | TAI(datahi, h.index, 0));
            tcg_out32(s, STW | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
        if (!have_isa_2_06 && insn == STDBRX) {
            /* No STDBRX before ISA 2.06: two STWBRX of the halves. */
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4));
            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2));
        } else {
            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2642
/*
 * Emit a 128-bit guest load or store (64-bit host).  Uses LQ/STQ when
 * 16-byte atomicity is required, otherwise a pair of 64-bit accesses
 * (byte-reversed via LDBRX/STDBRX when MO_BSWAP is set).
 */
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    bool need_bswap;
    uint32_t insn;
    TCGReg index;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LQ/STQ have no indexing. */
    index = h.index;
    if (h.base != 0) {
        index = TCG_REG_TMP1;
        tcg_out32(s, ADD | TAB(index, h.base, h.index));
    }
    need_bswap = get_memop(oi) & MO_BSWAP;

    if (h.aa.atom == MO_128) {
        /* LQ/STQ require an even/odd register pair (enforced below)
           and do not support byte reversal. */
        tcg_debug_assert(!need_bswap);
        tcg_debug_assert(datalo & 1);
        tcg_debug_assert(datahi == datalo - 1);
        tcg_debug_assert(!is_ld || datahi != index);
        insn = is_ld ? LQ : STQ;
        tcg_out32(s, insn | TAI(datahi, index, 0));
    } else {
        TCGReg d1, d2;

        /* Choose which half goes to the lower address. */
        if (HOST_BIG_ENDIAN ^ need_bswap) {
            d1 = datahi, d2 = datalo;
        } else {
            d1 = datalo, d2 = datahi;
        }

        if (need_bswap) {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
            insn = is_ld ? LDBRX : STDBRX;
            tcg_out32(s, insn | TAB(d1, 0, index));
            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
        } else {
            insn = is_ld ? LD : STD;
            tcg_out32(s, insn | TAI(d1, index, 0));
            tcg_out32(s, insn | TAI(d2, index, 8));
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2697
/* Single-register guest load; -1 marks the unused high half. */
static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data,
                         TCGReg addr, MemOpIdx oi)
{
    tcg_out_qemu_ld(s, data, -1, addr, oi, type);
}

/* Backend descriptor for single-register guest loads. */
static const TCGOutOpQemuLdSt outop_qemu_ld = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_qemu_ld,
};

/*
 * Two-register guest load: a 64-bit value on a 32-bit host, or a
 * 128-bit value on a 64-bit host.
 */
static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo,
                          TCGReg datahi, TCGReg addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_out_qemu_ld(s, datalo, datahi, addr, oi, type);
    } else {
        tcg_out_qemu_ldst_i128(s, datalo, datahi, addr, oi, true);
    }
}

/* Backend descriptor for two-register guest loads; the 64-bit host
   constraint reflects the LQ even/odd register-pair requirement. */
static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = {
    .base.static_constraint =
        TCG_TARGET_REG_BITS == 64 ? C_N1O1_I1(o, m, r) : C_O2_I1(r, r, r),
    .out = tgen_qemu_ld2,
};
2724
/* Single-register guest store; -1 marks the unused high half. */
static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data,
                         TCGReg addr, MemOpIdx oi)
{
    tcg_out_qemu_st(s, data, -1, addr, oi, type);
}

/* Backend descriptor for single-register guest stores. */
static const TCGOutOpQemuLdSt outop_qemu_st = {
    .base.static_constraint = C_O0_I2(r, r),
    .out = tgen_qemu_st,
};

/*
 * Two-register guest store: a 64-bit value on a 32-bit host, or a
 * 128-bit value on a 64-bit host.
 */
static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo,
                          TCGReg datahi, TCGReg addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_out_qemu_st(s, datalo, datahi, addr, oi, type);
    } else {
        tcg_out_qemu_ldst_i128(s, datalo, datahi, addr, oi, false);
    }
}

/* Backend descriptor for two-register guest stores. */
static const TCGOutOpQemuLdSt2 outop_qemu_st2 = {
    .base.static_constraint =
        TCG_TARGET_REG_BITS == 64 ? C_O0_I3(o, m, r) : C_O0_I3(r, r, r),
    .out = tgen_qemu_st2,
};
2751
2752static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2753{
2754    int i;
2755    for (i = 0; i < count; ++i) {
2756        p[i] = NOP;
2757    }
2758}
2759
/* Parameters for function call generation, used in tcg.c.  */
#define TCG_TARGET_STACK_ALIGN       16

/*
 * Per-ABI stack frame layout:
 *   LINK_AREA_SIZE               - ABI-reserved area at the frame top,
 *   LR_OFFSET                    - saved link register slot within it,
 *   TCG_TARGET_CALL_STACK_OFFSET - start of the outgoing argument area.
 */
#ifdef _CALL_AIX
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
#elif defined(_CALL_DARWIN)
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
#  define LINK_AREA_SIZE               (4 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
# if defined(_CALL_SYSV)
#  define LINK_AREA_SIZE               (2 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#endif
#ifndef LR_OFFSET
# error "Unhandled abi"
#endif
#ifndef TCG_TARGET_CALL_STACK_OFFSET
# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
#endif

/* Scratch buffer for spilled temporaries, and callee-save spill area. */
#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)

/* Total frame size, rounded up to the stack alignment. */
#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
                     + TCG_STATIC_CALL_ARGS_SIZE    \
                     + CPU_TEMP_BUF_SIZE            \
                     + REG_SAVE_SIZE                \
                     + TCG_TARGET_STACK_ALIGN - 1)  \
                    & -TCG_TARGET_STACK_ALIGN)

/* Offset of the register save area within the frame. */
#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2799
/*
 * Emit the prologue and epilogue shared by all translation blocks:
 * create the stack frame, save the callee-saved registers and LR, set
 * up AREG0 and optionally the guest base register, then jump to the
 * TB passed in the second argument register.  The epilogue (entered
 * via tcg_code_gen_epilogue) undoes all of this and returns.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

#ifdef _CALL_AIX
    /* AIX-style ABIs call through a function descriptor; emit one
       ahead of the actual code. */
    const void **desc = (const void **)s->code_ptr;
    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
    desc[1] = 0;                            /* environment pointer */
    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
#endif

    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
                  CPU_TEMP_BUF_SIZE);

    /* Prologue */
    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
    /* Allocate the frame with a store-with-update of the stack pointer. */
    tcg_out32(s, (SZR == 8 ? STDU : STWU)
              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    /* Save LR into the caller's frame, per ABI convention. */
    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);

    if (!tcg_use_softmmu && guest_base) {
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }

    /* env in arg0 -> AREG0; jump to the TB address in arg1 via CTR. */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
    tcg_out32(s, BCCTR | BO_ALWAYS);

    /* Epilogue */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);

    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
    tcg_out32(s, BCLR | BO_ALWAYS);
}
2846
/*
 * Emit code at the start of each TB.  When REG_TB is in use, load the
 * current instruction address into it so later code can address data
 * relative to the TB.
 */
static void tcg_out_tb_start(TCGContext *s)
{
    /* Load TCG_REG_TB. */
    if (USE_REG_TB) {
        if (have_isa_3_00) {
            /* lnia REG_TB */
            tcg_out_addpcis(s, TCG_REG_TB, 0);
        } else {
            /* bcl 20,31,$+4 (preferred form for getting nia) */
            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
        }
    }
}
2861
/* Exit the TB, returning @arg to the caller in R3 via the epilogue. */
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
{
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
    tcg_out_b(s, 0, tcg_code_gen_epilogue);
}
2867
/*
 * Emit the goto_tb sequence: a patchable direct branch (initially a
 * nop, rewritten by tb_target_set_jmp_target), followed by an indirect
 * branch through the stored jump target address for the case where the
 * direct displacement is out of range.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    uintptr_t ptr = get_jmp_target_addr(s, which);
    int16_t lo;

    /* Direct branch will be patched by tb_target_set_jmp_target. */
    set_jmp_insn_offset(s, which);
    tcg_out32(s, NOP);

    /* When branch is out of range, fall through to indirect. */
    if (USE_REG_TB) {
        /* Load the target address TB-relative. */
        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
    } else if (have_isa_3_10) {
        /* Prefixed pc-relative load (pld). */
        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
    } else if (have_isa_3_00) {
        /*
         * addpcis + ld: addpcis is based on the address of the next
         * insn, hence the -4 adjustment; lo carries the signed low
         * 16 bits consumed by the load displacement.
         */
        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
        lo = offset;
        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
    } else {
        /* Absolute: high part via movi, low 16 bits in the load. */
        lo = ptr;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
    }

    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
    tcg_out32(s, BCCTR | BO_ALWAYS);
    set_jmp_reset_offset(s, which);
}
2899
/*
 * Indirect jump to a computed code address: move it to CTR and branch.
 * R3 is preloaded with 0, the exit value used if the destination turns
 * out to be the epilogue (cf. tcg_out_exit_tb).
 */
static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0)
{
    tcg_out32(s, MTSPR | RS(a0) | CTR);
    tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
    tcg_out32(s, BCCTR | BO_ALWAYS);
}
2906
/*
 * Retarget the patchable branch of TB @tb, slot @n.  If the new target
 * is within direct-branch range, write an unconditional branch there;
 * otherwise restore the nop so execution falls through to the indirect
 * sequence emitted by tcg_out_goto_tb.  @jmp_rx/@jmp_rw are the
 * execute/write mappings of the insn being patched.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t addr = tb->jmp_target_addr[n];
    intptr_t diff = addr - jmp_rx;
    tcg_insn_unit insn;

    if (in_range_b(diff)) {
        /* Unconditional branch; displacement with low 2 bits clear. */
        insn = B | (diff & 0x3fffffc);
    } else {
        insn = NOP;
    }

    /* A single aligned 4-byte store, then flush the icache. */
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}
2923
2924
/* Emit "add a0,a1,a2". */
static void tgen_add(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ADD | TAB(a0, a1, a2));
}

/* Add immediate; tcg_out_mem_long splits values too wide for addi. */
static void tgen_addi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
}

static const TCGOutOpBinary outop_add = {
    .base.static_constraint = C_O1_I2(r, r, rT),
    .out_rrr = tgen_add,
    .out_rri = tgen_addi,
};
2942
/* Add with carry-out: addc sets CA. */
static void tgen_addco_rrr(TCGContext *s, TCGType type,
                           TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ADDC | TAB(a0, a1, a2));
}

/* Add immediate with carry-out: addic sets CA. */
static void tgen_addco_rri(TCGContext *s, TCGType type,
                           TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out32(s, ADDIC | TAI(a0, a1, a2));
}

static TCGConstraintSetIndex cset_addco(TCGType type, unsigned flags)
{
    /*
     * Note that the CA bit is defined based on the word size of the
     * environment.  So in 64-bit mode it's always carry-out of bit 63.
     * The fallback code using deposit works just as well for TCG_TYPE_I32.
     */
    return type == TCG_TYPE_REG ? C_O1_I2(r, r, rI) : C_NotImplemented;
}

static const TCGOutOpBinary outop_addco = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addco,
    .out_rrr = tgen_addco_rrr,
    .out_rri = tgen_addco_rri,
};
2971
/* Add with carry-in and carry-out: adde consumes and produces CA. */
static void tgen_addcio_rrr(TCGContext *s, TCGType type,
                            TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ADDE | TAB(a0, a1, a2));
}

/*
 * Immediate restricted to 0 or -1 (constraint rZM below):
 * addme adds -1 + CA, addze adds 0 + CA.
 */
static void tgen_addcio_rri(TCGContext *s, TCGType type,
                            TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out32(s, (a2 ? ADDME : ADDZE) | RT(a0) | RA(a1));
}

static TCGConstraintSetIndex cset_addcio(TCGType type, unsigned flags)
{
    return type == TCG_TYPE_REG ? C_O1_I2(r, r, rZM) : C_NotImplemented;
}

static const TCGOutOpBinary outop_addcio = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addcio,
    .out_rrr = tgen_addcio_rrr,
    .out_rri = tgen_addcio_rri,
};

/* Carry-in only: the extended-add insns serve unchanged. */
static const TCGOutOpAddSubCarry outop_addci = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addcio,
    .out_rrr = tgen_addcio_rrr,
    .out_rri = tgen_addcio_rri,
};
3002
/* Set CA: subfc rx,ry,ry computes ry - ry, which never borrows (CA=1). */
static void tcg_out_set_carry(TCGContext *s)
{
    tcg_out32(s, SUBFC | TAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_R0));
}
3007
/* Bitwise AND; logical ops place the destination in RA, hence SAB. */
static void tgen_and(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, AND | SAB(a1, a0, a2));
}

/* AND with immediate, via the width-specific mask synthesizers. */
static void tgen_andi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_andi32(s, a0, a1, a2);
    } else {
        tcg_out_andi64(s, a0, a1, a2);
    }
}

static const TCGOutOpBinary outop_and = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_and,
    .out_rri = tgen_andi,
};
3029
/* AND with complement: a0 = a1 & ~a2. */
static void tgen_andc(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ANDC | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_andc = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_andc,
};
3040
/*
 * Count leading zeros via cntlzw/cntlzd; a2 supplies the result for a
 * zero input, handled by tcg_out_cntxz (register or constant form).
 */
static void tgen_clz(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
}

/* As tgen_clz, with a2 as an immediate zero-input value. */
static void tgen_clzi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
}

static const TCGOutOpBinary outop_clz = {
    .base.static_constraint = C_O1_I2(r, r, rZW),
    .out_rrr = tgen_clz,
    .out_rri = tgen_clzi,
};
3060
/* Population count: popcntw/popcntd. */
static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD;
    tcg_out32(s, insn | SAB(a1, a0, 0));
}

/* The popcount insns require ISA 2.06. */
static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
{
    return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented;
}

static const TCGOutOpUnary outop_ctpop = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_ctpop,
    .out_rr = tgen_ctpop,
};
3077
/*
 * Count trailing zeros via cnttzw/cnttzd; a2 supplies the result for a
 * zero input, handled by tcg_out_cntxz.
 */
static void tgen_ctz(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
}

/* As tgen_ctz, with a2 as an immediate zero-input value. */
static void tgen_ctzi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
}

/* The count-trailing-zero insns require ISA 3.0. */
static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags)
{
    return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented;
}

static const TCGOutOpBinary outop_ctz = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_ctz,
    .out_rrr = tgen_ctz,
    .out_rri = tgen_ctzi,
};
3103
/* Equivalence: a0 = ~(a1 ^ a2). */
static void tgen_eqv(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, EQV | SAB(a1, a0, a2));
}
3109
#if TCG_TARGET_REG_BITS == 64
/* Extract the high 32 bits of a 64-bit value: logical shift right 32. */
static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_shri64(s, a0, a1, 32);
}

static const TCGOutOpUnary outop_extrh_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extrh_i64_i32,
};
#endif
3121
3122static void tgen_divs(TCGContext *s, TCGType type,
3123                      TCGReg a0, TCGReg a1, TCGReg a2)
3124{
3125    uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD;
3126    tcg_out32(s, insn | TAB(a0, a1, a2));
3127}
3128
static const TCGOutOpBinary outop_divs = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_divs,
};

/* No combined quotient/remainder insn; expanded by common code. */
static const TCGOutOpDivRem outop_divs2 = {
    .base.static_constraint = C_NotImplemented,
};
3137
3138static void tgen_divu(TCGContext *s, TCGType type,
3139                      TCGReg a0, TCGReg a1, TCGReg a2)
3140{
3141    uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU;
3142    tcg_out32(s, insn | TAB(a0, a1, a2));
3143}
3144
static const TCGOutOpBinary outop_divu = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_divu,
};

/* No combined quotient/remainder insn; expanded by common code. */
static const TCGOutOpDivRem outop_divu2 = {
    .base.static_constraint = C_NotImplemented,
};
3153
static const TCGOutOpBinary outop_eqv = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_eqv,
};
3158
3159static void tgen_mul(TCGContext *s, TCGType type,
3160                    TCGReg a0, TCGReg a1, TCGReg a2)
3161{
3162    uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD;
3163    tcg_out32(s, insn | TAB(a0, a1, a2));
3164}
3165
/* Multiply by immediate (mulli; constraint rI bounds the value). */
static void tgen_muli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out32(s, MULLI | TAI(a0, a1, a2));
}

static const TCGOutOpBinary outop_mul = {
    .base.static_constraint = C_O1_I2(r, r, rI),
    .out_rrr = tgen_mul,
    .out_rri = tgen_muli,
};
3177
/* No double-word multiply op; high parts come from mulsh/muluh. */
static const TCGOutOpMul2 outop_muls2 = {
    .base.static_constraint = C_NotImplemented,
};
3181
3182static void tgen_mulsh(TCGContext *s, TCGType type,
3183                       TCGReg a0, TCGReg a1, TCGReg a2)
3184{
3185    uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD;
3186    tcg_out32(s, insn | TAB(a0, a1, a2));
3187}
3188
static const TCGOutOpBinary outop_mulsh = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_mulsh,
};

/* No double-word multiply op; high parts come from mulsh/muluh. */
static const TCGOutOpMul2 outop_mulu2 = {
    .base.static_constraint = C_NotImplemented,
};
3197
3198static void tgen_muluh(TCGContext *s, TCGType type,
3199                       TCGReg a0, TCGReg a1, TCGReg a2)
3200{
3201    uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU;
3202    tcg_out32(s, insn | TAB(a0, a1, a2));
3203}
3204
static const TCGOutOpBinary outop_muluh = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_muluh,
};
3209
/* NAND: a0 = ~(a1 & a2); destination in RA, hence SAB. */
static void tgen_nand(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, NAND | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_nand = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_nand,
};

/* NOR: a0 = ~(a1 | a2). */
static void tgen_nor(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, NOR | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_nor = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_nor,
};
3231
/* Bitwise OR; destination in RA, hence SAB. */
static void tgen_or(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, OR | SAB(a1, a0, a2));
}

/* OR immediate; constraint rU keeps it within the ori/oris expansion. */
static void tgen_ori(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_ori32(s, a0, a1, a2);
}

static const TCGOutOpBinary outop_or = {
    .base.static_constraint = C_O1_I2(r, r, rU),
    .out_rrr = tgen_or,
    .out_rri = tgen_ori,
};
3249
/* OR with complement: a0 = a1 | ~a2. */
static void tgen_orc(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ORC | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_orc = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_orc,
};
3260
/* The modulo insns (modsw/modsd/moduw/modud) require ISA 3.0. */
static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags)
{
    return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented;
}

/* Signed remainder: modsw/modsd. */
static void tgen_rems(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_rems = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_mod,
    .out_rrr = tgen_rems,
};

/* Unsigned remainder: moduw/modud. */
static void tgen_remu(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_remu = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_mod,
    .out_rrr = tgen_remu,
};
3291
/* Rotate left by register: rlwnm (32-bit) / rldcl (64-bit), full mask. */
static void tgen_rotl(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31));
    } else {
        tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0));
    }
}

/* Rotate left by immediate: rlwinm / rldicl, full mask. */
static void tgen_rotli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
    } else {
        tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
    }
}

static const TCGOutOpBinary outop_rotl = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_rotl,
    .out_rri = tgen_rotli,
};

/* No rotate-right insn; common code converts to rotl. */
static const TCGOutOpBinary outop_rotr = {
    .base.static_constraint = C_NotImplemented,
};
3321
3322static void tgen_sar(TCGContext *s, TCGType type,
3323                     TCGReg a0, TCGReg a1, TCGReg a2)
3324{
3325    uint32_t insn = type == TCG_TYPE_I32 ? SRAW : SRAD;
3326    tcg_out32(s, insn | SAB(a1, a0, a2));
3327}
3328
/* Arithmetic shift right by immediate. */
static void tgen_sari(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    if (type == TCG_TYPE_I32) {
        tcg_out_sari32(s, a0, a1, a2 & 31);
    } else {
        tcg_out_sari64(s, a0, a1, a2 & 63);
    }
}

static const TCGOutOpBinary outop_sar = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_sar,
    .out_rri = tgen_sari,
};
3345
3346static void tgen_shl(TCGContext *s, TCGType type,
3347                     TCGReg a0, TCGReg a1, TCGReg a2)
3348{
3349    uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD;
3350    tcg_out32(s, insn | SAB(a1, a0, a2));
3351}
3352
/* Shift left by immediate. */
static void tgen_shli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    if (type == TCG_TYPE_I32) {
        tcg_out_shli32(s, a0, a1, a2 & 31);
    } else {
        tcg_out_shli64(s, a0, a1, a2 & 63);
    }
}

static const TCGOutOpBinary outop_shl = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_shl,
    .out_rri = tgen_shli,
};
3369
3370static void tgen_shr(TCGContext *s, TCGType type,
3371                     TCGReg a0, TCGReg a1, TCGReg a2)
3372{
3373    uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD;
3374    tcg_out32(s, insn | SAB(a1, a0, a2));
3375}
3376
/* Logical shift right by immediate. */
static void tgen_shri(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    if (type == TCG_TYPE_I32) {
        tcg_out_shri32(s, a0, a1, a2 & 31);
    } else {
        tcg_out_shri64(s, a0, a1, a2 & 63);
    }
}

static const TCGOutOpBinary outop_shr = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_shr,
    .out_rri = tgen_shri,
};
3393
/* Subtract: subf rt,ra,rb computes rb - ra, so a0 = a1 - a2. */
static void tgen_sub(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, SUBF | TAB(a0, a2, a1));
}

/* Reverse subtract from immediate: subfic gives a0 = a1 - a2. */
static void tgen_subfi(TCGContext *s, TCGType type,
                       TCGReg a0, tcg_target_long a1, TCGReg a2)
{
    tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
}

static const TCGOutOpSubtract outop_sub = {
    .base.static_constraint = C_O1_I2(r, rI, r),
    .out_rrr = tgen_sub,
    .out_rir = tgen_subfi,
};
3411
/* Subtract with borrow-out: subfc sets CA = !borrow. */
static void tgen_subbo_rrr(TCGContext *s, TCGType type,
                           TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, SUBFC | TAB(a0, a2, a1));
}

static void tgen_subbo_rri(TCGContext *s, TCGType type,
                           TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (a2 == 0) {
        /*
         * addic with 0 would clear CA, but subtracting zero must set
         * CA (no borrow); use the register form with a zero operand.
         */
        tcg_out_movi(s, type, TCG_REG_R0, 0);
        tgen_subbo_rrr(s, type, a0, a1, TCG_REG_R0);
    } else {
        /* a - b == a + (-b); addic provides the carry-out. */
        tgen_addco_rri(s, type, a0, a1, -a2);
    }
}

/* The underlying insn for subfi is subfic. */
#define tgen_subbo_rir  tgen_subfi

/* Both inputs immediate: load the subtrahend and use subfic. */
static void tgen_subbo_rii(TCGContext *s, TCGType type,
                           TCGReg a0, tcg_target_long a1, tcg_target_long a2)
{
    tcg_out_movi(s, type, TCG_REG_R0, a2);
    tgen_subbo_rir(s, type, a0, a1, TCG_REG_R0);
}

static TCGConstraintSetIndex cset_subbo(TCGType type, unsigned flags)
{
    /* Recall that the CA bit is defined based on the host word size. */
    return type == TCG_TYPE_REG ? C_O1_I2(r, rI, rN) : C_NotImplemented;
}

static const TCGOutOpAddSubCarry outop_subbo = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_subbo,
    .out_rrr = tgen_subbo_rrr,
    .out_rri = tgen_subbo_rri,
    .out_rir = tgen_subbo_rir,
    .out_rii = tgen_subbo_rii,
};
3453
/* Subtract with borrow-in and -out: subfe consumes and produces CA. */
static void tgen_subbio_rrr(TCGContext *s, TCGType type,
                            TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, SUBFE | TAB(a0, a2, a1));
}

static void tgen_subbio_rri(TCGContext *s, TCGType type,
                            TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* a - b - !CA  ==  a + ~b + CA. */
    tgen_addcio_rri(s, type, a0, a1, ~a2);
}

/* a1 restricted to 0/-1: subfme/subfze subtract a2 from it with CA. */
static void tgen_subbio_rir(TCGContext *s, TCGType type,
                            TCGReg a0, tcg_target_long a1, TCGReg a2)
{
    tcg_debug_assert(a1 == 0 || a1 == -1);
    tcg_out32(s, (a1 ? SUBFME : SUBFZE) | RT(a0) | RA(a2));
}

/* Both inputs immediate: load the subtrahend into R0 first. */
static void tgen_subbio_rii(TCGContext *s, TCGType type,
                            TCGReg a0, tcg_target_long a1, tcg_target_long a2)
{
    tcg_out_movi(s, type, TCG_REG_R0, a2);
    tgen_subbio_rir(s, type, a0, a1, TCG_REG_R0);
}

static TCGConstraintSetIndex cset_subbio(TCGType type, unsigned flags)
{
    return type == TCG_TYPE_REG ? C_O1_I2(r, rZM, rZM) : C_NotImplemented;
}

static const TCGOutOpAddSubCarry outop_subbio = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_subbio,
    .out_rrr = tgen_subbio_rrr,
    .out_rri = tgen_subbio_rri,
    .out_rir = tgen_subbio_rir,
    .out_rii = tgen_subbio_rii,
};
3493
/* Borrow-in only uses the same insns as borrow-in/out. */
#define outop_subbi  outop_subbio

static void tcg_out_set_borrow(TCGContext *s)
{
    /* borrow = !carry */
    /* addic r0,r0,0 can never carry, so this clears CA (sets borrow). */
    tcg_out32(s, ADDIC | TAI(TCG_REG_R0, TCG_REG_R0, 0));
}
3501
/* Bitwise XOR; destination in RA, hence SAB. */
static void tgen_xor(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, XOR | SAB(a1, a0, a2));
}

/* XOR immediate; constraint rU keeps it within the xori/xoris expansion. */
static void tgen_xori(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_xori32(s, a0, a1, a2);
}

static const TCGOutOpBinary outop_xor = {
    .base.static_constraint = C_O1_I2(r, r, rU),
    .out_rrr = tgen_xor,
    .out_rri = tgen_xori,
};
3519
/*
 * Byte-swap the low 16 bits of @src into @dst.  With ISA 3.10 use brh
 * directly; otherwise compose with rotate-and-insert, using R0 as
 * scratch when dst aliases src.  @flags are the TCG_BSWAP_* bits
 * governing extension of the result.
 */
static void tgen_bswap16(TCGContext *s, TCGType type,
                         TCGReg dst, TCGReg src, unsigned flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static const TCGOutOpBswap outop_bswap16 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap16,
};
3557
/*
 * Byte-swap the low 32 bits of @src into @dst.  With ISA 3.10 use brw
 * directly; otherwise compose with rotate-and-insert, using R0 as
 * scratch when dst aliases src.  @flags are the TCG_BSWAP_* bits
 * governing extension of the result.
 */
static void tgen_bswap32(TCGContext *s, TCGType type,
                         TCGReg dst, TCGReg src, unsigned flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static const TCGOutOpBswap outop_bswap32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap32,
};
3598
#if TCG_TARGET_REG_BITS == 64
/*
 * Byte-swap a full 64-bit value.  With ISA 3.10 use brd directly;
 * otherwise build the result with 32-bit rotate-and-insert steps,
 * swapping the two halves with a 64-bit rotate.  R0 serves as a
 * scratch register when dst aliases src.
 */
static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

static const TCGOutOpUnary outop_bswap64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap64,
};
#endif /* TCG_TARGET_REG_BITS == 64 */
3643
/* Arithmetic negate. */
static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    tcg_out32(s, NEG | RT(a0) | RA(a1));
}

static const TCGOutOpUnary outop_neg = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_neg,
};

/* Bitwise NOT, via nor a0,a1,a1. */
static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    tgen_nor(s, type, a0, a1, a1);
}

static const TCGOutOpUnary outop_not = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_not,
};
3663
/*
 * Deposit @len bits of @a2 into @a0 at @ofs, via rlwimi/rldimi.
 * The constraint "0" ties a1 to a0, so a1 is implicitly the
 * destination and unused here.
 */
static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                         TCGReg a2, unsigned ofs, unsigned len)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWIMI, a0, a2, ofs, 32 - ofs - len, 31 - ofs);
    } else {
        tcg_out_rld(s, RLDIMI, a0, a2, ofs, 64 - ofs - len);
    }
}

/*
 * Immediate deposit; constraint rZ restricts a2 to zero, so this is
 * simply clearing the field: and with the inverted mask.
 */
static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                          tcg_target_long a2, unsigned ofs, unsigned len)
{
    tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len));
}

static const TCGOutOpDeposit outop_deposit = {
    .base.static_constraint = C_O1_I2(r, 0, rZ),
    .out_rrr = tgen_deposit,
    .out_rri = tgen_depositi,
};
3685
/*
 * Unsigned bitfield extract: @len bits at @ofs.  Small low-order
 * fields become an AND with mask; otherwise rotate-and-mask with
 * rlwinm (32-bit) or rldicl (64-bit).
 */
static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                         unsigned ofs, unsigned len)
{
    if (ofs == 0 && len <= 16) {
        tgen_andi(s, TCG_TYPE_I32, a0, a1, (1 << len) - 1);
    } else if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWINM, a0, a1, 32 - ofs, 32 - len, 31);
    } else {
        tcg_out_rld(s, RLDICL, a0, a1, 64 - ofs, 64 - len);
    }
}

static const TCGOutOpExtract outop_extract = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extract,
};
3702
/*
 * Signed bitfield extract.  Only the cases reachable under the
 * backend's sextract constraints are handled: byte/halfword/word
 * sign-extension at offset 0, or a field ending at bit 32 via an
 * arithmetic shift.  Anything else is a bug.
 */
static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                          unsigned ofs, unsigned len)
{
    if (ofs == 0) {
        switch (len) {
        case 8:
            tcg_out_ext8s(s, type, a0, a1);
            return;
        case 16:
            tcg_out_ext16s(s, type, a0, a1);
            return;
        case 32:
            tcg_out_ext32s(s, a0, a1);
            return;
        }
    } else if (ofs + len == 32) {
        tcg_out_sari32(s, a0, a1, ofs);
        return;
    }
    g_assert_not_reached();
}

static const TCGOutOpExtract outop_sextract = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_sextract,
};

/* No double-register extract insn; expanded by common code. */
static const TCGOutOpExtract2 outop_extract2 = {
    .base.static_constraint = C_NotImplemented,
};
3733
/* Load zero-extended byte; tcg_out_mem_long picks D-form vs X-form. */
static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest,
                      TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LBZ, LBZX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld8u = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld8u,
};

/* No sign-extending byte load on PPC: lbz then extsb. */
static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest,
                      TCGReg base, ptrdiff_t offset)
{
    tgen_ld8u(s, type, dest, base, offset);
    tcg_out_ext8s(s, type, dest, dest);
}

static const TCGOutOpLoad outop_ld8s = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld8s,
};

/* Load zero-extended halfword: lhz/lhzx. */
static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LHZ, LHZX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld16u = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld16u,
};

/* Load sign-extended halfword: lha/lhax. */
static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LHA, LHAX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld16s = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld16s,
};
3777};
3778
#if TCG_TARGET_REG_BITS == 64
/* Load zero-extended word: lwz/lwzx. */
static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LWZ, LWZX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld32u = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld32u,
};

/* Load sign-extended word: lwa/lwax. */
static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LWA, LWAX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld32s = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld32s,
};
#endif
3802
/* Store byte: stb/stbx. */
static void tgen_st8(TCGContext *s, TCGType type, TCGReg data,
                     TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, STB, STBX, data, base, offset);
}

static const TCGOutOpStore outop_st8 = {
    .base.static_constraint = C_O0_I2(r, r),
    .out_r = tgen_st8,
};

/* Store halfword: sth/sthx. */
static void tgen_st16(TCGContext *s, TCGType type, TCGReg data,
                      TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, STH, STHX, data, base, offset);
}

static const TCGOutOpStore outop_st16 = {
    .base.static_constraint = C_O0_I2(r, r),
    .out_r = tgen_st16,
};

/* Full-register store, via the generic tcg_out_st. */
static const TCGOutOpStore outop_st = {
    .base.static_constraint = C_O0_I2(r, r),
    .out_r = tcg_out_st,
};
3829
3830
/*
 * Report whether vector opcode @opc with element size @vece can be
 * emitted: 1 = supported directly, 0 = unsupported, -1 = supported via
 * expansion into other vector ops.
 */
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_not_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
        return 1;
    case INDEX_op_orc_vec:
        return have_isa_2_07;
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
        return vece <= MO_32 || have_isa_2_07;
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
        return vece <= MO_32;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_rotli_vec:
        /* Immediate shifts expand into the variable-shift forms. */
        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
    case INDEX_op_cmp_vec:
    case INDEX_op_cmpsel_vec:
        return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
    case INDEX_op_neg_vec:
        return vece >= MO_32 && have_isa_3_00;
    case INDEX_op_mul_vec:
        switch (vece) {
        case MO_8:
        case MO_16:
            return -1;
        case MO_32:
            return have_isa_2_07 ? 1 : -1;
        case MO_64:
            return have_isa_3_10;
        }
        return 0;
    case INDEX_op_bitsel_vec:
        return have_vsx;
    case INDEX_op_rotrv_vec:
        /* Expanded as rotlv with negated count. */
        return -1;
    default:
        return 0;
    }
}
3890
/*
 * Duplicate (splat) one @vece-sized element of @src into every lane of
 * vector register @dst.  Returns false only when @src is a general
 * register and the element size has no direct move-and-splat
 * instruction; the caller then falls back on dupm or mov+dup.
 */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    tcg_debug_assert(dst >= TCG_REG_V0);

    /* Splat from integer reg allowed via constraints for v3.00.  */
    if (src < TCG_REG_V0) {
        tcg_debug_assert(have_isa_3_00);
        switch (vece) {
        case MO_64:
            /* mtvsrdd with RA == RB copies the GPR into both doublewords. */
            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
            return true;
        case MO_32:
            /* mtvsrws splats the low word of the GPR.  */
            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
            return true;
        default:
            /* Fail, so that we fall back on either dupm or mov+dup.  */
            return false;
        }
    }

    /*
     * Recall we use (or emulate) VSX integer loads, so the integer is
     * right justified within the left (zero-index) double-word.
     */
    switch (vece) {
    case MO_8:
        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
        break;
    case MO_16:
        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
        break;
    case MO_32:
        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
        break;
    case MO_64:
        if (have_vsx) {
            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
            break;
        }
        /* Without VSX: two quadword shifts replicate the doubleword.  */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
3939
/*
 * Load one @vece-sized element from @base + @offset and splat it into
 * all lanes of vector register @out.  The pre-3.00 paths load the
 * element (or the containing quadword) and then replicate the lane it
 * landed in; that lane index is flipped on little-endian hosts.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg out, TCGReg base, intptr_t offset)
{
    int elt;

    tcg_debug_assert(out >= TCG_REG_V0);
    switch (vece) {
    case MO_8:
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
        }
        /* Lane of the loaded byte within the quadword.  */
        elt = extract32(offset, 0, 4);
#if !HOST_BIG_ENDIAN
        elt ^= 15;
#endif
        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_16:
        tcg_debug_assert((offset & 1) == 0);
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
        }
        elt = extract32(offset, 1, 3);
#if !HOST_BIG_ENDIAN
        elt ^= 7;
#endif
        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_32:
        if (have_isa_3_00) {
            /* lxvwsx: load-and-splat word in one instruction.  */
            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
        elt = extract32(offset, 2, 2);
#if !HOST_BIG_ENDIAN
        elt ^= 3;
#endif
        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_64:
        if (have_vsx) {
            /* lxvdsx: load-and-splat doubleword in one instruction.  */
            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
        /* TMP1 = quadword rotated by 8 bytes; pick the shift pair that
           replicates the addressed doubleword.  */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
        elt = extract32(offset, 3, 1);
#if !HOST_BIG_ENDIAN
        elt = !elt;
#endif
        if (elt) {
            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
        } else {
            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
4008
/* a0 = ~a1, implemented as vnor a1,a1.  */
static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
{
    tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
}
4013
/* a0 = a1 | a2 (vor).  */
static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
}
4018
/* a0 = a1 | ~a2 (vorc).  */
static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
}
4023
/* a0 = a1 & a2 (vand).  */
static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
}
4028
/* a0 = a1 & ~a2 (vandc).  */
static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
}
4033
/*
 * Bitwise select: d = (t & c) | (f & ~c) — each set bit of @c picks the
 * corresponding bit of @t, each clear bit picks @f.  Uses a single
 * xxsel when available, otherwise composes it via TCG_VEC_TMP2.
 */
static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
                               TCGReg c, TCGReg t, TCGReg f)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
    } else {
        tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
        tcg_out_andc_vec(s, d, f, c);
        tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
    }
}
4045
/*
 * Emit a vector comparison a0 = (a1 cond a2) using only the directly
 * available compare instructions (EQ/NE/GT/GTU), swapping operands
 * and/or inverting the condition as needed.  Returns true when the
 * emitted result is the inverse of @cond, in which case the caller
 * must complement (NOT) a0.
 */
static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
                                  TCGReg a1, TCGReg a2, TCGCond cond)
{
    static const uint32_t
        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
    uint32_t insn;

    bool need_swap = false, need_inv = false;

    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_GT:
    case TCG_COND_GTU:
        break;
    case TCG_COND_NE:
        /* ISA 3.0 has vcmpne* for byte/half/word; otherwise use !EQ. */
        if (have_isa_3_00 && vece <= MO_32) {
            break;
        }
        /* fall through */
    case TCG_COND_LE:
    case TCG_COND_LEU:
        need_inv = true;
        break;
    case TCG_COND_LT:
    case TCG_COND_LTU:
        /* LT(a,b) == GT(b,a) */
        need_swap = true;
        break;
    case TCG_COND_GE:
    case TCG_COND_GEU:
        /* GE(a,b) == !GT(b,a) */
        need_swap = need_inv = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (need_inv) {
        cond = tcg_invert_cond(cond);
    }
    if (need_swap) {
        TCGReg swap = a1;
        a1 = a2;
        a2 = swap;
        cond = tcg_swap_cond(cond);
    }

    /* After canonicalization, only these four conditions remain.  */
    switch (cond) {
    case TCG_COND_EQ:
        insn = eq_op[vece];
        break;
    case TCG_COND_NE:
        insn = ne_op[vece];
        break;
    case TCG_COND_GT:
        insn = gts_op[vece];
        break;
    case TCG_COND_GTU:
        insn = gtu_op[vece];
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));

    return need_inv;
}
4116
4117static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
4118                            TCGReg a1, TCGReg a2, TCGCond cond)
4119{
4120    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
4121        tcg_out_not_vec(s, a0, a0);
4122    }
4123}
4124
4125static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
4126                               TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
4127                               TCGReg v4, TCGCond cond)
4128{
4129    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);
4130
4131    if (!const_v3) {
4132        if (inv) {
4133            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
4134        } else {
4135            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
4136        }
4137    } else if (v3) {
4138        if (inv) {
4139            tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
4140        } else {
4141            tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
4142        }
4143    } else {
4144        if (inv) {
4145            tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
4146        } else {
4147            tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
4148        }
4149    }
4150}
4151
/*
 * Emit one vector opcode.  Loads, stores, dup and the compare/select
 * family are routed to helpers; everything else maps to a single
 * instruction chosen from the per-element-size tables below (a zero
 * table entry means "no such instruction at this size" and is caught
 * by the assert at the bottom).
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const uint32_t
        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
        neg_op[4] = { 0, 0, VNEGW, VNEGD },
        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };

    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
    uint32_t insn;

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        return;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        return;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        return;

    case INDEX_op_add_vec:
        insn = add_op[vece];
        break;
    case INDEX_op_sub_vec:
        insn = sub_op[vece];
        break;
    case INDEX_op_neg_vec:
        insn = neg_op[vece];
        /* vnegw/vnegd take the source in VRB; route a1 there, zero VRA. */
        a2 = a1;
        a1 = 0;
        break;
    case INDEX_op_mul_vec:
        insn = mul_op[vece];
        break;
    case INDEX_op_ssadd_vec:
        insn = ssadd_op[vece];
        break;
    case INDEX_op_sssub_vec:
        insn = sssub_op[vece];
        break;
    case INDEX_op_usadd_vec:
        insn = usadd_op[vece];
        break;
    case INDEX_op_ussub_vec:
        insn = ussub_op[vece];
        break;
    case INDEX_op_smin_vec:
        insn = smin_op[vece];
        break;
    case INDEX_op_umin_vec:
        insn = umin_op[vece];
        break;
    case INDEX_op_smax_vec:
        insn = smax_op[vece];
        break;
    case INDEX_op_umax_vec:
        insn = umax_op[vece];
        break;
    case INDEX_op_shlv_vec:
        insn = shlv_op[vece];
        break;
    case INDEX_op_shrv_vec:
        insn = shrv_op[vece];
        break;
    case INDEX_op_sarv_vec:
        insn = sarv_op[vece];
        break;
    case INDEX_op_and_vec:
        tcg_out_and_vec(s, a0, a1, a2);
        return;
    case INDEX_op_or_vec:
        tcg_out_or_vec(s, a0, a1, a2);
        return;
    case INDEX_op_xor_vec:
        insn = VXOR;
        break;
    case INDEX_op_andc_vec:
        tcg_out_andc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_not_vec:
        tcg_out_not_vec(s, a0, a1);
        return;
    case INDEX_op_orc_vec:
        tcg_out_orc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_nand_vec:
        insn = VNAND;
        break;
    case INDEX_op_nor_vec:
        insn = VNOR;
        break;
    case INDEX_op_eqv_vec:
        insn = VEQV;
        break;

    case INDEX_op_cmp_vec:
        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
        return;
    case INDEX_op_cmpsel_vec:
        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
                           args[3], const_args[3], args[4], args[5]);
        return;
    case INDEX_op_bitsel_vec:
        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
        return;

    case INDEX_op_dup2_vec:
        assert(TCG_TARGET_REG_BITS == 32);
        /* With inputs a1 = xLxx, a2 = xHxx  */
        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
        return;

    case INDEX_op_ppc_mrgh_vec:
        insn = mrgh_op[vece];
        break;
    case INDEX_op_ppc_mrgl_vec:
        insn = mrgl_op[vece];
        break;
    case INDEX_op_ppc_muleu_vec:
        insn = muleu_op[vece];
        break;
    case INDEX_op_ppc_mulou_vec:
        insn = mulou_op[vece];
        break;
    case INDEX_op_ppc_pkum_vec:
        insn = pkum_op[vece];
        break;
    case INDEX_op_rotlv_vec:
        insn = rotl_op[vece];
        break;
    case INDEX_op_ppc_msum_vec:
        /* vmsumuhm is the only four-operand form handled here.  */
        tcg_debug_assert(vece == MO_16);
        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
        return;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(insn != 0);
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}
4321
/*
 * Expand a shift/rotate-by-immediate into the variable-shift opcode
 * @opci, with the count splatted into a vector constant.
 */
static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
{
    TCGv_vec t1;

    if (vece == MO_32) {
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using negative numbers gets us the 4th bit easily.
         */
        imm = sextract32(imm, 0, 5);
    } else {
        /* The count is taken modulo the element width.  */
        imm &= (8 << vece) - 1;
    }

    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
    t1 = tcg_constant_vec(type, MO_8, imm);
    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
}
4342
/*
 * Expand v0 = v1 * v2 for element sizes lacking a direct instruction:
 * MO_8/MO_16 via even/odd widening multiplies plus merge and pack,
 * MO_32 (pre-2.07 only) via 16-bit partial products.
 */
static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2)
{
    TCGv_vec t1 = tcg_temp_new_vec(type);
    TCGv_vec t2 = tcg_temp_new_vec(type);
    TCGv_vec c0, c16;

    switch (vece) {
    case MO_8:
    case MO_16:
        /* Widening products of even/odd elements, re-interleaved and
           packed back down to the original element size.  */
        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
        break;

    case MO_32:
        tcg_debug_assert(!have_isa_2_07);
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using -16 is a quick way to represent 16.
         */
        c16 = tcg_constant_vec(type, MO_8, -16);
        c0 = tcg_constant_vec(type, MO_8, 0);

        /*
         * 32x32 from 16-bit pieces: mulou forms the low-half product,
         * msum against the half-swapped v2 accumulates the cross terms,
         * which are then shifted into the high half and added
         * (see VMULOUH/VMSUMUHM in the ISA).
         */
        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
        tcg_gen_add_vec(MO_32, v0, t1, t2);
        break;

    default:
        g_assert_not_reached();
    }
    tcg_temp_free_vec(t1);
    tcg_temp_free_vec(t2);
}
4391
4392void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4393                       TCGArg a0, ...)
4394{
4395    va_list va;
4396    TCGv_vec v0, v1, v2, t0;
4397    TCGArg a2;
4398
4399    va_start(va, a0);
4400    v0 = temp_tcgv_vec(arg_temp(a0));
4401    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
4402    a2 = va_arg(va, TCGArg);
4403
4404    switch (opc) {
4405    case INDEX_op_shli_vec:
4406        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
4407        break;
4408    case INDEX_op_shri_vec:
4409        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
4410        break;
4411    case INDEX_op_sari_vec:
4412        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
4413        break;
4414    case INDEX_op_rotli_vec:
4415        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
4416        break;
4417    case INDEX_op_mul_vec:
4418        v2 = temp_tcgv_vec(arg_temp(a2));
4419        expand_vec_mul(type, vece, v0, v1, v2);
4420        break;
4421    case INDEX_op_rotlv_vec:
4422        v2 = temp_tcgv_vec(arg_temp(a2));
4423        t0 = tcg_temp_new_vec(type);
4424        tcg_gen_neg_vec(vece, t0, v2);
4425        tcg_gen_rotlv_vec(vece, v0, v1, t0);
4426        tcg_temp_free_vec(t0);
4427        break;
4428    default:
4429        g_assert_not_reached();
4430    }
4431    va_end(va);
4432}
4433
/*
 * Map a TCG opcode (at @type/@flags) to its operand-constraint set
 * for this backend.
 */
static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
{
    switch (op) {
    case INDEX_op_qemu_st:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st2:
        /* Host-width dependent: 64-bit hosts use the pair constraints. */
        return TCG_TARGET_REG_BITS == 64
               ? C_O0_I3(o, m, r) : C_O0_I3(r, r, r);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    case INDEX_op_dup_vec:
        /* With ISA 3.00, dup can also splat directly from a GPR
           (see tcg_out_dup_vec).  */
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);
    case INDEX_op_cmpsel_vec:
        return C_O1_I4(v, v, v, vZM, v);

    default:
        return C_NotImplemented;
    }
}
4501
4502static void tcg_target_init(TCGContext *s)
4503{
4504    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
4505    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
4506    if (have_altivec) {
4507        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
4508        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
4509    }
4510
4511    tcg_target_call_clobber_regs = 0;
4512    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
4513    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
4514    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
4515    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
4516    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
4517    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
4518    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
4519    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
4520    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
4521    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
4522    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
4523    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
4524
4525    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
4526    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
4527    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
4528    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
4529    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
4530    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
4531    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
4532    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
4533    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
4534    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
4535    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
4536    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
4537    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
4538    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
4539    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
4540    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4541    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
4542    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
4543    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
4544    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
4545
4546    s->reserved_regs = 0;
4547    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
4548    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
4549#if defined(_CALL_SYSV)
4550    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
4551#endif
4552#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
4553    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
4554#endif
4555    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
4556    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
4557    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
4558    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
4559    if (USE_REG_TB) {
4560        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
4561    }
4562}
4563
4564#ifdef __ELF__
/*
 * Layout of the debug-frame image registered with the JIT: CIE, FDE
 * header, CFA definition, then a three-byte LR entry followed by one
 * two-byte save-slot entry per callee-saved register (filled in by
 * tcg_register_jit below).
 */
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;
4571
4572/* We're expecting a 2 byte uleb128 encoded value.  */
4573QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
4574
4575#if TCG_TARGET_REG_BITS == 64
4576# define ELF_HOST_MACHINE EM_PPC64
4577#else
4578# define ELF_HOST_MACHINE EM_PPC
4579#endif
4580
/*
 * Static unwind-info template.  The per-register entries after the LR
 * record, and the function start/length, are filled in at runtime by
 * tcg_register_jit.
 */
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};
4602
4603void tcg_register_jit(const void *buf, size_t buf_size)
4604{
4605    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4606    int i;
4607
4608    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4609        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4610        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4611    }
4612
4613    debug_frame.fde.func_start = (uintptr_t)buf;
4614    debug_frame.fde.func_len = buf_size;
4615
4616    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4617}
4618#endif /* __ELF__ */
4619#undef VMULEUB
4620#undef VMULEUH
4621#undef VMULEUW
4622#undef VMULOUB
4623#undef VMULOUH
4624#undef VMULOUW
4625#undef VMSUMUHM
4626