xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision 33aba058c8fcc9b1581b03a1fbac45d8d91baac6)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26
27/*
28 * Standardize on the _CALL_FOO symbols used by GCC:
29 * Apple XCode does not define _CALL_DARWIN.
30 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
31 */
32#if TCG_TARGET_REG_BITS == 64
33# ifdef _CALL_AIX
34    /* ok */
35# elif defined(_CALL_ELF) && _CALL_ELF == 1
36#  define _CALL_AIX
37# elif defined(_CALL_ELF) && _CALL_ELF == 2
38    /* ok */
39# else
40#  error "Unknown ABI"
41# endif
42#else
43# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
44    /* ok */
45# elif defined(__APPLE__)
46#  define _CALL_DARWIN
47# elif defined(__ELF__)
48#  define _CALL_SYSV
49# else
50#  error "Unknown ABI"
51# endif
52#endif
53
54#if TCG_TARGET_REG_BITS == 64
55# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
56# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
57#else
58# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
59# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
60#endif
61#ifdef _CALL_SYSV
62# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
63# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
64#else
65# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
66# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
67#endif
68
69/* For some memory operations, we need a scratch that isn't R0.  For the AIX
70   calling convention, we can re-use the TOC register since we'll be reloading
71   it at every call.  Otherwise R12 will do nicely as neither a call-saved
72   register nor a parameter register.  */
73#ifdef _CALL_AIX
74# define TCG_REG_TMP1   TCG_REG_R2
75#else
76# define TCG_REG_TMP1   TCG_REG_R12
77#endif
78#define TCG_REG_TMP2    TCG_REG_R11
79
80#define TCG_VEC_TMP1    TCG_REG_V0
81#define TCG_VEC_TMP2    TCG_REG_V1
82
83#define TCG_REG_TB     TCG_REG_R31
84#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
85
86/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
87#define SZP  ((int)sizeof(void *))
88
89/* Shorthand for size of a register.  */
90#define SZR  (TCG_TARGET_REG_BITS / 8)
91
92#define TCG_CT_CONST_S16     0x00100
93#define TCG_CT_CONST_U16     0x00200
94#define TCG_CT_CONST_N16     0x00400
95#define TCG_CT_CONST_S32     0x00800
96#define TCG_CT_CONST_U32     0x01000
97#define TCG_CT_CONST_ZERO    0x02000
98#define TCG_CT_CONST_MONE    0x04000
99#define TCG_CT_CONST_WSZ     0x08000
100#define TCG_CT_CONST_CMP     0x10000
101
102#define ALL_GENERAL_REGS  0xffffffffu
103#define ALL_VECTOR_REGS   0xffffffff00000000ull
104
105#ifndef R_PPC64_PCREL34
106#define R_PPC64_PCREL34  132
107#endif
108
109#define have_isel  (cpuinfo & CPUINFO_ISEL)
110
111#define TCG_GUEST_BASE_REG  TCG_REG_R30
112
113#ifdef CONFIG_DEBUG_TCG
/* Register names for debug dumps, indexed by TCGReg (32 gprs + 32 vrs). */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
124#endif
125
/*
 * Register allocation preference order: call-saved gprs first (values in
 * them survive helper calls), then call-clobbered non-argument registers,
 * then the argument registers in reverse order so the lowest-numbered
 * (first) arguments are consumed last.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};
178
/* Integer argument registers, r3..r10, in argument order. */
static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};
189
190static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
191{
192    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
193    tcg_debug_assert(slot >= 0 && slot <= 1);
194    return TCG_REG_R3 + slot;
195}
196
/* Call-saved registers: r14..r31 (plus r11 under the Darwin ABI). */
static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
220
/*
 * Displacement of @target relative to the TB base register.
 * For PPC, we use TB+4 instead of TB as the base (the +4 bias matches
 * how TCG_REG_TB is established elsewhere in this backend).
 */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
{
    return tcg_tbrel_diff(s, target) - 4;
}
226
227static inline bool in_range_b(tcg_target_long target)
228{
229    return target == sextract64(target, 0, 26);
230}
231
/*
 * Compute the 26-bit branch displacement field for a branch from @pc
 * to @target; asserts that the target is reachable.
 */
static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    /* Low two insn bits are AA/LK, not displacement; disp is 4-aligned. */
    return disp & 0x3fffffc;
}
239
/*
 * Patch the 26-bit displacement of the unconditional branch at @src_rw
 * to reach @target.  Returns false if the target is out of range.
 */
static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    /* Displacements are relative to the executable (rx) mapping. */
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}
251
/*
 * Compute the 16-bit conditional-branch displacement field for a branch
 * from @pc to @target; asserts that the target is reachable.
 */
static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    /* Conditional branch reach is +/- 32KB. */
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}
259
/*
 * Patch the 16-bit displacement of the conditional branch at @src_rw
 * to reach @target.  Returns false if the target is out of range.
 */
static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}
271
/*
 * Patch the 34-bit displacement of a prefixed (two-word) pc-relative
 * insn at @src_rw: the high 18 bits live in the prefix word, the low
 * 16 bits in the suffix.  Returns false if @target is out of range.
 */
static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
        return true;
    }
    return false;
}
284
static bool mask_operand(uint32_t c, int *mb, int *me);
static bool mask64_operand(uint64_t c, int *mb, int *me);

/* Test if constant @sval matches the constraint bits in @ct. */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = sval;
    int mb, me;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* For 32-bit operations, consider only the low 32 bits, keeping
       both the zero-extended and sign-extended views. */
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    if (ct & TCG_CT_CONST_CMP) {
        /* Map the comparison kind onto the immediate forms that the
           corresponding compare/test insns accept. */
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            /* Equality can use either signed or unsigned compare. */
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            /* Signed compares take a signed 16-bit immediate. */
            ct |= TCG_CT_CONST_S16;
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            /* Unsigned compares take an unsigned 16-bit immediate. */
            ct |= TCG_CT_CONST_U16;
            break;
        case TCG_COND_TSTEQ:
        case TCG_COND_TSTNE:
            /* A 16-bit immediate in either half-word position ... */
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
                return 1;
            }
            /* ... or a mask expressible by a rotate-and-mask insn
               (see mask_operand/mask64_operand). */
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
                return 1;
            }
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
                return 1;
            }
            return 0;
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
        return 1;
    }
    /* N16: the negation of the value fits in a signed 16-bit field. */
    if ((ct & TCG_CT_CONST_N16) && -sval == (int16_t)-sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
        return 1;
    }
    /* WSZ: the operation's word size in bits (32 or 64). */
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}
366
367#define OPCD(opc) ((opc)<<26)
368#define XO19(opc) (OPCD(19)|((opc)<<1))
369#define MD30(opc) (OPCD(30)|((opc)<<2))
370#define MDS30(opc) (OPCD(30)|((opc)<<1))
371#define XO31(opc) (OPCD(31)|((opc)<<1))
372#define XO58(opc) (OPCD(58)|(opc))
373#define XO62(opc) (OPCD(62)|(opc))
374#define VX4(opc)  (OPCD(4)|(opc))
375
376#define B      OPCD( 18)
377#define BC     OPCD( 16)
378
379#define LBZ    OPCD( 34)
380#define LHZ    OPCD( 40)
381#define LHA    OPCD( 42)
382#define LWZ    OPCD( 32)
383#define LWZUX  XO31( 55)
384#define LD     XO58(  0)
385#define LDX    XO31( 21)
386#define LDU    XO58(  1)
387#define LDUX   XO31( 53)
388#define LWA    XO58(  2)
389#define LWAX   XO31(341)
390#define LQ     OPCD( 56)
391
392#define STB    OPCD( 38)
393#define STH    OPCD( 44)
394#define STW    OPCD( 36)
395#define STD    XO62(  0)
396#define STDU   XO62(  1)
397#define STDX   XO31(149)
398#define STQ    XO62(  2)
399
400#define PLWA   OPCD( 41)
401#define PLD    OPCD( 57)
402#define PLXSD  OPCD( 42)
403#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */
404
405#define PSTD   OPCD( 61)
406#define PSTXSD OPCD( 46)
407#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */
408
409#define ADDIC  OPCD( 12)
410#define ADDI   OPCD( 14)
411#define ADDIS  OPCD( 15)
412#define ORI    OPCD( 24)
413#define ORIS   OPCD( 25)
414#define XORI   OPCD( 26)
415#define XORIS  OPCD( 27)
416#define ANDI   OPCD( 28)
417#define ANDIS  OPCD( 29)
418#define MULLI  OPCD(  7)
419#define CMPLI  OPCD( 10)
420#define CMPI   OPCD( 11)
421#define SUBFIC OPCD( 8)
422
423#define LWZU   OPCD( 33)
424#define STWU   OPCD( 37)
425
426#define RLWIMI OPCD( 20)
427#define RLWINM OPCD( 21)
428#define RLWNM  OPCD( 23)
429
430#define RLDICL MD30(  0)
431#define RLDICR MD30(  1)
432#define RLDIMI MD30(  3)
433#define RLDCL  MDS30( 8)
434
435#define BCLR   XO19( 16)
436#define BCCTR  XO19(528)
437#define CRAND  XO19(257)
438#define CRANDC XO19(129)
439#define CRNAND XO19(225)
440#define CROR   XO19(449)
441#define CRNOR  XO19( 33)
442#define ADDPCIS XO19( 2)
443
444#define EXTSB  XO31(954)
445#define EXTSH  XO31(922)
446#define EXTSW  XO31(986)
447#define ADD    XO31(266)
448#define ADDE   XO31(138)
449#define ADDME  XO31(234)
450#define ADDZE  XO31(202)
451#define ADDC   XO31( 10)
452#define AND    XO31( 28)
453#define SUBF   XO31( 40)
454#define SUBFC  XO31(  8)
455#define SUBFE  XO31(136)
456#define SUBFME XO31(232)
457#define SUBFZE XO31(200)
458#define OR     XO31(444)
459#define XOR    XO31(316)
460#define MULLW  XO31(235)
461#define MULHW  XO31( 75)
462#define MULHWU XO31( 11)
463#define DIVW   XO31(491)
464#define DIVWU  XO31(459)
465#define MODSW  XO31(779)
466#define MODUW  XO31(267)
467#define CMP    XO31(  0)
468#define CMPL   XO31( 32)
469#define LHBRX  XO31(790)
470#define LWBRX  XO31(534)
471#define LDBRX  XO31(532)
472#define STHBRX XO31(918)
473#define STWBRX XO31(662)
474#define STDBRX XO31(660)
475#define MFSPR  XO31(339)
476#define MTSPR  XO31(467)
477#define SRAWI  XO31(824)
478#define NEG    XO31(104)
479#define MFCR   XO31( 19)
480#define MFOCRF (MFCR | (1u << 20))
481#define NOR    XO31(124)
482#define CNTLZW XO31( 26)
483#define CNTLZD XO31( 58)
484#define CNTTZW XO31(538)
485#define CNTTZD XO31(570)
486#define CNTPOPW XO31(378)
487#define CNTPOPD XO31(506)
488#define ANDC   XO31( 60)
489#define ORC    XO31(412)
490#define EQV    XO31(284)
491#define NAND   XO31(476)
492#define ISEL   XO31( 15)
493
494#define MULLD  XO31(233)
495#define MULHD  XO31( 73)
496#define MULHDU XO31(  9)
497#define DIVD   XO31(489)
498#define DIVDU  XO31(457)
499#define MODSD  XO31(777)
500#define MODUD  XO31(265)
501
502#define LBZX   XO31( 87)
503#define LHZX   XO31(279)
504#define LHAX   XO31(343)
505#define LWZX   XO31( 23)
506#define STBX   XO31(215)
507#define STHX   XO31(407)
508#define STWX   XO31(151)
509
510#define EIEIO  XO31(854)
511#define HWSYNC XO31(598)
512#define LWSYNC (HWSYNC | (1u << 21))
513
514#define SPR(a, b) ((((a)<<5)|(b))<<11)
515#define LR     SPR(8, 0)
516#define CTR    SPR(9, 0)
517
518#define SLW    XO31( 24)
519#define SRW    XO31(536)
520#define SRAW   XO31(792)
521
522#define SLD    XO31( 27)
523#define SRD    XO31(539)
524#define SRAD   XO31(794)
525#define SRADI  XO31(413<<1)
526
527#define BRH    XO31(219)
528#define BRW    XO31(155)
529#define BRD    XO31(187)
530
531#define TW     XO31( 4)
532#define TRAP   (TW | TO(31))
533
534#define SETBC    XO31(384)  /* v3.10 */
535#define SETBCR   XO31(416)  /* v3.10 */
536#define SETNBC   XO31(448)  /* v3.10 */
537#define SETNBCR  XO31(480)  /* v3.10 */
538
539#define NOP    ORI  /* ori 0,0,0 */
540
541#define LVX        XO31(103)
542#define LVEBX      XO31(7)
543#define LVEHX      XO31(39)
544#define LVEWX      XO31(71)
545#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
546#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
547#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
548#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
549#define LXSD       (OPCD(57) | 2)   /* v3.00 */
550#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
551
552#define STVX       XO31(231)
553#define STVEWX     XO31(199)
554#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
555#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
556#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
557#define STXSD      (OPCD(61) | 2)   /* v3.00 */
558
559#define VADDSBS    VX4(768)
560#define VADDUBS    VX4(512)
561#define VADDUBM    VX4(0)
562#define VADDSHS    VX4(832)
563#define VADDUHS    VX4(576)
564#define VADDUHM    VX4(64)
565#define VADDSWS    VX4(896)
566#define VADDUWS    VX4(640)
567#define VADDUWM    VX4(128)
568#define VADDUDM    VX4(192)       /* v2.07 */
569
570#define VSUBSBS    VX4(1792)
571#define VSUBUBS    VX4(1536)
572#define VSUBUBM    VX4(1024)
573#define VSUBSHS    VX4(1856)
574#define VSUBUHS    VX4(1600)
575#define VSUBUHM    VX4(1088)
576#define VSUBSWS    VX4(1920)
577#define VSUBUWS    VX4(1664)
578#define VSUBUWM    VX4(1152)
579#define VSUBUDM    VX4(1216)      /* v2.07 */
580
581#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
582#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
583
584#define VMAXSB     VX4(258)
585#define VMAXSH     VX4(322)
586#define VMAXSW     VX4(386)
587#define VMAXSD     VX4(450)       /* v2.07 */
588#define VMAXUB     VX4(2)
589#define VMAXUH     VX4(66)
590#define VMAXUW     VX4(130)
591#define VMAXUD     VX4(194)       /* v2.07 */
592#define VMINSB     VX4(770)
593#define VMINSH     VX4(834)
594#define VMINSW     VX4(898)
595#define VMINSD     VX4(962)       /* v2.07 */
596#define VMINUB     VX4(514)
597#define VMINUH     VX4(578)
598#define VMINUW     VX4(642)
599#define VMINUD     VX4(706)       /* v2.07 */
600
601#define VCMPEQUB   VX4(6)
602#define VCMPEQUH   VX4(70)
603#define VCMPEQUW   VX4(134)
604#define VCMPEQUD   VX4(199)       /* v2.07 */
605#define VCMPGTSB   VX4(774)
606#define VCMPGTSH   VX4(838)
607#define VCMPGTSW   VX4(902)
608#define VCMPGTSD   VX4(967)       /* v2.07 */
609#define VCMPGTUB   VX4(518)
610#define VCMPGTUH   VX4(582)
611#define VCMPGTUW   VX4(646)
612#define VCMPGTUD   VX4(711)       /* v2.07 */
613#define VCMPNEB    VX4(7)         /* v3.00 */
614#define VCMPNEH    VX4(71)        /* v3.00 */
615#define VCMPNEW    VX4(135)       /* v3.00 */
616
617#define VSLB       VX4(260)
618#define VSLH       VX4(324)
619#define VSLW       VX4(388)
620#define VSLD       VX4(1476)      /* v2.07 */
621#define VSRB       VX4(516)
622#define VSRH       VX4(580)
623#define VSRW       VX4(644)
624#define VSRD       VX4(1732)      /* v2.07 */
625#define VSRAB      VX4(772)
626#define VSRAH      VX4(836)
627#define VSRAW      VX4(900)
628#define VSRAD      VX4(964)       /* v2.07 */
629#define VRLB       VX4(4)
630#define VRLH       VX4(68)
631#define VRLW       VX4(132)
632#define VRLD       VX4(196)       /* v2.07 */
633
634#define VMULEUB    VX4(520)
635#define VMULEUH    VX4(584)
636#define VMULEUW    VX4(648)       /* v2.07 */
637#define VMULOUB    VX4(8)
638#define VMULOUH    VX4(72)
639#define VMULOUW    VX4(136)       /* v2.07 */
640#define VMULUWM    VX4(137)       /* v2.07 */
641#define VMULLD     VX4(457)       /* v3.10 */
642#define VMSUMUHM   VX4(38)
643
644#define VMRGHB     VX4(12)
645#define VMRGHH     VX4(76)
646#define VMRGHW     VX4(140)
647#define VMRGLB     VX4(268)
648#define VMRGLH     VX4(332)
649#define VMRGLW     VX4(396)
650
651#define VPKUHUM    VX4(14)
652#define VPKUWUM    VX4(78)
653
654#define VAND       VX4(1028)
655#define VANDC      VX4(1092)
656#define VNOR       VX4(1284)
657#define VOR        VX4(1156)
658#define VXOR       VX4(1220)
659#define VEQV       VX4(1668)      /* v2.07 */
660#define VNAND      VX4(1412)      /* v2.07 */
661#define VORC       VX4(1348)      /* v2.07 */
662
663#define VSPLTB     VX4(524)
664#define VSPLTH     VX4(588)
665#define VSPLTW     VX4(652)
666#define VSPLTISB   VX4(780)
667#define VSPLTISH   VX4(844)
668#define VSPLTISW   VX4(908)
669
670#define VSLDOI     VX4(44)
671
672#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
673#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
674#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
675
676#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
677#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
678#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
679#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
680#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
681#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
682
683#define RT(r) ((r)<<21)
684#define RS(r) ((r)<<21)
685#define RA(r) ((r)<<16)
686#define RB(r) ((r)<<11)
687#define TO(t) ((t)<<21)
688#define SH(s) ((s)<<11)
689#define MB(b) ((b)<<6)
690#define ME(e) ((e)<<1)
691#define BO(o) ((o)<<21)
692#define MB64(b) ((b)<<5)
693#define FXM(b) (1 << (19 - (b)))
694
695#define VRT(r)  (((r) & 31) << 21)
696#define VRA(r)  (((r) & 31) << 16)
697#define VRB(r)  (((r) & 31) << 11)
698#define VRC(r)  (((r) & 31) <<  6)
699
700#define LK    1
701
702#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
703#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
704#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
705#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
706
707#define BF(n)    ((n)<<23)
708#define BI(n, c) (((c)+((n)*4))<<16)
709#define BT(n, c) (((c)+((n)*4))<<21)
710#define BA(n, c) (((c)+((n)*4))<<16)
711#define BB(n, c) (((c)+((n)*4))<<11)
712#define BC_(n, c) (((c)+((n)*4))<<6)
713
714#define BO_COND_TRUE  BO(12)
715#define BO_COND_FALSE BO( 4)
716#define BO_ALWAYS     BO(20)
717
/* Bits within a 4-bit CR field, in instruction-encoding order. */
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
724
/*
 * Conditional-branch opcodes indexed by TCG condition: each selects the
 * CR0 bit to test and whether to branch when it is set or clear.
 * TSTEQ/TSTNE test CR_EQ (presumably set by a preceding record-form
 * test insn -- see the compare emission code).
 */
static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
};
739
/*
 * isel opcodes (with the CR0 bit to test) indexed by TCG condition.
 * The low bit here is set if the RA and RB fields must be inverted,
 * i.e. when the condition is realized as the negation of the CR bit.
 */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
};
755
/*
 * Apply relocation @type at @code_ptr so that it refers to
 * @value + @addend.  Returns false when the target is out of range
 * for the insn being patched.
 */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        /* Split into high/low parts such that hi + (int16_t)lo == value,
           accounting for the sign-extension of the low half. */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
804
805/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
806static bool tcg_out_need_prefix_align(TCGContext *s)
807{
808    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
809}
810
811static void tcg_out_prefix_align(TCGContext *s)
812{
813    if (tcg_out_need_prefix_align(s)) {
814        tcg_out32(s, NOP);
815    }
816}
817
/*
 * pc-relative displacement to @target as seen by a prefixed insn
 * emitted next, accounting for the alignment nop that may precede it.
 */
static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}
822
/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    /* Prefix word: opcode 1, R (pc-relative) bit, disp bits 16..33. */
    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    /* Suffix word: the base opcode with RT, RA and the low 16 disp bits. */
    i = opc | TAI(rt, ra, imm);

    /* Keep the 8-byte insn within a single 64-byte block. */
    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}
836
/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    /* Prefix word: opcode 1, type 10, R bit, disp bits 16..33. */
    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    /* Suffix word: the base opcode with RT, RA and the low 16 disp bits. */
    i = opc | TAI(rt, ra, imm);

    /* Keep the 8-byte insn within a single 64-byte block. */
    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}
850
851static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
852                             TCGReg base, tcg_target_long offset);
853
/*
 * Move @arg into @ret, between any combination of general and vector
 * registers.  Returns false when the move cannot be done (gpr<->vector
 * moves require the ISA 2.07 mfvsr*/mtvsr* insns).
 */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                /* gpr -> gpr: mr ret,arg == or ret,arg,arg. */
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                /* vector -> gpr. */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                /* gpr -> vector. */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        /* vector -> vector: vor ret,arg,arg. */
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
897
/*
 * Emit a 64-bit rotate-and-mask insn (MD-form): rotate @rs left by @sh
 * with mask begin @mb.  The 6-bit sh and mb values are split across the
 * insn: sh bit 5 goes to insn bit 1, and the mb field stores its high
 * bit in the field's low position.  @rc sets the Rc (record) bit.
 */
static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, bool rc)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
}
906
/* As tcg_out_rld_rc, without setting the Rc (record) bit. */
static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb)
{
    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
}
912
/*
 * Emit a 32-bit rotate-and-mask insn (M-form): rotate @rs left by @sh
 * and keep bits @mb..@me.  @rc sets the Rc (record) bit.
 */
static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, int me, bool rc)
{
    tcg_debug_assert((mb & 0x1f) == mb);
    tcg_debug_assert((me & 0x1f) == me);
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
}
920
/* As tcg_out_rlw_rc, without setting the Rc (record) bit. */
static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb, int me)
{
    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
}
926
/* Sign-extend the low 8 bits of @src into @dst (extsb). */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}
931
/* Zero-extend the low 8 bits of @src into @dst (andi dst,src,0xff). */
static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}
936
/* Sign-extend the low 16 bits of @src into @dst (extsh). */
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}
941
/* Zero-extend the low 16 bits of @src into @dst (andi dst,src,0xffff). */
static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}
946
/* Sign-extend the low 32 bits of @src into @dst (extsw, 64-bit only). */
static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}
952
/* Zero-extend the low 32 bits of @src into @dst (rldicl dst,src,0,32). */
static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}
958
/* Sign-extend a 32-bit value in @src to 64 bits in @dst. */
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}
963
/* Zero-extend a 32-bit value in @src to 64 bits in @dst. */
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}
968
/* Extract the low 32 bits of @rn into @rd; a plain register move suffices. */
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}
974
/* slwi dst,src,c == rlwinm dst,src,c,0,31-c. */
static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}
979
/* sldi dst,src,c == rldicr dst,src,c,63-c. */
static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}
984
/* srawi dst,src,c: 32-bit arithmetic shift right immediate. */
static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}
990
/* srwi dst,src,c == rlwinm dst,src,32-c,c,31. */
static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}
995
/* srdi dst,src,c == rldicl dst,src,64-c,c. */
static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}
1000
/*
 * sradi dst,src,c: 64-bit arithmetic shift right immediate.
 * The 6-bit count is split: low 5 bits in SH, bit 5 in insn bit 1.
 */
static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}
1005
/*
 * Emit addpcis @dst,@imm (ISA v3.0): dst = next-insn address + imm.
 * @imm must be a 32-bit value with the low 16 bits clear; the 16-bit
 * D operand is split into three fields (d0, d1, d2) across the insn.
 */
static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}
1018
1019/* Emit a move into ret of arg, if it can be done in one insn.  */
1020static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
1021{
1022    if (arg == (int16_t)arg) {
1023        tcg_out32(s, ADDI | TAI(ret, 0, arg));
1024        return true;
1025    }
1026    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
1027        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
1028        return true;
1029    }
1030    return false;
1031}
1032
/*
 * Load an arbitrary constant 'arg' into 'ret', trying strategies in
 * order of increasing insn count.  'in_prologue' suppresses the
 * TB-relative and constant-pool strategies, which are not valid
 * before the TB register / pool are established.
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* For 32-bit values on a 64-bit host, canonicalize as sign-extended. */
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    /* Positive 32-bit value with the sign bit of the low half clear:
       li (low half) then oris (high half). */
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            /* li sign-extends; rldicl clears the unwanted high ones. */
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        /* 16-bit value shifted left: li + sldi. */
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        /* One-insn value shifted right back into place. */
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        /* Prefixed pc-relative load from the pool. */
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        /* addpcis + ld from the pool. */
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    /* Full 64-bit fallback: build the high 32 bits, shift, then OR in
       the two low 16-bit halves (up to 5 insns). */
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
1165
/*
 * Load the vector constant 'val' (one element, to be replicated per
 * 'vece') into vector register 'ret'.  Small splat immediates use a
 * single VSPLTIS*/XXSPLTIB; everything else loads from the constant
 * pool via a strategy matching the available ISA level.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            /* vspltisb: 5-bit signed splat immediate. */
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            /* xxspltib: full 8-bit splat immediate. */
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        /* TB-relative pool reference; address patched into the ADDI below. */
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        /* Prefixed pc-relative vector loads need no address setup. */
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        /* addpcis gets us pc-relative; low 14 bits patched later. */
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        /* Absolute 32-bit address built in TMP1 below. */
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        /* V64 loads one double; otherwise LXVDSX splats it. */
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        /* LVX loads a full 16 bytes, so duplicate val in the pool. */
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    /* Materialize the pool address in TMP1 (insns patched by reloc). */
    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}
1256
1257static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1258                         tcg_target_long arg)
1259{
1260    switch (type) {
1261    case TCG_TYPE_I32:
1262    case TCG_TYPE_I64:
1263        tcg_debug_assert(ret < TCG_REG_V0);
1264        tcg_out_movi_int(s, type, ret, arg, false);
1265        break;
1266
1267    default:
1268        g_assert_not_reached();
1269    }
1270}
1271
/*
 * Backend hook for exchanging two registers.  Always declines, so the
 * generic code falls back to moves through a scratch register.
 */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
1276
/* Backend hook: rd = rs + imm for pointer arithmetic. */
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}
1283
1284static bool mask_operand(uint32_t c, int *mb, int *me)
1285{
1286    uint32_t lsb, test;
1287
1288    /* Accept a bit pattern like:
1289           0....01....1
1290           1....10....0
1291           0..01..10..0
1292       Keep track of the transitions.  */
1293    if (c == 0 || c == -1) {
1294        return false;
1295    }
1296    test = c;
1297    lsb = test & -test;
1298    test += lsb;
1299    if (test & (test - 1)) {
1300        return false;
1301    }
1302
1303    *me = clz32(lsb);
1304    *mb = test ? clz32(test & -test) + 1 : 0;
1305    return true;
1306}
1307
1308static bool mask64_operand(uint64_t c, int *mb, int *me)
1309{
1310    uint64_t lsb;
1311
1312    if (c == 0) {
1313        return false;
1314    }
1315
1316    lsb = c & -c;
1317    /* Accept 1..10..0.  */
1318    if (c == -lsb) {
1319        *mb = 0;
1320        *me = clz64(lsb);
1321        return true;
1322    }
1323    /* Accept 0..01..1.  */
1324    if (lsb == 1 && (c & (c + 1)) == 0) {
1325        *mb = clz64(c + 1) + 1;
1326        *me = 63;
1327        return true;
1328    }
1329    return false;
1330}
1331
1332static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1333{
1334    int mb, me;
1335
1336    if (mask_operand(c, &mb, &me)) {
1337        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1338    } else if ((c & 0xffff) == c) {
1339        tcg_out32(s, ANDI | SAI(src, dst, c));
1340        return;
1341    } else if ((c & 0xffff0000) == c) {
1342        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1343        return;
1344    } else {
1345        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1346        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1347    }
1348}
1349
1350static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1351{
1352    int mb, me;
1353
1354    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1355    if (mask64_operand(c, &mb, &me)) {
1356        if (mb == 0) {
1357            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1358        } else {
1359            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1360        }
1361    } else if ((c & 0xffff) == c) {
1362        tcg_out32(s, ANDI | SAI(src, dst, c));
1363        return;
1364    } else if ((c & 0xffff0000) == c) {
1365        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1366        return;
1367    } else {
1368        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1369        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1370    }
1371}
1372
/*
 * Emit dst = src OP c as up to two 16-bit-immediate insns, using
 * op_hi (e.g. ORIS/XORIS) for the high half and op_lo for the low.
 * NOTE(review): when c == 0 nothing is emitted, so dst is only
 * written if dst == src — callers appear to rely on this.
 */
static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}
1385
/* Emit dst = src | c (32-bit immediate OR). */
static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}
1390
/* Emit dst = src ^ c (32-bit immediate XOR). */
static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}
1395
/*
 * Emit an unconditional branch to 'target'; 'mask' supplies extra
 * insn bits (e.g. LK for a call).  If the 26-bit displacement does
 * not reach, branch indirectly through CTR via R0.
 */
static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}
1407
/*
 * Emit a load/store/add of rt with base+offset, for an arbitrary
 * tcg_target_long offset.  'opi' is the D-form (immediate) opcode,
 * 'opx' the X-form (indexed) opcode; opi == 0 forces the indexed
 * form.  'align' records the low offset bits that the DS/DQ-form
 * insns cannot encode, so misaligned offsets take another path.
 */
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        /* For a load, the destination itself can serve as scratch. */
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    /* Split the 32-bit offset into a signed 16-bit low part and an
       ADDIS-able high part. */
    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    /* If the high part went negative for a non-negative original
       offset, add 0x4000 twice instead of overflowing l1. */
    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    /* Skip a wholly-redundant "addi rt,rt,0". */
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
1515
/* Emit vsldoi ret,va,vb,shb: shift the va:vb pair left by shb bytes. */
static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}
1521
/*
 * Load a value of 'type' from base+offset into 'ret'.  Integer loads
 * use LWZ/LD; vector destinations use VSX insns when available, else
 * VMX loads with a vsldoi shuffle to position the element.
 */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        /* LVEWX loads the word selected by the EA; rotate into place. */
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        /* LVX ignores the low EA bits; rotate the high double in. */
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
1574
1575static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1576                              TCGReg base, intptr_t offset)
1577{
1578    int shift;
1579
1580    switch (type) {
1581    case TCG_TYPE_I32:
1582        if (arg < TCG_REG_V0) {
1583            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1584            break;
1585        }
1586        if (have_isa_2_07 && have_vsx) {
1587            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1588            break;
1589        }
1590        assert((offset & 3) == 0);
1591        tcg_debug_assert((offset & 3) == 0);
1592        shift = (offset - 4) & 0xc;
1593        if (shift) {
1594            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1595            arg = TCG_VEC_TMP1;
1596        }
1597        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1598        break;
1599    case TCG_TYPE_I64:
1600        if (arg < TCG_REG_V0) {
1601            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1602            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1603            break;
1604        }
1605        /* fallthru */
1606    case TCG_TYPE_V64:
1607        tcg_debug_assert(arg >= TCG_REG_V0);
1608        if (have_vsx) {
1609            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1610                             STXSDX, arg, base, offset);
1611            break;
1612        }
1613        tcg_debug_assert((offset & 7) == 0);
1614        if (offset & 8) {
1615            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1616            arg = TCG_VEC_TMP1;
1617        }
1618        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1619        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1620        break;
1621    case TCG_TYPE_V128:
1622        tcg_debug_assert(arg >= TCG_REG_V0);
1623        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1624                         STVX, arg, base, offset);
1625        break;
1626    default:
1627        g_assert_not_reached();
1628    }
1629}
1630
/*
 * Backend hook for storing a constant directly to memory.  Always
 * declines, so the generic code materializes the constant first.
 */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}
1636
/*
 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
 * If RC, then also set RC0.
 */
static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
                         bool const_arg2, TCGType type, bool rc)
{
    int mb, me;

    /* Register operand: a plain and(.) suffices. */
    if (!const_arg2) {
        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
        return;
    }

    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* 16-bit immediates fit andi./andis. directly. */
    if ((arg2 & ~0xffff) == 0) {
        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
        return;
    }
    if ((arg2 & ~0xffff0000ull) == 0) {
        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
        return;
    }
    /* Single-run masks become rlwinm/rldicr. */
    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
        return;
    }
    if (TCG_TARGET_REG_BITS == 64) {
        int sh = clz64(arg2);
        if (mask64_operand(arg2 << sh, &mb, &me)) {
            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
            return;
        }
    }
    /* Constraints should satisfy this. */
    g_assert_not_reached();
}
1677
/*
 * Emit a comparison of arg1 against arg2 (register or constant) into
 * condition-register field 'cr'.  TST conditions instead emit an
 * and. that sets CR0.
 */
static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        bool const_arg2, int cr, TCGType type)
{
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /*
     * Simplify the comparisons below wrt CMPI.
     * All of the tests are 16-bit, so a 32-bit sign extend always works.
     */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* Equality may use either signed or unsigned immediates. */
        if (const_arg2) {
            if ((int16_t)arg2 == arg2) {
                op = CMPI;
                break;
            }
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* The and. below only sets CR0, so cr must be 0. */
        tcg_debug_assert(cr == 0);
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
        return;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            tcg_debug_assert((int16_t)arg2 == arg2);
            op = CMPI;
            break;
        }
        op = CMP;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    default:
        g_assert_not_reached();
    }
    /* Insert target CR field and the 64-bit 'L' bit. */
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
    op |= RA(arg1);
    op |= const_arg2 ? arg2 & 0xffff : RB(arg2);
    tcg_out32(s, op);
}
1746
/* Set dst = (src == 0), or dst = -(src == 0) when neg. */
static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, src, src));
        return;
    }

    /* cntlzw/cntlzd yields the type width iff src == 0; shift that
       down to a 0/1 result. */
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
    if (neg) {
        tcg_out32(s, NEG | RT(dst) | RA(dst));
    }
}
1773
/* Set dst = (src != 0), or dst = -(src != 0) when neg. */
static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.  Extra addition
         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
        return;
    }
    /* Otherwise compute (src == 0) and invert the result. */
    tcg_out_setcond_eq0(s, type, dst, src, false);
    if (neg) {
        /* 0/1 → 0/-1 via subtract one. */
        tcg_out32(s, ADDI | TAI(dst, dst, -1));
    } else {
        tcg_out_xori32(s, dst, dst, 1);
    }
}
1793
/*
 * XOR arg1 with arg2 (register or constant) into R0, so the result
 * is zero iff arg1 == arg2.  Returns the register holding the result.
 */
static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            /* Constant fits the xori/xoris immediate path. */
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}
1809
/*
 * Emit arg0 = (arg1 cond arg2) as 0/1, or 0/-1 when neg.
 * Strategy in order of preference: ISA 3.10 SETBC family (2 insns),
 * special-cased comparisons against zero, ISEL (3-4 insns), and
 * finally a compare plus CR-field extraction via MFOCRF.
 */
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg arg0, TCGReg arg1, TCGArg arg2,
                            bool const_arg2, bool neg)
{
    int sh;
    bool inv;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* With SETBC/SETBCR, we can always implement with 2 insns. */
    if (have_isa_3_10) {
        tcg_insn_unit bi, opc;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
        bi = tcg_to_bc[cond] & (0x1f << 16);
        if (tcg_to_bc[cond] & BO(8)) {
            opc = neg ? SETNBC : SETBC;
        } else {
            opc = neg ? SETNBCR : SETBCR;
        }
        tcg_out32(s, opc | RT(arg0) | bi);
        return;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_NE:
            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_GE:
            /* arg1 >= 0 is the sign bit of ~arg1. */
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                if (neg) {
                    tcg_out_sari32(s, arg0, arg1, 31);
                } else {
                    tcg_out_shri32(s, arg0, arg1, 31);
                }
            } else {
                if (neg) {
                    tcg_out_sari64(s, arg0, arg1, 63);
                } else {
                    tcg_out_shri64(s, arg0, arg1, 63);
                }
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    inv = false;
    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_TSTEQ:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_TSTNE:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        inv = true;
        /* fall through */
    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30; /* CR7 CR_GT */
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        inv = true;
        /* fall through */
    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29; /* CR7 CR_LT */
        goto crtest;

    crtest:
        /* Compare into CR7, copy CR7 to R0, extract the tested bit. */
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        if (neg && inv) {
            /* 0/1 → -1/0. */
            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
        } else if (neg) {
            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
        } else if (inv) {
            tcg_out_xori32(s, arg0, arg0, 1);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
1956
/* Register-register setcond: dest = (arg1 cond arg2) as 0/1. */
static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false);
}
1962
/* Register-immediate setcond: dest = (arg1 cond arg2) as 0/1. */
static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false);
}
1968
/* Backend descriptor for the setcond opcode. */
static const TCGOutOpSetcond outop_setcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_setcond,
    .out_rri = tgen_setcondi,
};
1974
/* Register-register negsetcond: dest = -(arg1 cond arg2) as 0/-1. */
static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true);
}
1980
/* Register-immediate negsetcond: dest = -(arg1 cond arg2) as 0/-1. */
static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true);
}
1986
/* Backend descriptor for the negsetcond opcode. */
static const TCGOutOpSetcond outop_negsetcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_negsetcond,
    .out_rri = tgen_negsetcondi,
};
1992
/*
 * Emit an unconditional branch to label 'l'.  If the label is not yet
 * resolved, record a REL24 relocation to be patched later.
 */
void tcg_out_br(TCGContext *s, TCGLabel *l)
{
    uint32_t insn = B;

    if (l->has_value) {
        insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
    }
    tcg_out32(s, insn);
}
2004
/* Emit a conditional branch for 'cond' with 14-bit displacement bits 'bd'. */
static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
{
    tcg_out32(s, tcg_to_bc[cond] | bd);
}
2009
/*
 * Emit a conditional branch to label @l: fold in the 14-bit displacement
 * if the label is resolved, otherwise record an R_PPC_REL14 relocation.
 */
static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
{
    int bd = 0;
    if (l->has_value) {
        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out_bc(s, cond, bd);
}
2020
/* Branch to @l if (@arg1 cond @arg2), with @arg2 in a register. */
static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, false, 0, type);
    tcg_out_bc_lab(s, cond, l);
}
2027
/* As tgen_brcond, but with @arg2 as an immediate constant. */
static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg arg1, tcg_target_long arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, true, 0, type);
    tcg_out_bc_lab(s, cond, l);
}
2034
/* Constraints and emitters for the brcond opcode. */
static const TCGOutOpBrcond outop_brcond = {
    .base.static_constraint = C_O0_I2(r, rC),
    .out_rr = tgen_brcond,
    .out_ri = tgen_brcondi,
};
2040
/*
 * Emit a conditional move: @dest = (@c1 cond @c2) ? @v1 : @v2.
 * Uses the isel instruction when available; otherwise a compare and a
 * short forward branch over a move.  @v1/@v2 may be the constant 0
 * (constraint rZ), in which case they refer to register 0 here.
 */
static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2,
                         TCGArg v1, bool const_v1, TCGArg v2, bool const_v2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        /* If dest aliases v2, invert the condition so dest starts as v2. */
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out_bc(s, cond, 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}
2088
/* Constraints and emitter for the movcond opcode. */
static const TCGOutOpMovcond outop_movcond = {
    .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ),
    .out = tgen_movcond,
};
2093
/*
 * Emit count-leading/trailing-zeros @opc on @a1 into @a0, where @a2 is
 * the value to substitute when @a1 == 0.  A constant @a2 is either the
 * type width (the value the hardware insn already produces for zero
 * input) or 0 -- see the inline note below.
 */
static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        /* Zero input already yields the type width; one insn suffices. */
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            /* a0 aliases a2: skip the count insn entirely when a1 == 0. */
            tcg_out_bc(s, TCG_COND_EQ, 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            /* Count first, then overwrite with a2 when a1 == 0. */
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out_bc(s, TCG_COND_NE, 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}
2119
/*
 * Perform a double-word comparison of (ah:al) against (bh:bl), leaving
 * the boolean result in CR field 0, bit EQ.  CR fields 6 and 7 are used
 * as temporaries for the per-word comparisons.
 */
static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                         TCGArg bl, bool blconst, TCGArg bh, bool bhconst)
{
    /* CR bits to combine for each ordered condition: bit1 from the
       high-part compare, bit2 from the unsigned low-part compare. */
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond2;
    int op, bit1, bit2;

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        /* Equal iff both halves compare equal. */
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* AND each half with its mask, then OR with Rc=1 to set cr0. */
        if (blconst) {
            tcg_out_andi32(s, TCG_REG_R0, al, bl);
        } else {
            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
        }
        if (bhconst) {
            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
        } else {
            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
        }
        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        /* Result = high-compare(bit1) OR (high-equal AND low-compare(bit2)). */
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
        break;

    default:
        g_assert_not_reached();
    }
}
2187
/*
 * Double-word setcond: compare via tcg_out_cmp2 (result in CR0[EQ]),
 * then move CR field 0 to R0 with mfocrf and shift/mask the EQ bit
 * into @ret as 0/1.
 */
static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
                          TCGReg al, TCGReg ah,
                          TCGArg bl, bool const_bl,
                          TCGArg bh, bool const_bh)
{
    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
    tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
}
2197
/* setcond2 is only referenced on 32-bit hosts; silence -Wunused otherwise. */
#if TCG_TARGET_REG_BITS != 32
__attribute__((unused))
#endif
static const TCGOutOpSetcond2 outop_setcond2 = {
    .base.static_constraint = C_O1_I4(r, r, r, rU, rC),
    .out = tgen_setcond2,
};
2205
/* Double-word brcond: compare via tcg_out_cmp2, then branch on CR0[EQ]. */
static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                         TCGArg bl, bool const_bl,
                         TCGArg bh, bool const_bh, TCGLabel *l)
{
    assert(TCG_TARGET_REG_BITS == 32);
    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
    tcg_out_bc_lab(s, TCG_COND_EQ, l);
}
2214
/* brcond2 is only referenced on 32-bit hosts; silence -Wunused otherwise. */
#if TCG_TARGET_REG_BITS != 32
__attribute__((unused))
#endif
static const TCGOutOpBrcond2 outop_brcond2 = {
    .base.static_constraint = C_O0_I4(r, r, rU, rC),
    .out = tgen_brcond2,
};
2222
2223static void tcg_out_mb(TCGContext *s, unsigned a0)
2224{
2225    uint32_t insn;
2226
2227    if (a0 & TCG_MO_ST_LD) {
2228        insn = HWSYNC;
2229    } else {
2230        insn = LWSYNC;
2231    }
2232
2233    tcg_out32(s, insn);
2234}
2235
/*
 * Emit a call (or jump) to @target.  @lk is LK to set the link register
 * (a real call) or 0 (a tail jump).  Handles the function-descriptor
 * indirection used by _CALL_AIX / ELFv1, and the r12 entry-point
 * convention used by ELFv2.
 */
static void tcg_out_call_int(TCGContext *s, int lk,
                             const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /*
     * Look through the function descriptor.  If the branch is in range
     * and the toc value fits in 32 bits, load the toc by immediate and
     * branch directly; otherwise load both entry point and toc from the
     * descriptor in memory.
     */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, lk, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid r12 load if we
       knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, lk, target);
    } else {
        /* Out of direct-branch range: go indirect through CTR. */
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#else
    /* SysV / Darwin: a plain direct branch suffices. */
    tcg_out_b(s, lk, target);
#endif
}
2286
/* Emit a call with the link register set; @info is unused on this target. */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, LK, target);
}
2292
/*
 * Indexed load opcodes, selected by MemOp size/sign/byteswap.
 * Entries left at 0 (e.g. byte-swapped sign-extending loads) have no
 * single-insn encoding and are synthesized by the caller.
 */
static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_UQ] = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_UQ] = LDBRX,
};
2305
/* Indexed store opcodes, selected by MemOp size/byteswap. */
static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_UQ] = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_UQ] = STDBRX,
};
2316
2317static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2318{
2319    if (arg < 0) {
2320        arg = TCG_REG_TMP1;
2321    }
2322    tcg_out32(s, MFSPR | RT(arg) | LR);
2323    return arg;
2324}
2325
2326/*
2327 * For the purposes of ppc32 sorting 4 input registers into 4 argument
2328 * registers, there is an outside chance we would require 3 temps.
2329 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ra_gen = ldst_ra_gen,
    .ntmp = 3,
    /* Scratch registers available while marshalling helper arguments. */
    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
};
2335
/*
 * Emit the out-of-line slow path for a guest load: patch the fast-path
 * conditional branch to point here, call the size-appropriate load
 * helper, move its result into place, and branch back to the fast path.
 * Returns false if the branch displacement cannot be encoded.
 */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}
2351
/*
 * Emit the out-of-line slow path for a guest store; as the load variant
 * above, but there is no result to move back.
 */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}
2366
/* Host address for the fast path, expressed as base + index. */
typedef struct {
    TCGReg base;        /* base register, or 0 for none */
    TCGReg index;       /* index register (guest address or zero-extension) */
    TCGAtomAlign aa;    /* atomicity/alignment resolved for the MemOp */
} HostAddress;
2372
2373bool tcg_target_has_memory_bswap(MemOp memop)
2374{
2375    TCGAtomAlign aa;
2376
2377    if ((memop & MO_SIZE) <= MO_64) {
2378        return true;
2379    }
2380
2381    /*
2382     * Reject 16-byte memop with 16-byte atomicity,
2383     * but do allow a pair of 64-bit operations.
2384     */
2385    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2386    return aa.atom <= MO_64;
2387}
2388
2389/* We expect to use a 16-bit negative offset from ENV.  */
2390#define MIN_TLB_MASK_TABLE_OFS  -32768
2391
2392/*
2393 * For system-mode, perform the TLB load and compare.
2394 * For user-mode, perform any required alignment tests.
2395 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2396 * is required and fill in @h with the host address for the fast path.
2397 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr, MemOpIdx oi, bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp a_bits, s_bits;

    /*
     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
     *
     * Before 3.0, "An access that is not atomic is performed as a set of
     * smaller disjoint atomic accesses. In general, the number and alignment
     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
     *
     * As of 3.0, "the non-atomic access is performed as described in
     * the corresponding list", which matches MO_ATOM_SUBALIGN.
     */
    s_bits = opc & MO_SIZE;
    h->aa = atom_and_align_for_opc(s, opc,
                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
                                                 : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_bits = h->aa.align;

    if (tcg_use_softmmu) {
        int mem_index = get_mmuidx(oi);
        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
                            : offsetof(CPUTLBEntry, addr_write);
        int fast_off = tlb_mask_table_ofs(s, mem_index);
        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
        int table_off = fast_off + offsetof(CPUTLBDescFast, table);

        /* Always take the slow-path label for softmmu. */
        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addr_reg = addr;

        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);

        /* Extract the page index, shifted into place for tlb index.  */
        if (TCG_TARGET_REG_BITS == 32) {
            tcg_out_shri32(s, TCG_REG_R0, addr,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        } else {
            tcg_out_shri64(s, TCG_REG_R0, addr,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        }
        /* TMP1 = byte offset of the tlb entry within the table. */
        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));

        /*
         * Load the TLB comparator into TMP2.
         * For 64-bit host, always load the entire 64-bit slot for simplicity.
         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
         */
        if (cmp_off == 0) {
            /* Update-form load also leaves the entry address in TMP1. */
            tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX)
                      | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
        } else {
            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
        }

        /*
         * Load the TLB addend for use on the fast path.
         * Do this asap to minimize any load use delay.
         */
        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                       offsetof(CPUTLBEntry, addend));
        }

        /* Clear the non-page, non-alignment bits from the address in R0. */
        if (TCG_TARGET_REG_BITS == 32) {
            /*
             * We don't support unaligned accesses on 32-bits.
             * Preserve the bottom bits and thus trigger a comparison
             * failure on unaligned accesses.
             */
            if (a_bits < s_bits) {
                a_bits = s_bits;
            }
            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
                        (32 - a_bits) & 31, 31 - s->page_bits);
        } else {
            TCGReg t = addr;

            /*
             * If the access is unaligned, we need to make sure we fail if we
             * cross a page boundary.  The trick is to add the access size-1
             * to the address before masking the low bits.  That will make the
             * address overflow to the next page if we cross a page boundary,
             * which will then force a mismatch of the TLB compare.
             */
            if (a_bits < s_bits) {
                unsigned a_mask = (1 << a_bits) - 1;
                unsigned s_mask = (1 << s_bits) - 1;
                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
                t = TCG_REG_R0;
            }

            /* Mask the address for the requested alignment.  */
            if (addr_type == TCG_TYPE_I32) {
                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
                            (32 - a_bits) & 31, 31 - s->page_bits);
            } else if (a_bits == 0) {
                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
            } else {
                /* Two rotates: clear low alignment bits, then low page bits. */
                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
                            64 - s->page_bits, s->page_bits - a_bits);
                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
            }
        }

        /* Full comparison into cr0. */
        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type);

        /* Load a pointer into the current opcode w/conditional branch-link. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_bc(s, TCG_COND_NE, LK);

        /* Fast-path host address is the tlb addend (TMP1) + h->index. */
        h->base = TCG_REG_TMP1;
    } else {
        if (a_bits) {
            /* User-mode: slow path only needed for alignment checks. */
            ldst = new_ldst_label(s);
            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addr_reg = addr;

            /* We are expecting a_bits to max out at 7, much lower than ANDI. */
            tcg_debug_assert(a_bits < 16);
            tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1));

            /* Conditional branch-and-link, patched to the slow path later. */
            ldst->label_ptr[0] = s->code_ptr;
            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
        }

        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
    }

    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
        /* Zero-extend the guest address for use in the host address. */
        tcg_out_ext32u(s, TCG_REG_TMP2, addr);
        h->index = TCG_REG_TMP2;
    } else {
        h->index = addr;
    }

    return ldst;
}
2550
/*
 * Emit a guest memory load into @datalo (and @datahi, which is only
 * meaningful for a 64-bit load on a 32-bit host).  Records slow-path
 * completion info if prepare_host_addr created a slow-path label.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addr, MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr, oi, true);

    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        /* 64-bit load on a 32-bit host: two 32-bit accesses. */
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
        } else if (h.index == datahi) {
            /* Avoid clobbering the address register before the second load. */
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
        } else {
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
        if (!have_isa_2_06 && insn == LDBRX) {
            /* LDBRX is ISA 2.06: synthesize from two LWBRX plus rldimi. */
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
        } else if (insn) {
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
        } else {
            /* No single insn: load zero-extended, then sign-extend. */
            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
            tcg_out_movext(s, TCG_TYPE_REG, datalo,
                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2600
/*
 * Emit a guest memory store from @datalo (and @datahi for a 64-bit
 * store on a 32-bit host).  Records slow-path completion info if
 * prepare_host_addr created a slow-path label.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addr, MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr, oi, false);

    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        /* 64-bit store on a 32-bit host: two 32-bit accesses. */
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
        } else {
            tcg_out32(s, STW | TAI(datahi, h.index, 0));
            tcg_out32(s, STW | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
        if (!have_isa_2_06 && insn == STDBRX) {
            /* STDBRX is ISA 2.06: synthesize from two STWBRX. */
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4));
            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2));
        } else {
            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2642
/*
 * Emit a 128-bit guest load or store: a single LQ/STQ when 16-byte
 * atomicity is required, otherwise a pair of 8-byte accesses
 * (byte-reversed via LDBRX/STDBRX when MO_BSWAP is set).
 */
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    bool need_bswap;
    uint32_t insn;
    TCGReg index;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LQ/STQ have no indexing. */
    index = h.index;
    if (h.base != 0) {
        index = TCG_REG_TMP1;
        tcg_out32(s, ADD | TAB(index, h.base, h.index));
    }
    need_bswap = get_memop(oi) & MO_BSWAP;

    if (h.aa.atom == MO_128) {
        /* LQ/STQ require an even/odd register pair, hi in the even reg. */
        tcg_debug_assert(!need_bswap);
        tcg_debug_assert(datalo & 1);
        tcg_debug_assert(datahi == datalo - 1);
        tcg_debug_assert(!is_ld || datahi != index);
        insn = is_ld ? LQ : STQ;
        tcg_out32(s, insn | TAI(datahi, index, 0));
    } else {
        TCGReg d1, d2;

        /* Order the halves according to host endianness and bswap. */
        if (HOST_BIG_ENDIAN ^ need_bswap) {
            d1 = datahi, d2 = datalo;
        } else {
            d1 = datalo, d2 = datahi;
        }

        if (need_bswap) {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
            insn = is_ld ? LDBRX : STDBRX;
            tcg_out32(s, insn | TAB(d1, 0, index));
            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
        } else {
            insn = is_ld ? LD : STD;
            tcg_out32(s, insn | TAI(d1, index, 0));
            tcg_out32(s, insn | TAI(d2, index, 8));
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2697
2698static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2699{
2700    int i;
2701    for (i = 0; i < count; ++i) {
2702        p[i] = NOP;
2703    }
2704}
2705
2706/* Parameters for function call generation, used in tcg.c.  */
2707#define TCG_TARGET_STACK_ALIGN       16
2708
2709#ifdef _CALL_AIX
2710# define LINK_AREA_SIZE                (6 * SZR)
2711# define LR_OFFSET                     (1 * SZR)
2712# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2713#elif defined(_CALL_DARWIN)
2714# define LINK_AREA_SIZE                (6 * SZR)
2715# define LR_OFFSET                     (2 * SZR)
2716#elif TCG_TARGET_REG_BITS == 64
2717# if defined(_CALL_ELF) && _CALL_ELF == 2
2718#  define LINK_AREA_SIZE               (4 * SZR)
2719#  define LR_OFFSET                    (1 * SZR)
2720# endif
2721#else /* TCG_TARGET_REG_BITS == 32 */
2722# if defined(_CALL_SYSV)
2723#  define LINK_AREA_SIZE               (2 * SZR)
2724#  define LR_OFFSET                    (1 * SZR)
2725# endif
2726#endif
2727#ifndef LR_OFFSET
2728# error "Unhandled abi"
2729#endif
2730#ifndef TCG_TARGET_CALL_STACK_OFFSET
2731# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2732#endif
2733
2734#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2735#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2736
2737#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2738                     + TCG_STATIC_CALL_ARGS_SIZE    \
2739                     + CPU_TEMP_BUF_SIZE            \
2740                     + REG_SAVE_SIZE                \
2741                     + TCG_TARGET_STACK_ALIGN - 1)  \
2742                    & -TCG_TARGET_STACK_ALIGN)
2743
2744#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2745
/*
 * Emit the prologue that enters generated code (save LR and callee-saved
 * registers, set up env and jump to the TB) and the shared epilogue that
 * restores them and returns to the caller.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

#ifdef _CALL_AIX
    /* AIX/ELFv1: emit the function descriptor ahead of the code. */
    const void **desc = (const void **)s->code_ptr;
    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
    desc[1] = 0;                            /* environment pointer */
    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
#endif

    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
                  CPU_TEMP_BUF_SIZE);

    /* Prologue */
    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);     /* mflr r0 */
    tcg_out32(s, (SZR == 8 ? STDU : STWU)
              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    /* Save LR in the caller's frame at the ABI-defined offset. */
    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);

    if (!tcg_use_softmmu && guest_base) {
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }

    /* arg0 = env, arg1 = TB entry point; jump into the TB via CTR. */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
    tcg_out32(s, BCCTR | BO_ALWAYS);

    /* Epilogue */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);

    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
    tcg_out32(s, BCLR | BO_ALWAYS);
}
2792
/* Emit code at the start of each TB; here, only to establish TCG_REG_TB. */
static void tcg_out_tb_start(TCGContext *s)
{
    /* Load TCG_REG_TB. */
    if (USE_REG_TB) {
        if (have_isa_3_00) {
            /* lnia REG_TB */
            tcg_out_addpcis(s, TCG_REG_TB, 0);
        } else {
            /* bcl 20,31,$+4 (preferred form for getting nia) */
            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
        }
    }
}
2807
/* Return from generated code: put @arg in r3 and branch to the epilogue. */
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
{
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
    tcg_out_b(s, 0, tcg_code_gen_epilogue);
}
2813
/*
 * Emit the goto_tb sequence: a patchable direct-branch slot (initially
 * NOP, later rewritten by tb_target_set_jmp_target), followed by an
 * indirect-branch fallback that loads the target address and jumps
 * through CTR.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    uintptr_t ptr = get_jmp_target_addr(s, which);
    int16_t lo;

    /* Direct branch will be patched by tb_target_set_jmp_target. */
    set_jmp_insn_offset(s, which);
    tcg_out32(s, NOP);

    /* When branch is out of range, fall through to indirect. */
    if (USE_REG_TB) {
        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
    } else if (have_isa_3_10) {
        /* pc-relative prefixed load. */
        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
    } else if (have_isa_3_00) {
        /* addpcis + load, with the low 16 bits folded into the load. */
        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
        lo = offset;
        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
    } else {
        /* Absolute address, low 16 bits folded into the load. */
        lo = ptr;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
    }

    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
    tcg_out32(s, BCCTR | BO_ALWAYS);
    set_jmp_reset_offset(s, which);
}
2845
/* Indirect jump to @a0 via CTR, clearing r3 on the way. */
static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0)
{
    tcg_out32(s, MTSPR | RS(a0) | CTR);
    tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
    tcg_out32(s, BCCTR | BO_ALWAYS);
}
2852
/*
 * Patch the direct-branch slot of TB jump @n: write a B instruction when
 * the target is within the 24-bit branch range, otherwise a NOP so that
 * execution falls through to the indirect path emitted by
 * tcg_out_goto_tb.  The write is atomic and the icache is flushed.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t addr = tb->jmp_target_addr[n];
    intptr_t diff = addr - jmp_rx;
    tcg_insn_unit insn;

    if (in_range_b(diff)) {
        insn = B | (diff & 0x3fffffc);
    } else {
        insn = NOP;
    }

    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}
2869
2870
/* Register-register add: a0 = a1 + a2. */
static void tgen_add(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ADD | TAB(a0, a1, a2));
}

/* Add immediate; tcg_out_mem_long handles constants wider than addi's 16 bits. */
static void tgen_addi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
}

static const TCGOutOpBinary outop_add = {
    .base.static_constraint = C_O1_I2(r, r, rT),
    .out_rrr = tgen_add,
    .out_rri = tgen_addi,
};
2888
/* Add with carry-out: addc sets CA from the addition. */
static void tgen_addco_rrr(TCGContext *s, TCGType type,
                           TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ADDC | TAB(a0, a1, a2));
}

/* Add 16-bit signed immediate with carry-out (addic). */
static void tgen_addco_rri(TCGContext *s, TCGType type,
                           TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out32(s, ADDIC | TAI(a0, a1, a2));
}

static TCGConstraintSetIndex cset_addco(TCGType type, unsigned flags)
{
    /*
     * Note that the CA bit is defined based on the word size of the
     * environment.  So in 64-bit mode it's always carry-out of bit 63.
     * The fallback code using deposit works just as well for TCG_TYPE_I32.
     */
    return type == TCG_TYPE_REG ? C_O1_I2(r, r, rI) : C_NotImplemented;
}

static const TCGOutOpBinary outop_addco = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addco,
    .out_rrr = tgen_addco_rrr,
    .out_rri = tgen_addco_rri,
};
2917
/* Add with carry-in and carry-out: adde computes a1 + a2 + CA, sets CA. */
static void tgen_addcio_rrr(TCGContext *s, TCGType type,
                            TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ADDE | TAB(a0, a1, a2));
}

/*
 * Immediate form: the rZM constraint limits a2 to 0 or -1, matching the
 * only extended-arithmetic immediates the ISA provides:
 * addme (add minus one extended) and addze (add zero extended).
 */
static void tgen_addcio_rri(TCGContext *s, TCGType type,
                            TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out32(s, (a2 ? ADDME : ADDZE) | RT(a0) | RA(a1));
}

static TCGConstraintSetIndex cset_addcio(TCGType type, unsigned flags)
{
    /* CA is defined on the host word size; see cset_addco. */
    return type == TCG_TYPE_REG ? C_O1_I2(r, r, rZM) : C_NotImplemented;
}

static const TCGOutOpBinary outop_addcio = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addcio,
    .out_rrr = tgen_addcio_rrr,
    .out_rri = tgen_addcio_rri,
};

/* Add with carry-in only: same insns as addcio, extra CA output ignored. */
static const TCGOutOpAddSubCarry outop_addci = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addcio,
    .out_rrr = tgen_addcio_rrr,
    .out_rri = tgen_addcio_rri,
};
2948
/* Set CA = 1: subfc r0,r0,r0 computes r0 - r0, which never borrows. */
static void tcg_out_set_carry(TCGContext *s)
{
    tcg_out32(s, SUBFC | TAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_R0));
}
2953
/* Bitwise AND: a0 = a1 & a2 (logical ops encode the source in the RS field). */
static void tgen_and(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, AND | SAB(a1, a0, a2));
}

/* AND with arbitrary immediate, via mask insns or andi./andis. */
static void tgen_andi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_andi32(s, a0, a1, a2);
    } else {
        tcg_out_andi64(s, a0, a1, a2);
    }
}

static const TCGOutOpBinary outop_and = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_and,
    .out_rri = tgen_andi,
};
2975
/* AND with complement: a0 = a1 & ~a2. */
static void tgen_andc(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ANDC | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_andc = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_andc,
};
2986
/* Count leading zeros; tcg_out_cntxz also handles the zero-input value a2. */
static void tgen_clz(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
}

/* Same, with a constant zero-input value. */
static void tgen_clzi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
}

static const TCGOutOpBinary outop_clz = {
    .base.static_constraint = C_O1_I2(r, r, rZW),
    .out_rrr = tgen_clz,
    .out_rri = tgen_clzi,
};
3006
3007static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3008{
3009    uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD;
3010    tcg_out32(s, insn | SAB(a1, a0, 0));
3011}
3012
3013static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
3014{
3015    return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented;
3016}
3017
3018static const TCGOutOpUnary outop_ctpop = {
3019    .base.static_constraint = C_Dynamic,
3020    .base.dynamic_constraint = cset_ctpop,
3021    .out_rr = tgen_ctpop,
3022};
3023
/* Count trailing zeros; tcg_out_cntxz also handles the zero-input value a2. */
static void tgen_ctz(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
}

/* Same, with a constant zero-input value. */
static void tgen_ctzi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
}

/* cnttzw/cnttzd require ISA 3.00 (Power9). */
static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags)
{
    return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented;
}

static const TCGOutOpBinary outop_ctz = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_ctz,
    .out_rrr = tgen_ctz,
    .out_rri = tgen_ctzi,
};
3049
/* Equivalence: a0 = ~(a1 ^ a2). */
static void tgen_eqv(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, EQV | SAB(a1, a0, a2));
}
3055
#if TCG_TARGET_REG_BITS == 64
/* Extract the high 32 bits of a 64-bit value via a logical right shift. */
static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_shri64(s, a0, a1, 32);
}

static const TCGOutOpUnary outop_extrh_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extrh_i64_i32,
};
#endif
3067
3068static void tgen_divs(TCGContext *s, TCGType type,
3069                      TCGReg a0, TCGReg a1, TCGReg a2)
3070{
3071    uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD;
3072    tcg_out32(s, insn | TAB(a0, a1, a2));
3073}
3074
3075static const TCGOutOpBinary outop_divs = {
3076    .base.static_constraint = C_O1_I2(r, r, r),
3077    .out_rrr = tgen_divs,
3078};
3079
3080static const TCGOutOpDivRem outop_divs2 = {
3081    .base.static_constraint = C_NotImplemented,
3082};
3083
3084static void tgen_divu(TCGContext *s, TCGType type,
3085                      TCGReg a0, TCGReg a1, TCGReg a2)
3086{
3087    uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU;
3088    tcg_out32(s, insn | TAB(a0, a1, a2));
3089}
3090
3091static const TCGOutOpBinary outop_divu = {
3092    .base.static_constraint = C_O1_I2(r, r, r),
3093    .out_rrr = tgen_divu,
3094};
3095
3096static const TCGOutOpDivRem outop_divu2 = {
3097    .base.static_constraint = C_NotImplemented,
3098};
3099
/* Descriptor for eqv (see tgen_eqv above). */
static const TCGOutOpBinary outop_eqv = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_eqv,
};
3104
3105static void tgen_mul(TCGContext *s, TCGType type,
3106                    TCGReg a0, TCGReg a1, TCGReg a2)
3107{
3108    uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD;
3109    tcg_out32(s, insn | TAB(a0, a1, a2));
3110}
3111
3112static void tgen_muli(TCGContext *s, TCGType type,
3113                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3114{
3115    tcg_out32(s, MULLI | TAI(a0, a1, a2));
3116}
3117
3118static const TCGOutOpBinary outop_mul = {
3119    .base.static_constraint = C_O1_I2(r, r, rI),
3120    .out_rrr = tgen_mul,
3121    .out_rri = tgen_muli,
3122};
3123
3124static const TCGOutOpMul2 outop_muls2 = {
3125    .base.static_constraint = C_NotImplemented,
3126};
3127
3128static void tgen_mulsh(TCGContext *s, TCGType type,
3129                       TCGReg a0, TCGReg a1, TCGReg a2)
3130{
3131    uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD;
3132    tcg_out32(s, insn | TAB(a0, a1, a2));
3133}
3134
3135static const TCGOutOpBinary outop_mulsh = {
3136    .base.static_constraint = C_O1_I2(r, r, r),
3137    .out_rrr = tgen_mulsh,
3138};
3139
3140static const TCGOutOpMul2 outop_mulu2 = {
3141    .base.static_constraint = C_NotImplemented,
3142};
3143
3144static void tgen_muluh(TCGContext *s, TCGType type,
3145                       TCGReg a0, TCGReg a1, TCGReg a2)
3146{
3147    uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU;
3148    tcg_out32(s, insn | TAB(a0, a1, a2));
3149}
3150
3151static const TCGOutOpBinary outop_muluh = {
3152    .base.static_constraint = C_O1_I2(r, r, r),
3153    .out_rrr = tgen_muluh,
3154};
3155
/* NAND: a0 = ~(a1 & a2). */
static void tgen_nand(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, NAND | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_nand = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_nand,
};
3166
/* NOR: a0 = ~(a1 | a2); also used by tgen_not with a1 == a2. */
static void tgen_nor(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, NOR | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_nor = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_nor,
};
3177
/* OR: a0 = a1 | a2. */
static void tgen_or(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, OR | SAB(a1, a0, a2));
}

/* OR with immediate via ori/oris pair; rU constrains the constant. */
static void tgen_ori(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_ori32(s, a0, a1, a2);
}

static const TCGOutOpBinary outop_or = {
    .base.static_constraint = C_O1_I2(r, r, rU),
    .out_rrr = tgen_or,
    .out_rri = tgen_ori,
};
3195
/* OR with complement: a0 = a1 | ~a2. */
static void tgen_orc(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ORC | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_orc = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_orc,
};
3206
/* Hardware modulo insns (modsw/modsd/moduw/modud) require ISA 3.00. */
static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags)
{
    return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented;
}
3211
3212static void tgen_rems(TCGContext *s, TCGType type,
3213                      TCGReg a0, TCGReg a1, TCGReg a2)
3214{
3215    uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD;
3216    tcg_out32(s, insn | TAB(a0, a1, a2));
3217}
3218
3219static const TCGOutOpBinary outop_rems = {
3220    .base.static_constraint = C_Dynamic,
3221    .base.dynamic_constraint = cset_mod,
3222    .out_rrr = tgen_rems,
3223};
3224
3225static void tgen_remu(TCGContext *s, TCGType type,
3226                      TCGReg a0, TCGReg a1, TCGReg a2)
3227{
3228    uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD;
3229    tcg_out32(s, insn | TAB(a0, a1, a2));
3230}
3231
3232static const TCGOutOpBinary outop_remu = {
3233    .base.static_constraint = C_Dynamic,
3234    .base.dynamic_constraint = cset_mod,
3235    .out_rrr = tgen_remu,
3236};
3237
/* Rotate left by register: rlwnm/rldcl with a full-width mask. */
static void tgen_rotl(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31));
    } else {
        tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0));
    }
}

/* Rotate left by immediate: rlwinm/rldicl with a full-width mask. */
static void tgen_rotli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
    } else {
        tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
    }
}

static const TCGOutOpBinary outop_rotl = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_rotl,
    .out_rri = tgen_rotli,
};

/* No right-rotate insn; the middle-end falls back on rotl. */
static const TCGOutOpBinary outop_rotr = {
    .base.static_constraint = C_NotImplemented,
};
3267
3268static void tgen_sar(TCGContext *s, TCGType type,
3269                     TCGReg a0, TCGReg a1, TCGReg a2)
3270{
3271    uint32_t insn = type == TCG_TYPE_I32 ? SRAW : SRAD;
3272    tcg_out32(s, insn | SAB(a1, a0, a2));
3273}
3274
3275static void tgen_sari(TCGContext *s, TCGType type,
3276                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3277{
3278    /* Limit immediate shift count lest we create an illegal insn.  */
3279    if (type == TCG_TYPE_I32) {
3280        tcg_out_sari32(s, a0, a1, a2 & 31);
3281    } else {
3282        tcg_out_sari64(s, a0, a1, a2 & 63);
3283    }
3284}
3285
3286static const TCGOutOpBinary outop_sar = {
3287    .base.static_constraint = C_O1_I2(r, r, ri),
3288    .out_rrr = tgen_sar,
3289    .out_rri = tgen_sari,
3290};
3291
3292static void tgen_shl(TCGContext *s, TCGType type,
3293                     TCGReg a0, TCGReg a1, TCGReg a2)
3294{
3295    uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD;
3296    tcg_out32(s, insn | SAB(a1, a0, a2));
3297}
3298
3299static void tgen_shli(TCGContext *s, TCGType type,
3300                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3301{
3302    /* Limit immediate shift count lest we create an illegal insn.  */
3303    if (type == TCG_TYPE_I32) {
3304        tcg_out_shli32(s, a0, a1, a2 & 31);
3305    } else {
3306        tcg_out_shli64(s, a0, a1, a2 & 63);
3307    }
3308}
3309
3310static const TCGOutOpBinary outop_shl = {
3311    .base.static_constraint = C_O1_I2(r, r, ri),
3312    .out_rrr = tgen_shl,
3313    .out_rri = tgen_shli,
3314};
3315
3316static void tgen_shr(TCGContext *s, TCGType type,
3317                     TCGReg a0, TCGReg a1, TCGReg a2)
3318{
3319    uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD;
3320    tcg_out32(s, insn | SAB(a1, a0, a2));
3321}
3322
3323static void tgen_shri(TCGContext *s, TCGType type,
3324                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3325{
3326    /* Limit immediate shift count lest we create an illegal insn.  */
3327    if (type == TCG_TYPE_I32) {
3328        tcg_out_shri32(s, a0, a1, a2 & 31);
3329    } else {
3330        tcg_out_shri64(s, a0, a1, a2 & 63);
3331    }
3332}
3333
3334static const TCGOutOpBinary outop_shr = {
3335    .base.static_constraint = C_O1_I2(r, r, ri),
3336    .out_rrr = tgen_shr,
3337    .out_rri = tgen_shri,
3338};
3339
/* Subtract: subf computes RB - RA, so operands are swapped for a1 - a2. */
static void tgen_sub(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, SUBF | TAB(a0, a2, a1));
}

/* Subtract-from-immediate: a0 = a1 - a2 (subfic computes imm - RA). */
static void tgen_subfi(TCGContext *s, TCGType type,
                       TCGReg a0, tcg_target_long a1, TCGReg a2)
{
    tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
}

static const TCGOutOpSubtract outop_sub = {
    .base.static_constraint = C_O1_I2(r, rI, r),
    .out_rrr = tgen_sub,
    .out_rir = tgen_subfi,
};
3357
/* Subtract with borrow-out: subfc sets CA (CA = 1 means no borrow). */
static void tgen_subbo_rrr(TCGContext *s, TCGType type,
                           TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, SUBFC | TAB(a0, a2, a1));
}

static void tgen_subbo_rri(TCGContext *s, TCGType type,
                           TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (a2 == 0) {
        /*
         * a1 - 0 must still produce CA = 1 (no borrow); addic with -0
         * would leave CA = 0, so go through the register form instead.
         */
        tcg_out_movi(s, type, TCG_REG_R0, 0);
        tgen_subbo_rrr(s, type, a0, a1, TCG_REG_R0);
    } else {
        /* a1 - a2 == a1 + (-a2); addic yields the same CA semantics. */
        tgen_addco_rri(s, type, a0, a1, -a2);
    }
}

/* The underlying insn for subfi is subfic. */
#define tgen_subbo_rir  tgen_subfi

/* Constant minus constant: materialize the subtrahend in R0 first. */
static void tgen_subbo_rii(TCGContext *s, TCGType type,
                           TCGReg a0, tcg_target_long a1, tcg_target_long a2)
{
    tcg_out_movi(s, type, TCG_REG_R0, a2);
    tgen_subbo_rir(s, type, a0, a1, TCG_REG_R0);
}

static TCGConstraintSetIndex cset_subbo(TCGType type, unsigned flags)
{
    /* Recall that the CA bit is defined based on the host word size. */
    return type == TCG_TYPE_REG ? C_O1_I2(r, rI, rN) : C_NotImplemented;
}

static const TCGOutOpAddSubCarry outop_subbo = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_subbo,
    .out_rrr = tgen_subbo_rrr,
    .out_rri = tgen_subbo_rri,
    .out_rir = tgen_subbo_rir,
    .out_rii = tgen_subbo_rii,
};
3399
/* Subtract with borrow-in and borrow-out: subfe = ~a2 + a1 + CA. */
static void tgen_subbio_rrr(TCGContext *s, TCGType type,
                            TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, SUBFE | TAB(a0, a2, a1));
}

/* a1 - a2 - borrow == a1 + ~a2 + CA; reuse the extended-add immediates. */
static void tgen_subbio_rri(TCGContext *s, TCGType type,
                            TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tgen_addcio_rri(s, type, a0, a1, ~a2);
}

/*
 * Constant minuend limited to 0 or -1 by the rZM constraint:
 * subfme computes -1 - RA - borrow, subfze computes 0 - RA - borrow.
 */
static void tgen_subbio_rir(TCGContext *s, TCGType type,
                            TCGReg a0, tcg_target_long a1, TCGReg a2)
{
    tcg_debug_assert(a1 == 0 || a1 == -1);
    tcg_out32(s, (a1 ? SUBFME : SUBFZE) | RT(a0) | RA(a2));
}

/* Both operands constant: materialize the subtrahend in R0 first. */
static void tgen_subbio_rii(TCGContext *s, TCGType type,
                            TCGReg a0, tcg_target_long a1, tcg_target_long a2)
{
    tcg_out_movi(s, type, TCG_REG_R0, a2);
    tgen_subbio_rir(s, type, a0, a1, TCG_REG_R0);
}

static TCGConstraintSetIndex cset_subbio(TCGType type, unsigned flags)
{
    /* CA is defined on the host word size; see cset_subbo. */
    return type == TCG_TYPE_REG ? C_O1_I2(r, rZM, rZM) : C_NotImplemented;
}

static const TCGOutOpAddSubCarry outop_subbio = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_subbio,
    .out_rrr = tgen_subbio_rrr,
    .out_rri = tgen_subbio_rri,
    .out_rir = tgen_subbio_rir,
    .out_rii = tgen_subbio_rii,
};

/* Borrow-in only: same insns, extra CA output ignored. */
#define outop_subbi  outop_subbio
3441
/* Set borrow: PPC borrow is inverted carry, so clear CA (r0 + 0 never carries). */
static void tcg_out_set_borrow(TCGContext *s)
{
    /* borrow = !carry */
    tcg_out32(s, ADDIC | TAI(TCG_REG_R0, TCG_REG_R0, 0));
}
3447
/* XOR: a0 = a1 ^ a2. */
static void tgen_xor(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, XOR | SAB(a1, a0, a2));
}

/* XOR with immediate via xori/xoris pair; rU constrains the constant. */
static void tgen_xori(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_xori32(s, a0, a1, a2);
}

static const TCGOutOpBinary outop_xor = {
    .base.static_constraint = C_O1_I2(r, r, rU),
    .out_rrr = tgen_xor,
    .out_rri = tgen_xori,
};
3465
/*
 * Byte-swap the low 16 bits of src into dst.  TCG_BSWAP_OS sign-extends
 * the result; TCG_BSWAP_OZ (without TCG_BSWAP_IZ) zero-extends it.
 */
static void tgen_bswap16(TCGContext *s, TCGType type,
                         TCGReg dst, TCGReg src, unsigned flags)
{
    /* Use R0 as scratch when dst aliases src. */
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        /* Power10 has a direct byte-reverse-halfword insn. */
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static const TCGOutOpBswap outop_bswap16 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap16,
};
3503
/*
 * Byte-swap the low 32 bits of src into dst.  TCG_BSWAP_OS sign-extends
 * the result; TCG_BSWAP_OZ (without TCG_BSWAP_IZ) zero-extends it.
 */
static void tgen_bswap32(TCGContext *s, TCGType type,
                         TCGReg dst, TCGReg src, unsigned flags)
{
    /* Use R0 as scratch when dst aliases src. */
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        /* Power10 has a direct byte-reverse-word insn. */
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static const TCGOutOpBswap outop_bswap32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap32,
};
3544
#if TCG_TARGET_REG_BITS == 64
/*
 * Byte-swap all 64 bits of src into dst, built from two 32-bit swaps
 * (see tgen_bswap32) joined by 32-bit rotates of the doubleword.
 */
static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    /* Need two scratch values; pick so that neither aliases src early. */
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        /* Power10 has a direct byte-reverse-doubleword insn. */
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

static const TCGOutOpUnary outop_bswap64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap64,
};
#endif /* TCG_TARGET_REG_BITS == 64 */
3589
/* Arithmetic negation: a0 = -a1. */
static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    tcg_out32(s, NEG | RT(a0) | RA(a1));
}

static const TCGOutOpUnary outop_neg = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_neg,
};
3599
/* Bitwise NOT: nor of a value with itself. */
static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    tgen_nor(s, type, a0, a1, a1);
}

static const TCGOutOpUnary outop_not = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_not,
};
3609
/*
 * Deposit a2 into bits [ofs, ofs+len) of a0 via rotate-and-insert.
 * The "0" constraint ties a0 to a1, so a1 is implicitly the insert base.
 */
static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                         TCGReg a2, unsigned ofs, unsigned len)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWIMI, a0, a2, ofs, 32 - ofs - len, 31 - ofs);
    } else {
        tcg_out_rld(s, RLDIMI, a0, a2, ofs, 64 - ofs - len);
    }
}

/*
 * Immediate form: the rZ constraint means a2 can only be 0, so
 * depositing is just clearing the field with an AND of the inverse mask.
 */
static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                          tcg_target_long a2, unsigned ofs, unsigned len)
{
    tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len));
}

static const TCGOutOpDeposit outop_deposit = {
    .base.static_constraint = C_O1_I2(r, 0, rZ),
    .out_rrr = tgen_deposit,
    .out_rri = tgen_depositi,
};
3631
/* Unsigned extract of bits [ofs, ofs+len) via mask or rotate-and-mask. */
static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                         unsigned ofs, unsigned len)
{
    if (ofs == 0 && len <= 16) {
        /* Fits in the 16-bit unsigned immediate of andi. */
        tgen_andi(s, TCG_TYPE_I32, a0, a1, (1 << len) - 1);
    } else if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWINM, a0, a1, 32 - ofs, 32 - len, 31);
    } else {
        tcg_out_rld(s, RLDICL, a0, a1, 64 - ofs, 64 - len);
    }
}

static const TCGOutOpExtract outop_extract = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extract,
};
3648
/*
 * Signed extract of bits [ofs, ofs+len).  Only the combinations the
 * middle-end is allowed to request are handled: byte/halfword/word
 * sign-extensions at ofs 0, and top-justified fields via sraw.
 */
static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                          unsigned ofs, unsigned len)
{
    if (ofs == 0) {
        switch (len) {
        case 8:
            tcg_out_ext8s(s, type, a0, a1);
            return;
        case 16:
            tcg_out_ext16s(s, type, a0, a1);
            return;
        case 32:
            tcg_out_ext32s(s, a0, a1);
            return;
        }
    } else if (ofs + len == 32) {
        /* Field is top-justified in 32 bits: arithmetic shift suffices. */
        tcg_out_sari32(s, a0, a1, ofs);
        return;
    }
    g_assert_not_reached();
}

static const TCGOutOpExtract outop_sextract = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_sextract,
};
3675
/* Two-register extract not implemented for this backend. */
static const TCGOutOpExtract2 outop_extract2 = {
    .base.static_constraint = C_NotImplemented,
};
3679
/* Load zero-extended byte (lbz/lbzx, long offsets handled by the helper). */
static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest,
                      TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LBZ, LBZX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld8u = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld8u,
};
3690
/* Load sign-extended byte: no lba insn exists, so load then extend. */
static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest,
                      TCGReg base, ptrdiff_t offset)
{
    tgen_ld8u(s, type, dest, base, offset);
    tcg_out_ext8s(s, type, dest, dest);
}

static const TCGOutOpLoad outop_ld8s = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld8s,
};
3702
/* Load zero-extended halfword (lhz/lhzx). */
static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LHZ, LHZX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld16u = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld16u,
};
3713
/* Load sign-extended halfword (lha/lhax). */
static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LHA, LHAX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld16s = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld16s,
};
3724
#if TCG_TARGET_REG_BITS == 64
/* Load zero-extended word (lwz/lwzx). */
static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LWZ, LWZX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld32u = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld32u,
};

/* Load sign-extended word (lwa/lwax). */
static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, LWA, LWAX, dest, base, offset);
}

static const TCGOutOpLoad outop_ld32s = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tgen_ld32s,
};
#endif
3748
/* Store byte (stb/stbx). */
static void tgen_st8(TCGContext *s, TCGType type, TCGReg data,
                     TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, STB, STBX, data, base, offset);
}

static const TCGOutOpStore outop_st8 = {
    .base.static_constraint = C_O0_I2(r, r),
    .out_r = tgen_st8,
};
3759
/* Store halfword (sth/sthx). */
static void tgen_st16(TCGContext *s, TCGType type, TCGReg data,
                      TCGReg base, ptrdiff_t offset)
{
    tcg_out_mem_long(s, STH, STHX, data, base, offset);
}

static const TCGOutOpStore outop_st16 = {
    .base.static_constraint = C_O0_I2(r, r),
    .out_r = tgen_st16,
};
3770
/* Full-width store, delegated to the generic store helper. */
static const TCGOutOpStore outop_st = {
    .base.static_constraint = C_O0_I2(r, r),
    .out_r = tcg_out_st,
};
3775
3776
/*
 * Emit the remaining opcodes not covered by the per-op descriptor tables:
 * only the guest memory accesses.  Everything else is dispatched through
 * the outop_* structures or emitted by dedicated helpers.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    switch (opc) {
    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_i64:
        /* On 32-bit hosts the 64-bit value occupies a register pair. */
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_ld(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_ld_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
        break;

    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_i64:
        /* On 32-bit hosts the 64-bit value occupies a register pair. */
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_st(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_st_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
        break;

    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
    default:
        g_assert_not_reached();
    }
}
3821
/*
 * Report vector op support for (opc, type, vece):
 *   1  -> supported directly,
 *  -1  -> supported via expansion in tcg_expand_vec_op,
 *   0  -> unsupported.
 */
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_not_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
        return 1;
    case INDEX_op_orc_vec:
        return have_isa_2_07;
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
        /* 64-bit element forms of these arrived with ISA 2.07. */
        return vece <= MO_32 || have_isa_2_07;
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
        return vece <= MO_32;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_rotli_vec:
        /* Immediate shifts are expanded to the by-vector forms. */
        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
    case INDEX_op_cmp_vec:
    case INDEX_op_cmpsel_vec:
        return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
    case INDEX_op_neg_vec:
        return vece >= MO_32 && have_isa_3_00;
    case INDEX_op_mul_vec:
        switch (vece) {
        case MO_8:
        case MO_16:
            return -1;
        case MO_32:
            return have_isa_2_07 ? 1 : -1;
        case MO_64:
            return have_isa_3_10;
        }
        return 0;
    case INDEX_op_bitsel_vec:
        return have_vsx;
    case INDEX_op_rotrv_vec:
        return -1;
    default:
        return 0;
    }
}
3881
/*
 * Splat a single element of SRC across all elements of vector DST.
 * SRC may be a general register (ISA 3.00 only, via constraints) or a
 * vector register.  Returns false if no direct splat is possible, so
 * the caller falls back on dupm or mov+dup.
 */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    tcg_debug_assert(dst >= TCG_REG_V0);

    /* Splat from integer reg allowed via constraints for v3.00.  */
    if (src < TCG_REG_V0) {
        tcg_debug_assert(have_isa_3_00);
        switch (vece) {
        case MO_64:
            /* mtvsrdd with RA == RB places the GPR in both doublewords. */
            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
            return true;
        case MO_32:
            /* mtvsrws moves and splats the word in one instruction. */
            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
            return true;
        default:
            /* Fail, so that we fall back on either dupm or mov+dup.  */
            return false;
        }
    }

    /*
     * Recall we use (or emulate) VSX integer loads, so the integer is
     * right justified within the left (zero-index) double-word.
     */
    switch (vece) {
    case MO_8:
        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
        break;
    case MO_16:
        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
        break;
    case MO_32:
        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
        break;
    case MO_64:
        if (have_vsx) {
            /* xxpermdi with both inputs == src duplicates a doubleword. */
            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
            break;
        }
        /* No VSX: swap halves into TMP1, then merge so both halves match. */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
3930
/*
 * Load one element from BASE+OFFSET and splat it across all elements
 * of OUT.  Where no load-and-splat instruction exists, a (possibly
 * wider, 16-byte-aligned) load is followed by an in-register splat of
 * the element selected by the low bits of OFFSET.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg out, TCGReg base, intptr_t offset)
{
    int elt;

    tcg_debug_assert(out >= TCG_REG_V0);
    switch (vece) {
    case MO_8:
        if (have_isa_3_00) {
            /* Load the full aligned quadword containing the byte. */
            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
        }
        elt = extract32(offset, 0, 4);
#if !HOST_BIG_ENDIAN
        /* Little-endian hosts index elements from the opposite end. */
        elt ^= 15;
#endif
        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_16:
        tcg_debug_assert((offset & 1) == 0);
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
        }
        elt = extract32(offset, 1, 3);
#if !HOST_BIG_ENDIAN
        elt ^= 7;
#endif
        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_32:
        if (have_isa_3_00) {
            /* lxvwsx loads and splats the word directly (ISA 3.0). */
            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
        elt = extract32(offset, 2, 2);
#if !HOST_BIG_ENDIAN
        elt ^= 3;
#endif
        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_64:
        if (have_vsx) {
            /* lxvdsx loads and splats the doubleword directly. */
            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        /* Aligned quadword load, then select the wanted doubleword. */
        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
        elt = extract32(offset, 3, 1);
#if !HOST_BIG_ENDIAN
        elt = !elt;
#endif
        if (elt) {
            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
        } else {
            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
3999
/* Emit a0 = ~a1, using vnor with both source operands equal to a1. */
static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
{
    tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
}
4004
/* Emit a0 = a1 | a2 (vor). */
static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
}
4009
/* Emit a0 = a1 | ~a2 (vorc). */
static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
}
4014
/* Emit a0 = a1 & a2 (vand). */
static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
}
4019
/* Emit a0 = a1 & ~a2 (vandc). */
static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
}
4024
4025static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
4026                               TCGReg c, TCGReg t, TCGReg f)
4027{
4028    if (TCG_TARGET_HAS_bitsel_vec) {
4029        tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
4030    } else {
4031        tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
4032        tcg_out_andc_vec(s, d, f, c);
4033        tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
4034    }
4035}
4036
/*
 * Emit a vector comparison without the final inversion.  The hardware
 * provides only EQ/GT/GTU (plus NE for word-or-smaller with ISA 3.00),
 * so other conditions are reduced by swapping operands and/or inverting
 * the condition.  Returns true when the caller must invert the result
 * (emit a NOT of a0) to realize the original condition.
 */
static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
                                  TCGReg a1, TCGReg a2, TCGCond cond)
{
    static const uint32_t
        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
    uint32_t insn;

    bool need_swap = false, need_inv = false;

    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_GT:
    case TCG_COND_GTU:
        /* Directly supported. */
        break;
    case TCG_COND_NE:
        if (have_isa_3_00 && vece <= MO_32) {
            /* vcmpne[bhw] available; no doubleword form (ne_op[3] == 0). */
            break;
        }
        /* fall through */
    case TCG_COND_LE:
    case TCG_COND_LEU:
        /* LE == !GT, LEU == !GTU, NE == !EQ. */
        need_inv = true;
        break;
    case TCG_COND_LT:
    case TCG_COND_LTU:
        /* LT(a,b) == GT(b,a). */
        need_swap = true;
        break;
    case TCG_COND_GE:
    case TCG_COND_GEU:
        /* GE(a,b) == !GT(b,a). */
        need_swap = need_inv = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (need_inv) {
        cond = tcg_invert_cond(cond);
    }
    if (need_swap) {
        TCGReg swap = a1;
        a1 = a2;
        a2 = swap;
        cond = tcg_swap_cond(cond);
    }

    switch (cond) {
    case TCG_COND_EQ:
        insn = eq_op[vece];
        break;
    case TCG_COND_NE:
        insn = ne_op[vece];
        break;
    case TCG_COND_GT:
        insn = gts_op[vece];
        break;
    case TCG_COND_GTU:
        insn = gtu_op[vece];
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));

    return need_inv;
}
4107
4108static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
4109                            TCGReg a1, TCGReg a2, TCGCond cond)
4110{
4111    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
4112        tcg_out_not_vec(s, a0, a0);
4113    }
4114}
4115
4116static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
4117                               TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
4118                               TCGReg v4, TCGCond cond)
4119{
4120    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);
4121
4122    if (!const_v3) {
4123        if (inv) {
4124            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
4125        } else {
4126            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
4127        }
4128    } else if (v3) {
4129        if (inv) {
4130            tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
4131        } else {
4132            tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
4133        }
4134    } else {
4135        if (inv) {
4136            tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
4137        } else {
4138            tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
4139        }
4140    }
4141}
4142
/*
 * Emit one vector TCG opcode.  Most opcodes map to a single VX-form
 * instruction selected from the per-element-size tables below; the
 * remainder dispatch to dedicated helpers.  A zero table entry means
 * "no instruction at this element size" and is caught by the assert
 * at the bottom.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const uint32_t
        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
        neg_op[4] = { 0, 0, VNEGW, VNEGD },
        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };

    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
    uint32_t insn;

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        return;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        return;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        return;

    case INDEX_op_add_vec:
        insn = add_op[vece];
        break;
    case INDEX_op_sub_vec:
        insn = sub_op[vece];
        break;
    case INDEX_op_neg_vec:
        /* vneg[wd] take their operand in VRB; clear the VRA field. */
        insn = neg_op[vece];
        a2 = a1;
        a1 = 0;
        break;
    case INDEX_op_mul_vec:
        insn = mul_op[vece];
        break;
    case INDEX_op_ssadd_vec:
        insn = ssadd_op[vece];
        break;
    case INDEX_op_sssub_vec:
        insn = sssub_op[vece];
        break;
    case INDEX_op_usadd_vec:
        insn = usadd_op[vece];
        break;
    case INDEX_op_ussub_vec:
        insn = ussub_op[vece];
        break;
    case INDEX_op_smin_vec:
        insn = smin_op[vece];
        break;
    case INDEX_op_umin_vec:
        insn = umin_op[vece];
        break;
    case INDEX_op_smax_vec:
        insn = smax_op[vece];
        break;
    case INDEX_op_umax_vec:
        insn = umax_op[vece];
        break;
    case INDEX_op_shlv_vec:
        insn = shlv_op[vece];
        break;
    case INDEX_op_shrv_vec:
        insn = shrv_op[vece];
        break;
    case INDEX_op_sarv_vec:
        insn = sarv_op[vece];
        break;
    case INDEX_op_and_vec:
        tcg_out_and_vec(s, a0, a1, a2);
        return;
    case INDEX_op_or_vec:
        tcg_out_or_vec(s, a0, a1, a2);
        return;
    case INDEX_op_xor_vec:
        insn = VXOR;
        break;
    case INDEX_op_andc_vec:
        tcg_out_andc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_not_vec:
        tcg_out_not_vec(s, a0, a1);
        return;
    case INDEX_op_orc_vec:
        tcg_out_orc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_nand_vec:
        insn = VNAND;
        break;
    case INDEX_op_nor_vec:
        insn = VNOR;
        break;
    case INDEX_op_eqv_vec:
        insn = VEQV;
        break;

    case INDEX_op_cmp_vec:
        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
        return;
    case INDEX_op_cmpsel_vec:
        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
                           args[3], const_args[3], args[4], args[5]);
        return;
    case INDEX_op_bitsel_vec:
        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
        return;

    case INDEX_op_dup2_vec:
        assert(TCG_TARGET_REG_BITS == 32);
        /* With inputs a1 = xLxx, a2 = xHxx  */
        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
        return;

    case INDEX_op_ppc_mrgh_vec:
        insn = mrgh_op[vece];
        break;
    case INDEX_op_ppc_mrgl_vec:
        insn = mrgl_op[vece];
        break;
    case INDEX_op_ppc_muleu_vec:
        insn = muleu_op[vece];
        break;
    case INDEX_op_ppc_mulou_vec:
        insn = mulou_op[vece];
        break;
    case INDEX_op_ppc_pkum_vec:
        insn = pkum_op[vece];
        break;
    case INDEX_op_rotlv_vec:
        insn = rotl_op[vece];
        break;
    case INDEX_op_ppc_msum_vec:
        /* vmsumuhm is the only multiply-sum form used (4-operand VA-form). */
        tcg_debug_assert(vece == MO_16);
        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
        return;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(insn != 0);
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}
4312
/*
 * Expand an immediate shift/rotate into the variable-count form OPCI,
 * with the count splatted into a vector constant.  The hardware uses
 * only the low log2(element-bits) bits of each count element, which is
 * what allows the negative-value trick below.
 */
static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
{
    TCGv_vec t1;

    if (vece == MO_32) {
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using negative numbers gets us the 4th bit easily.
         */
        imm = sextract32(imm, 0, 5);
    } else {
        /* Mask the count to the element width. */
        imm &= (8 << vece) - 1;
    }

    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
    t1 = tcg_constant_vec(type, MO_8, imm);
    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
}
4333
/*
 * Expand a vector multiply that the host cannot do in one instruction.
 * MO_8/MO_16: multiply even and odd elements into double-width products,
 * then merge and pack back down to the original element size.
 * MO_32 (pre-2.07 only): combine 16x16 partial products via multiply-sum.
 */
static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2)
{
    TCGv_vec t1 = tcg_temp_new_vec(type);
    TCGv_vec t2 = tcg_temp_new_vec(type);
    TCGv_vec c0, c16;

    switch (vece) {
    case MO_8:
    case MO_16:
        /* t1 = even products, t2 = odd products (double-width). */
        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        /* Merge high/low halves, then pack the low halves of each product. */
        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
        break;

    case MO_32:
        /* With 2.07, vmuluwm handles MO_32 directly (see mul_op table). */
        tcg_debug_assert(!have_isa_2_07);
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using -16 is a quick way to represent 16.
         */
        c16 = tcg_constant_vec(type, MO_8, -16);
        c0 = tcg_constant_vec(type, MO_8, 0);

        /* t1 = v2 rotated by 16; cross products via multiply-sum. */
        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
        /* Shift the cross terms into the high half and accumulate. */
        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
        tcg_gen_add_vec(MO_32, v0, t1, t2);
        break;

    default:
        g_assert_not_reached();
    }
    tcg_temp_free_vec(t1);
    tcg_temp_free_vec(t2);
}
4382
4383void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4384                       TCGArg a0, ...)
4385{
4386    va_list va;
4387    TCGv_vec v0, v1, v2, t0;
4388    TCGArg a2;
4389
4390    va_start(va, a0);
4391    v0 = temp_tcgv_vec(arg_temp(a0));
4392    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
4393    a2 = va_arg(va, TCGArg);
4394
4395    switch (opc) {
4396    case INDEX_op_shli_vec:
4397        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
4398        break;
4399    case INDEX_op_shri_vec:
4400        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
4401        break;
4402    case INDEX_op_sari_vec:
4403        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
4404        break;
4405    case INDEX_op_rotli_vec:
4406        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
4407        break;
4408    case INDEX_op_mul_vec:
4409        v2 = temp_tcgv_vec(arg_temp(a2));
4410        expand_vec_mul(type, vece, v0, v1, v2);
4411        break;
4412    case INDEX_op_rotlv_vec:
4413        v2 = temp_tcgv_vec(arg_temp(a2));
4414        t0 = tcg_temp_new_vec(type);
4415        tcg_gen_neg_vec(vece, t0, v2);
4416        tcg_gen_rotlv_vec(vece, v0, v1, t0);
4417        tcg_temp_free_vec(t0);
4418        break;
4419    default:
4420        g_assert_not_reached();
4421    }
4422    va_end(va);
4423}
4424
/*
 * Return the register-constraint set for each opcode the backend
 * implements; C_NotImplemented for everything else.
 */
static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
{
    switch (op) {
    case INDEX_op_qemu_ld_i32:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_i64:
        /* 32-bit hosts return the 64-bit value in a register pair. */
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);

    case INDEX_op_qemu_st_i32:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);

    case INDEX_op_qemu_ld_i128:
        return C_N1O1_I1(o, m, r);
    case INDEX_op_qemu_st_i128:
        return C_O0_I3(o, m, r);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    case INDEX_op_dup_vec:
        /* With v3.00, dup may also splat from a general register. */
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);
    case INDEX_op_cmpsel_vec:
        return C_O1_I4(v, v, v, vZM, v);

    default:
        return C_NotImplemented;
    }
}
4501
/*
 * Initialize the backend's register sets: which registers exist for
 * each TCG type, which are clobbered across calls by the ABI, and
 * which are reserved for internal use.
 */
static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
    if (have_altivec) {
        /* Vector registers occupy the upper half of the regset. */
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    /* Volatile (caller-saved) GPRs per the PPC calling conventions. */
    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    /* Volatile vector registers V0-V19. */
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
    }
}
4563
4564#ifdef __ELF__
/*
 * Layout of the .debug_frame image handed to the JIT debug interface:
 * CIE, FDE header, CFA definition, then one 2-byte row per callee-saved
 * register (filled in at runtime by tcg_register_jit).
 */
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif
4580
/*
 * Template debug-frame data; func_start/func_len and the per-register
 * rows of fde_reg_ofs are completed in tcg_register_jit.
 */
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};
4602
4603void tcg_register_jit(const void *buf, size_t buf_size)
4604{
4605    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4606    int i;
4607
4608    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4609        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4610        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4611    }
4612
4613    debug_frame.fde.func_start = (uintptr_t)buf;
4614    debug_frame.fde.func_len = buf_size;
4615
4616    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4617}
4618#endif /* __ELF__ */
4619#undef VMULEUB
4620#undef VMULEUH
4621#undef VMULEUW
4622#undef VMULOUB
4623#undef VMULOUH
4624#undef VMULOUW
4625#undef VMSUMUHM
4626