xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision 623d7e3551a6fc5693c06ea938c60fe281b52e27)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26#include "../tcg-pool.c.inc"
27#include "../tcg-ldst.c.inc"
28
/*
 * Normalise ABI detection onto the _CALL_FOO macros that GCC provides,
 * because other toolchains leave some of them undefined:
 *   - Apple XCode does not define _CALL_DARWIN;
 *   - Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# if defined(_CALL_AIX)
    /* Already identified by the compiler. */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
    /* _CALL_ELF == 1 is handled through the _CALL_AIX code paths. */
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* _CALL_ELF == 2 is tested for directly; nothing to alias. */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* Already identified by the compiler. */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif
55
/*
 * Argument/return passing conventions for helper calls.
 * The I32 argument and I128 return rules depend on the register width;
 * the I64 and I128 argument rules depend on whether the ABI is SysV.
 */
#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif

#if defined(_CALL_SYSV)
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif
70
/*
 * Some memory operations need a scratch register other than R0.
 * Under the AIX calling convention the TOC register (R2) can be borrowed,
 * because it is reloaded at every call anyway.  For the other ABIs R12 is
 * used: it is neither call-saved nor a parameter register.
 */
#if defined(_CALL_AIX)
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

/* Vector scratch registers. */
#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

/* Register used for TB-relative addressing; only enabled on 64-bit hosts. */
#define TCG_REG_TB      TCG_REG_R31
#define USE_REG_TB      (TCG_TARGET_REG_BITS == 64)
87
/* Pointer size in bytes, as a signed int so it never promotes to unsigned. */
#define SZP  ((int)sizeof(void *))

/* Register size in bytes. */
#define SZR  (TCG_TARGET_REG_BITS / 8)

/* Backend-specific constant-operand constraint flags. */
#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_S32   0x400
#define TCG_CT_CONST_U32   0x800
#define TCG_CT_CONST_ZERO  0x1000
#define TCG_CT_CONST_MONE  0x2000
#define TCG_CT_CONST_WSZ   0x4000

/* Register-set masks: low 32 bits are general, high 32 bits are vector. */
#define ALL_GENERAL_REGS   0xffffffffu
#define ALL_VECTOR_REGS    0xffffffff00000000ull

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#if !defined(CONFIG_SOFTMMU)
#define TCG_GUEST_BASE_REG 30
#endif
109
#ifdef CONFIG_DEBUG_TCG
/* Printable register names, indexed by register number (debug builds only). */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    /* General registers r0..r31. */
    "r0",  "r1",  "r2",  "r3",
    "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19",
    "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27",
    "r28", "r29", "r30", "r31",
    /* Vector registers v0..v31. */
    "v0",  "v1",  "v2",  "v3",
    "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11",
    "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19",
    "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27",
    "v28", "v29", "v30", "v31",
};
#endif
122
123static const int tcg_target_reg_alloc_order[] = {
124    TCG_REG_R14,  /* call saved registers */
125    TCG_REG_R15,
126    TCG_REG_R16,
127    TCG_REG_R17,
128    TCG_REG_R18,
129    TCG_REG_R19,
130    TCG_REG_R20,
131    TCG_REG_R21,
132    TCG_REG_R22,
133    TCG_REG_R23,
134    TCG_REG_R24,
135    TCG_REG_R25,
136    TCG_REG_R26,
137    TCG_REG_R27,
138    TCG_REG_R28,
139    TCG_REG_R29,
140    TCG_REG_R30,
141    TCG_REG_R31,
142    TCG_REG_R12,  /* call clobbered, non-arguments */
143    TCG_REG_R11,
144    TCG_REG_R2,
145    TCG_REG_R13,
146    TCG_REG_R10,  /* call clobbered, arguments */
147    TCG_REG_R9,
148    TCG_REG_R8,
149    TCG_REG_R7,
150    TCG_REG_R6,
151    TCG_REG_R5,
152    TCG_REG_R4,
153    TCG_REG_R3,
154
155    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
156    TCG_REG_V2,   /* call clobbered, vectors */
157    TCG_REG_V3,
158    TCG_REG_V4,
159    TCG_REG_V5,
160    TCG_REG_V6,
161    TCG_REG_V7,
162    TCG_REG_V8,
163    TCG_REG_V9,
164    TCG_REG_V10,
165    TCG_REG_V11,
166    TCG_REG_V12,
167    TCG_REG_V13,
168    TCG_REG_V14,
169    TCG_REG_V15,
170    TCG_REG_V16,
171    TCG_REG_V17,
172    TCG_REG_V18,
173    TCG_REG_V19,
174};
175
176static const int tcg_target_call_iarg_regs[] = {
177    TCG_REG_R3,
178    TCG_REG_R4,
179    TCG_REG_R5,
180    TCG_REG_R6,
181    TCG_REG_R7,
182    TCG_REG_R8,
183    TCG_REG_R9,
184    TCG_REG_R10
185};
186
187static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
188{
189    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
190    tcg_debug_assert(slot >= 0 && slot <= 1);
191    return TCG_REG_R3 + slot;
192}
193
194static const int tcg_target_callee_save_regs[] = {
195#ifdef _CALL_DARWIN
196    TCG_REG_R11,
197#endif
198    TCG_REG_R14,
199    TCG_REG_R15,
200    TCG_REG_R16,
201    TCG_REG_R17,
202    TCG_REG_R18,
203    TCG_REG_R19,
204    TCG_REG_R20,
205    TCG_REG_R21,
206    TCG_REG_R22,
207    TCG_REG_R23,
208    TCG_REG_R24,
209    TCG_REG_R25,
210    TCG_REG_R26,
211    TCG_REG_R27, /* currently used for the global env */
212    TCG_REG_R28,
213    TCG_REG_R29,
214    TCG_REG_R30,
215    TCG_REG_R31
216};
217
218static inline bool in_range_b(tcg_target_long target)
219{
220    return target == sextract64(target, 0, 26);
221}
222
223static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
224			       const tcg_insn_unit *target)
225{
226    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
227    tcg_debug_assert(in_range_b(disp));
228    return disp & 0x3fffffc;
229}
230
231static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
232{
233    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
234    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
235
236    if (in_range_b(disp)) {
237        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
238        return true;
239    }
240    return false;
241}
242
243static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
244			       const tcg_insn_unit *target)
245{
246    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
247    tcg_debug_assert(disp == (int16_t) disp);
248    return disp & 0xfffc;
249}
250
251static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
252{
253    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
254    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
255
256    if (disp == (int16_t) disp) {
257        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
258        return true;
259    }
260    return false;
261}
262
263/* test if a constant matches the constraint */
264static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
265{
266    if (ct & TCG_CT_CONST) {
267        return 1;
268    }
269
270    /* The only 32-bit constraint we use aside from
271       TCG_CT_CONST is TCG_CT_CONST_S16.  */
272    if (type == TCG_TYPE_I32) {
273        val = (int32_t)val;
274    }
275
276    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
277        return 1;
278    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
279        return 1;
280    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
281        return 1;
282    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
283        return 1;
284    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
285        return 1;
286    } else if ((ct & TCG_CT_CONST_WSZ)
287               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
288        return 1;
289    }
290    return 0;
291}
292
/*
 * Instruction-format helpers: the primary opcode occupies the top six
 * bits of the insn; each helper adds the extended opcode at the position
 * its instruction form uses.
 */
#define OPCD(opc)   ((opc) << 26)
#define XO19(opc)   (OPCD(19) | ((opc) << 1))
#define MD30(opc)   (OPCD(30) | ((opc) << 2))
#define MDS30(opc)  (OPCD(30) | ((opc) << 1))
#define XO31(opc)   (OPCD(31) | ((opc) << 1))
#define XO58(opc)   (OPCD(58) | (opc))
#define XO62(opc)   (OPCD(62) | (opc))
#define VX4(opc)    (OPCD(4) | (opc))
301
/* Branches. */
#define B       OPCD(18)
#define BC      OPCD(16)

/* Loads. */
#define LBZ     OPCD(34)
#define LHZ     OPCD(40)
#define LHA     OPCD(42)
#define LWZ     OPCD(32)
#define LWZUX   XO31(55)
#define LD      XO58(0)
#define LDX     XO31(21)
#define LDU     XO58(1)
#define LDUX    XO31(53)
#define LWA     XO58(2)
#define LWAX    XO31(341)
#define LQ      OPCD(56)

/* Stores. */
#define STB     OPCD(38)
#define STH     OPCD(44)
#define STW     OPCD(36)
#define STD     XO62(0)
#define STDU    XO62(1)
#define STDX    XO31(149)
#define STQ     XO62(2)

/* Operations taking a 16-bit immediate. */
#define ADDIC   OPCD(12)
#define ADDI    OPCD(14)
#define ADDIS   OPCD(15)
#define ORI     OPCD(24)
#define ORIS    OPCD(25)
#define XORI    OPCD(26)
#define XORIS   OPCD(27)
#define ANDI    OPCD(28)
#define ANDIS   OPCD(29)
#define MULLI   OPCD(7)
#define CMPLI   OPCD(10)
#define CMPI    OPCD(11)
#define SUBFIC  OPCD(8)

/* Word load/store with update. */
#define LWZU    OPCD(33)
#define STWU    OPCD(37)

/* 32-bit rotates. */
#define RLWIMI  OPCD(20)
#define RLWINM  OPCD(21)
#define RLWNM   OPCD(23)

/* 64-bit rotates. */
#define RLDICL  MD30(0)
#define RLDICR  MD30(1)
#define RLDIMI  MD30(3)
#define RLDCL   MDS30(8)

/* Opcode-19 group: branches via LR/CTR and condition-register logic. */
#define BCLR    XO19(16)
#define BCCTR   XO19(528)
#define CRAND   XO19(257)
#define CRANDC  XO19(129)
#define CRNAND  XO19(225)
#define CROR    XO19(449)
#define CRNOR   XO19(33)
359
/* Opcode-31 group: register-register integer operations. */
#define EXTSB    XO31(954)
#define EXTSH    XO31(922)
#define EXTSW    XO31(986)
#define ADD      XO31(266)
#define ADDE     XO31(138)
#define ADDME    XO31(234)
#define ADDZE    XO31(202)
#define ADDC     XO31(10)
#define AND      XO31(28)
#define SUBF     XO31(40)
#define SUBFC    XO31(8)
#define SUBFE    XO31(136)
#define SUBFME   XO31(232)
#define SUBFZE   XO31(200)
#define OR       XO31(444)
#define XOR      XO31(316)
#define MULLW    XO31(235)
#define MULHW    XO31(75)
#define MULHWU   XO31(11)
#define DIVW     XO31(491)
#define DIVWU    XO31(459)
#define MODSW    XO31(779)
#define MODUW    XO31(267)
#define CMP      XO31(0)
#define CMPL     XO31(32)
#define LHBRX    XO31(790)
#define LWBRX    XO31(534)
#define LDBRX    XO31(532)
#define STHBRX   XO31(918)
#define STWBRX   XO31(662)
#define STDBRX   XO31(660)
#define MFSPR    XO31(339)
#define MTSPR    XO31(467)
#define SRAWI    XO31(824)
#define NEG      XO31(104)
#define MFCR     XO31(19)
#define MFOCRF   (MFCR | (1u << 20))
#define NOR      XO31(124)
#define CNTLZW   XO31(26)
#define CNTLZD   XO31(58)
#define CNTTZW   XO31(538)
#define CNTTZD   XO31(570)
#define CNTPOPW  XO31(378)
#define CNTPOPD  XO31(506)
#define ANDC     XO31(60)
#define ORC      XO31(412)
#define EQV      XO31(284)
#define NAND     XO31(476)
#define ISEL     XO31(15)

/* 64-bit multiply / divide / modulo. */
#define MULLD    XO31(233)
#define MULHD    XO31(73)
#define MULHDU   XO31(9)
#define DIVD     XO31(489)
#define DIVDU    XO31(457)
#define MODSD    XO31(777)
#define MODUD    XO31(265)

/* Indexed loads and stores. */
#define LBZX     XO31(87)
#define LHZX     XO31(279)
#define LHAX     XO31(343)
#define LWZX     XO31(23)
#define STBX     XO31(215)
#define STHX     XO31(407)
#define STWX     XO31(151)

/* Memory barriers. */
#define EIEIO    XO31(854)
#define HWSYNC   XO31(598)
#define LWSYNC   (HWSYNC | (1u << 21))

/* Special-purpose register field for MFSPR / MTSPR. */
#define SPR(a, b) ((((a) << 5) | (b)) << 11)
#define LR       SPR(8, 0)
#define CTR      SPR(9, 0)

/* 32-bit shifts by register. */
#define SLW      XO31(24)
#define SRW      XO31(536)
#define SRAW     XO31(792)

/* 64-bit shifts. */
#define SLD      XO31(27)
#define SRD      XO31(539)
#define SRAD     XO31(794)
#define SRADI    XO31(413 << 1)

/* Byte reversal in registers. */
#define BRH      XO31(219)
#define BRW      XO31(155)
#define BRD      XO31(187)

/* Traps. */
#define TW       XO31(4)
#define TRAP     (TW | TO(31))

#define NOP      ORI  /* ori 0,0,0 */
451
/* Vector / VSX loads. */
#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)     /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)     /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)      /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)      /* v3.00 */
#define LXVWSX     (XO31(364) | 1)     /* v3.00, force tx=1 */

/* Vector / VSX stores. */
#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)     /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)     /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5)  /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)      /* v3.00 */

/* Vector add. */
#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)            /* v2.07 */

/* Vector subtract. */
#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)           /* v2.07 */

/* Vector negate. */
#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

/* Vector min / max. */
#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)            /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)            /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)            /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)            /* v2.07 */

/* Vector compare. */
#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)            /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)            /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)            /* v2.07 */
#define VCMPNEB    VX4(7)              /* v3.00 */
#define VCMPNEH    VX4(71)             /* v3.00 */
#define VCMPNEW    VX4(135)            /* v3.00 */

/* Vector shift / rotate. */
#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)           /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)           /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)            /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)            /* v2.07 */

/* Vector multiply. */
#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)            /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)            /* v2.07 */
#define VMULUWM    VX4(137)            /* v2.07 */
#define VMULLD     VX4(457)            /* v3.10 */
#define VMSUMUHM   VX4(38)

/* Vector merge. */
#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

/* Vector pack. */
#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

/* Vector logical. */
#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)           /* v2.07 */
#define VNAND      VX4(1412)           /* v2.07 */
#define VORC       VX4(1348)           /* v2.07 */

/* Vector splat. */
#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

/* VSX permute / select / splat-immediate. */
#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

/* Moves between general and vector-scalar registers. */
#define MFVSRD     (XO31(51) | 1)      /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)     /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)     /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)     /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)     /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)     /* v3.00, force tx=1 */
593
/* Place an operand into its bit position within a 32-bit insn. */
#define RT(r)    ((r) << 21)
#define RS(r)    ((r) << 21)
#define RA(r)    ((r) << 16)
#define RB(r)    ((r) << 11)
#define TO(t)    ((t) << 21)
#define SH(s)    ((s) << 11)
#define MB(b)    ((b) << 6)
#define ME(e)    ((e) << 1)
#define BO(o)    ((o) << 21)
#define MB64(b)  ((b) << 5)
#define FXM(b)   (1 << (19 - (b)))

/* Vector register fields; only the low five bits of the number are used. */
#define VRT(r)   (((r) & 31) << 21)
#define VRA(r)   (((r) & 31) << 16)
#define VRB(r)   (((r) & 31) << 11)
#define VRC(r)   (((r) & 31) << 6)

#define LK       1

/* Common operand combinations; the immediate is truncated to 16 bits. */
#define TAB(t, a, b)  (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b)  (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i)  (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i)  (RS(s) | RA(a) | ((i) & 0xffff))

/* Condition-register operands: field @n, bit @c within that field. */
#define BF(n)      ((n) << 23)
#define BI(n, c)   (((c) + ((n) * 4)) << 16)
#define BT(n, c)   (((c) + ((n) * 4)) << 21)
#define BA(n, c)   (((c) + ((n) * 4)) << 16)
#define BB(n, c)   (((c) + ((n) * 4)) << 11)
#define BC_(n, c)  (((c) + ((n) * 4)) << 6)

/* Branch-option encodings. */
#define BO_COND_TRUE   BO(12)
#define BO_COND_FALSE  BO(4)
#define BO_ALWAYS      BO(20)
628
/* Bit index within one 4-bit condition-register field. */
enum {
    CR_LT = 0,
    CR_GT = 1,
    CR_EQ = 2,
    CR_SO = 3,
};
635
636static const uint32_t tcg_to_bc[] = {
637    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
638    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
639    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
640    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
641    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
642    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
643    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
644    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
645    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
646    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
647};
648
649/* The low bit here is set if the RA and RB fields must be inverted.  */
650static const uint32_t tcg_to_isel[] = {
651    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
652    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
653    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
654    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
655    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
656    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
657    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
658    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
659    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
660    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
661};
662
663static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
664                        intptr_t value, intptr_t addend)
665{
666    const tcg_insn_unit *target;
667    int16_t lo;
668    int32_t hi;
669
670    value += addend;
671    target = (const tcg_insn_unit *)value;
672
673    switch (type) {
674    case R_PPC_REL14:
675        return reloc_pc14(code_ptr, target);
676    case R_PPC_REL24:
677        return reloc_pc24(code_ptr, target);
678    case R_PPC_ADDR16:
679        /*
680         * We are (slightly) abusing this relocation type.  In particular,
681         * assert that the low 2 bits are zero, and do not modify them.
682         * That way we can use this with LD et al that have opcode bits
683         * in the low 2 bits of the insn.
684         */
685        if ((value & 3) || value != (int16_t)value) {
686            return false;
687        }
688        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
689        break;
690    case R_PPC_ADDR32:
691        /*
692         * We are abusing this relocation type.  Again, this points to
693         * a pair of insns, lis + load.  This is an absolute address
694         * relocation for PPC32 so the lis cannot be removed.
695         */
696        lo = value;
697        hi = value - lo;
698        if (hi + lo != value) {
699            return false;
700        }
701        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
702        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
703        break;
704    default:
705        g_assert_not_reached();
706    }
707    return true;
708}
709
710static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
711                             TCGReg base, tcg_target_long offset);
712
713static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
714{
715    if (ret == arg) {
716        return true;
717    }
718    switch (type) {
719    case TCG_TYPE_I64:
720        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
721        /* fallthru */
722    case TCG_TYPE_I32:
723        if (ret < TCG_REG_V0) {
724            if (arg < TCG_REG_V0) {
725                tcg_out32(s, OR | SAB(arg, ret, arg));
726                break;
727            } else if (have_isa_2_07) {
728                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
729                          | VRT(arg) | RA(ret));
730                break;
731            } else {
732                /* Altivec does not support vector->integer moves.  */
733                return false;
734            }
735        } else if (arg < TCG_REG_V0) {
736            if (have_isa_2_07) {
737                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
738                          | VRT(ret) | RA(arg));
739                break;
740            } else {
741                /* Altivec does not support integer->vector moves.  */
742                return false;
743            }
744        }
745        /* fallthru */
746    case TCG_TYPE_V64:
747    case TCG_TYPE_V128:
748        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
749        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
750        break;
751    default:
752        g_assert_not_reached();
753    }
754    return true;
755}
756
757static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
758                               int sh, int mb)
759{
760    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
761    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
762    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
763    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
764}
765
766static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
767                               int sh, int mb, int me)
768{
769    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
770}
771
772static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
773{
774    tcg_out32(s, EXTSB | RA(dst) | RS(src));
775}
776
777static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
778{
779    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
780}
781
782static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
783{
784    tcg_out32(s, EXTSH | RA(dst) | RS(src));
785}
786
787static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
788{
789    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
790}
791
792static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
793{
794    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
795    tcg_out32(s, EXTSW | RA(dst) | RS(src));
796}
797
798static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
799{
800    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
801    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
802}
803
804static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
805{
806    tcg_out_ext32s(s, dst, src);
807}
808
809static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
810{
811    tcg_out_ext32u(s, dst, src);
812}
813
814static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
815{
816    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
817    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
818}
819
820static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
821{
822    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
823}
824
825static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
826{
827    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
828}
829
830static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
831{
832    /* Limit immediate shift count lest we create an illegal insn.  */
833    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
834}
835
836static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
837{
838    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
839}
840
841static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
842{
843    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
844}
845
846static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
847{
848    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
849}
850
851static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
852{
853    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
854
855    if (have_isa_3_10) {
856        tcg_out32(s, BRH | RA(dst) | RS(src));
857        if (flags & TCG_BSWAP_OS) {
858            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
859        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
860            tcg_out_ext16u(s, dst, dst);
861        }
862        return;
863    }
864
865    /*
866     * In the following,
867     *   dep(a, b, m) -> (a & ~m) | (b & m)
868     *
869     * Begin with:                              src = xxxxabcd
870     */
871    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
872    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
873    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
874    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
875
876    if (flags & TCG_BSWAP_OS) {
877        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
878    } else {
879        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
880    }
881}
882
883static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
884{
885    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
886
887    if (have_isa_3_10) {
888        tcg_out32(s, BRW | RA(dst) | RS(src));
889        if (flags & TCG_BSWAP_OS) {
890            tcg_out_ext32s(s, dst, dst);
891        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
892            tcg_out_ext32u(s, dst, dst);
893        }
894        return;
895    }
896
897    /*
898     * Stolen from gcc's builtin_bswap32.
899     * In the following,
900     *   dep(a, b, m) -> (a & ~m) | (b & m)
901     *
902     * Begin with:                              src = xxxxabcd
903     */
904    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
905    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
906    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
907    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
908    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
909    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
910
911    if (flags & TCG_BSWAP_OS) {
912        tcg_out_ext32s(s, dst, tmp);
913    } else {
914        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
915    }
916}
917
918static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
919{
920    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
921    TCGReg t1 = dst == src ? dst : TCG_REG_R0;
922
923    if (have_isa_3_10) {
924        tcg_out32(s, BRD | RA(dst) | RS(src));
925        return;
926    }
927
928    /*
929     * In the following,
930     *   dep(a, b, m) -> (a & ~m) | (b & m)
931     *
932     * Begin with:                              src = abcdefgh
933     */
934    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
935    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
936    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
937    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
938    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
939    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);
940
941    /* t0 = rol64(t0, 32)                           = hgfe0000 */
942    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
943    /* t1 = rol64(src, 32)                          = efghabcd */
944    tcg_out_rld(s, RLDICL, t1, src, 32, 0);
945
946    /* t0 = dep(t0, rol32(t1, 24), 0xffffffff)      = hgfebcda */
947    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
948    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
949    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
950    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
951    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);
952
953    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
954}
955
956/* Emit a move into ret of arg, if it can be done in one insn.  */
957static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
958{
959    if (arg == (int16_t)arg) {
960        tcg_out32(s, ADDI | TAI(ret, 0, arg));
961        return true;
962    }
963    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
964        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
965        return true;
966    }
967    return false;
968}
969
970static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
971                             tcg_target_long arg, bool in_prologue)
972{
973    intptr_t tb_diff;
974    tcg_target_long tmp;
975    int shift;
976
977    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
978
979    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
980        arg = (int32_t)arg;
981    }
982
983    /* Load 16-bit immediates with one insn.  */
984    if (tcg_out_movi_one(s, ret, arg)) {
985        return;
986    }
987
988    /* Load addresses within the TB with one insn.  */
989    tb_diff = tcg_tbrel_diff(s, (void *)arg);
990    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
991        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
992        return;
993    }
994
995    /* Load 32-bit immediates with two insns.  Note that we've already
996       eliminated bare ADDIS, so we know both insns are required.  */
997    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
998        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
999        tcg_out32(s, ORI | SAI(ret, ret, arg));
1000        return;
1001    }
1002    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
1003        tcg_out32(s, ADDI | TAI(ret, 0, arg));
1004        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
1005        return;
1006    }
1007
1008    /* Load masked 16-bit value.  */
1009    if (arg > 0 && (arg & 0x8000)) {
1010        tmp = arg | 0x7fff;
1011        if ((tmp & (tmp + 1)) == 0) {
1012            int mb = clz64(tmp + 1) + 1;
1013            tcg_out32(s, ADDI | TAI(ret, 0, arg));
1014            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
1015            return;
1016        }
1017    }
1018
1019    /* Load common masks with 2 insns.  */
1020    shift = ctz64(arg);
1021    tmp = arg >> shift;
1022    if (tmp == (int16_t)tmp) {
1023        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
1024        tcg_out_shli64(s, ret, ret, shift);
1025        return;
1026    }
1027    shift = clz64(arg);
1028    if (tcg_out_movi_one(s, ret, arg << shift)) {
1029        tcg_out_shri64(s, ret, ret, shift);
1030        return;
1031    }
1032
1033    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
1034    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
1035        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
1036        return;
1037    }
1038
1039    /* Use the constant pool, if possible.  */
1040    if (!in_prologue && USE_REG_TB) {
1041        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
1042                       tcg_tbrel_diff(s, NULL));
1043        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
1044        return;
1045    }
1046
1047    tmp = arg >> 31 >> 1;
1048    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
1049    if (tmp) {
1050        tcg_out_shli64(s, ret, ret, 32);
1051    }
1052    if (arg & 0xffff0000) {
1053        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
1054    }
1055    if (arg & 0xffff) {
1056        tcg_out32(s, ORI | SAI(ret, ret, arg));
1057    }
1058}
1059
1060static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
1061                             TCGReg ret, int64_t val)
1062{
1063    uint32_t load_insn;
1064    int rel, low;
1065    intptr_t add;
1066
1067    switch (vece) {
1068    case MO_8:
1069        low = (int8_t)val;
1070        if (low >= -16 && low < 16) {
1071            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
1072            return;
1073        }
1074        if (have_isa_3_00) {
1075            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
1076            return;
1077        }
1078        break;
1079
1080    case MO_16:
1081        low = (int16_t)val;
1082        if (low >= -16 && low < 16) {
1083            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
1084            return;
1085        }
1086        break;
1087
1088    case MO_32:
1089        low = (int32_t)val;
1090        if (low >= -16 && low < 16) {
1091            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
1092            return;
1093        }
1094        break;
1095    }
1096
1097    /*
1098     * Otherwise we must load the value from the constant pool.
1099     */
1100    if (USE_REG_TB) {
1101        rel = R_PPC_ADDR16;
1102        add = tcg_tbrel_diff(s, NULL);
1103    } else {
1104        rel = R_PPC_ADDR32;
1105        add = 0;
1106    }
1107
1108    if (have_vsx) {
1109        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
1110        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
1111        if (TCG_TARGET_REG_BITS == 64) {
1112            new_pool_label(s, val, rel, s->code_ptr, add);
1113        } else {
1114            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
1115        }
1116    } else {
1117        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
1118        if (TCG_TARGET_REG_BITS == 64) {
1119            new_pool_l2(s, rel, s->code_ptr, add, val, val);
1120        } else {
1121            new_pool_l4(s, rel, s->code_ptr, add,
1122                        val >> 32, val, val >> 32, val);
1123        }
1124    }
1125
1126    if (USE_REG_TB) {
1127        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
1128        load_insn |= RA(TCG_REG_TB);
1129    } else {
1130        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
1131        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
1132    }
1133    tcg_out32(s, load_insn);
1134}
1135
1136static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1137                         tcg_target_long arg)
1138{
1139    switch (type) {
1140    case TCG_TYPE_I32:
1141    case TCG_TYPE_I64:
1142        tcg_debug_assert(ret < TCG_REG_V0);
1143        tcg_out_movi_int(s, type, ret, arg, false);
1144        break;
1145
1146    default:
1147        g_assert_not_reached();
1148    }
1149}
1150
1151static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1152{
1153    return false;
1154}
1155
1156static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1157                             tcg_target_long imm)
1158{
1159    /* This function is only used for passing structs by reference. */
1160    g_assert_not_reached();
1161}
1162
/*
 * Test whether the 32-bit constant @c is a single contiguous run of
 * one bits, i.e. one of:
 *     0....01....1
 *     1....10....0
 *     0..01..10..0
 * All-zeros and all-ones are rejected.
 *
 * On success, store the mask-begin and mask-end positions (counted
 * from the most significant bit, via clz32) in *@mb and *@me and
 * return true.  On failure return false and leave both untouched.
 */
static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lowest, carried;

    if (c == 0 || c == 0xffffffffu) {
        return false;
    }

    /*
     * Adding the lowest set bit ripples a carry through the run that
     * starts there.  If c was one contiguous run, the sum is either
     * zero (the run reached bit 31) or a single bit just above it.
     */
    lowest = c & -c;
    carried = c + lowest;
    if (carried & (carried - 1)) {
        return false;
    }

    *me = clz32(lowest);
    if (carried) {
        *mb = clz32(carried & -carried) + 1;
    } else {
        *mb = 0;
    }
    return true;
}
1186
/*
 * Test whether the 64-bit constant @c is a mask of the form 1..10..0
 * or 0..01..1.  Zero is rejected.
 *
 * On success, store the mask-begin and mask-end positions (counted
 * from the most significant bit, via clz64) in *@mb and *@me and
 * return true.  On failure return false and leave both untouched.
 */
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lowest;

    if (!c) {
        return false;
    }
    lowest = c & -c;

    /* Accept 1..10..0: every bit from the lowest set bit upward is set.  */
    if (c == -lowest) {
        *me = clz64(lowest);
        *mb = 0;
        return true;
    }

    /* Accept 0..01..1: bit 0 is set and c + 1 is a power of two.  */
    if (lowest == 1 && (c & (c + 1)) == 0) {
        *me = 63;
        *mb = clz64(c + 1) + 1;
        return true;
    }

    return false;
}
1210
1211static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1212{
1213    int mb, me;
1214
1215    if (mask_operand(c, &mb, &me)) {
1216        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1217    } else if ((c & 0xffff) == c) {
1218        tcg_out32(s, ANDI | SAI(src, dst, c));
1219        return;
1220    } else if ((c & 0xffff0000) == c) {
1221        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1222        return;
1223    } else {
1224        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1225        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1226    }
1227}
1228
1229static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1230{
1231    int mb, me;
1232
1233    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1234    if (mask64_operand(c, &mb, &me)) {
1235        if (mb == 0) {
1236            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1237        } else {
1238            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1239        }
1240    } else if ((c & 0xffff) == c) {
1241        tcg_out32(s, ANDI | SAI(src, dst, c));
1242        return;
1243    } else if ((c & 0xffff0000) == c) {
1244        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1245        return;
1246    } else {
1247        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1248        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1249    }
1250}
1251
1252static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1253                           int op_lo, int op_hi)
1254{
1255    if (c >> 16) {
1256        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1257        src = dst;
1258    }
1259    if (c & 0xffff) {
1260        tcg_out32(s, op_lo | SAI(src, dst, c));
1261        src = dst;
1262    }
1263}
1264
1265static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1266{
1267    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
1268}
1269
1270static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1271{
1272    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
1273}
1274
1275static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
1276{
1277    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1278    if (in_range_b(disp)) {
1279        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1280    } else {
1281        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1282        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1283        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1284    }
1285}
1286
1287static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
1288                             TCGReg base, tcg_target_long offset)
1289{
1290    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
1291    bool is_int_store = false;
1292    TCGReg rs = TCG_REG_TMP1;
1293
1294    switch (opi) {
1295    case LD: case LWA:
1296        align = 3;
1297        /* FALLTHRU */
1298    default:
1299        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
1300            rs = rt;
1301            break;
1302        }
1303        break;
1304    case LXSD:
1305    case STXSD:
1306        align = 3;
1307        break;
1308    case LXV:
1309    case STXV:
1310        align = 15;
1311        break;
1312    case STD:
1313        align = 3;
1314        /* FALLTHRU */
1315    case STB: case STH: case STW:
1316        is_int_store = true;
1317        break;
1318    }
1319
1320    /* For unaligned, or very large offsets, use the indexed form.  */
1321    if (offset & align || offset != (int32_t)offset || opi == 0) {
1322        if (rs == base) {
1323            rs = TCG_REG_R0;
1324        }
1325        tcg_debug_assert(!is_int_store || rs != rt);
1326        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
1327        tcg_out32(s, opx | TAB(rt & 31, base, rs));
1328        return;
1329    }
1330
1331    l0 = (int16_t)offset;
1332    offset = (offset - l0) >> 16;
1333    l1 = (int16_t)offset;
1334
1335    if (l1 < 0 && orig >= 0) {
1336        extra = 0x4000;
1337        l1 = (int16_t)(offset - 0x4000);
1338    }
1339    if (l1) {
1340        tcg_out32(s, ADDIS | TAI(rs, base, l1));
1341        base = rs;
1342    }
1343    if (extra) {
1344        tcg_out32(s, ADDIS | TAI(rs, base, extra));
1345        base = rs;
1346    }
1347    if (opi != ADDI || base != rt || l0 != 0) {
1348        tcg_out32(s, opi | TAI(rt & 31, base, l0));
1349    }
1350}
1351
1352static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
1353                           TCGReg va, TCGReg vb, int shb)
1354{
1355    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
1356}
1357
1358static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1359                       TCGReg base, intptr_t offset)
1360{
1361    int shift;
1362
1363    switch (type) {
1364    case TCG_TYPE_I32:
1365        if (ret < TCG_REG_V0) {
1366            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
1367            break;
1368        }
1369        if (have_isa_2_07 && have_vsx) {
1370            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
1371            break;
1372        }
1373        tcg_debug_assert((offset & 3) == 0);
1374        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
1375        shift = (offset - 4) & 0xc;
1376        if (shift) {
1377            tcg_out_vsldoi(s, ret, ret, ret, shift);
1378        }
1379        break;
1380    case TCG_TYPE_I64:
1381        if (ret < TCG_REG_V0) {
1382            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1383            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
1384            break;
1385        }
1386        /* fallthru */
1387    case TCG_TYPE_V64:
1388        tcg_debug_assert(ret >= TCG_REG_V0);
1389        if (have_vsx) {
1390            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
1391                             ret, base, offset);
1392            break;
1393        }
1394        tcg_debug_assert((offset & 7) == 0);
1395        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
1396        if (offset & 8) {
1397            tcg_out_vsldoi(s, ret, ret, ret, 8);
1398        }
1399        break;
1400    case TCG_TYPE_V128:
1401        tcg_debug_assert(ret >= TCG_REG_V0);
1402        tcg_debug_assert((offset & 15) == 0);
1403        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
1404                         LVX, ret, base, offset);
1405        break;
1406    default:
1407        g_assert_not_reached();
1408    }
1409}
1410
1411static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1412                              TCGReg base, intptr_t offset)
1413{
1414    int shift;
1415
1416    switch (type) {
1417    case TCG_TYPE_I32:
1418        if (arg < TCG_REG_V0) {
1419            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1420            break;
1421        }
1422        if (have_isa_2_07 && have_vsx) {
1423            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1424            break;
1425        }
1426        assert((offset & 3) == 0);
1427        tcg_debug_assert((offset & 3) == 0);
1428        shift = (offset - 4) & 0xc;
1429        if (shift) {
1430            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1431            arg = TCG_VEC_TMP1;
1432        }
1433        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1434        break;
1435    case TCG_TYPE_I64:
1436        if (arg < TCG_REG_V0) {
1437            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1438            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1439            break;
1440        }
1441        /* fallthru */
1442    case TCG_TYPE_V64:
1443        tcg_debug_assert(arg >= TCG_REG_V0);
1444        if (have_vsx) {
1445            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1446                             STXSDX, arg, base, offset);
1447            break;
1448        }
1449        tcg_debug_assert((offset & 7) == 0);
1450        if (offset & 8) {
1451            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1452            arg = TCG_VEC_TMP1;
1453        }
1454        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1455        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1456        break;
1457    case TCG_TYPE_V128:
1458        tcg_debug_assert(arg >= TCG_REG_V0);
1459        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1460                         STVX, arg, base, offset);
1461        break;
1462    default:
1463        g_assert_not_reached();
1464    }
1465}
1466
1467static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1468                               TCGReg base, intptr_t ofs)
1469{
1470    return false;
1471}
1472
1473static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1474                        int const_arg2, int cr, TCGType type)
1475{
1476    int imm;
1477    uint32_t op;
1478
1479    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1480
1481    /* Simplify the comparisons below wrt CMPI.  */
1482    if (type == TCG_TYPE_I32) {
1483        arg2 = (int32_t)arg2;
1484    }
1485
1486    switch (cond) {
1487    case TCG_COND_EQ:
1488    case TCG_COND_NE:
1489        if (const_arg2) {
1490            if ((int16_t) arg2 == arg2) {
1491                op = CMPI;
1492                imm = 1;
1493                break;
1494            } else if ((uint16_t) arg2 == arg2) {
1495                op = CMPLI;
1496                imm = 1;
1497                break;
1498            }
1499        }
1500        op = CMPL;
1501        imm = 0;
1502        break;
1503
1504    case TCG_COND_LT:
1505    case TCG_COND_GE:
1506    case TCG_COND_LE:
1507    case TCG_COND_GT:
1508        if (const_arg2) {
1509            if ((int16_t) arg2 == arg2) {
1510                op = CMPI;
1511                imm = 1;
1512                break;
1513            }
1514        }
1515        op = CMP;
1516        imm = 0;
1517        break;
1518
1519    case TCG_COND_LTU:
1520    case TCG_COND_GEU:
1521    case TCG_COND_LEU:
1522    case TCG_COND_GTU:
1523        if (const_arg2) {
1524            if ((uint16_t) arg2 == arg2) {
1525                op = CMPLI;
1526                imm = 1;
1527                break;
1528            }
1529        }
1530        op = CMPL;
1531        imm = 0;
1532        break;
1533
1534    default:
1535        g_assert_not_reached();
1536    }
1537    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1538
1539    if (imm) {
1540        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1541    } else {
1542        if (const_arg2) {
1543            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1544            arg2 = TCG_REG_R0;
1545        }
1546        tcg_out32(s, op | RA(arg1) | RB(arg2));
1547    }
1548}
1549
1550static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1551                                TCGReg dst, TCGReg src)
1552{
1553    if (type == TCG_TYPE_I32) {
1554        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1555        tcg_out_shri32(s, dst, dst, 5);
1556    } else {
1557        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1558        tcg_out_shri64(s, dst, dst, 6);
1559    }
1560}
1561
1562static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
1563{
1564    /* X != 0 implies X + -1 generates a carry.  Extra addition
1565       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
1566    if (dst != src) {
1567        tcg_out32(s, ADDIC | TAI(dst, src, -1));
1568        tcg_out32(s, SUBFE | TAB(dst, dst, src));
1569    } else {
1570        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1571        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1572    }
1573}
1574
1575static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1576                                  bool const_arg2)
1577{
1578    if (const_arg2) {
1579        if ((uint32_t)arg2 == arg2) {
1580            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1581        } else {
1582            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1583            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1584        }
1585    } else {
1586        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1587    }
1588    return TCG_REG_R0;
1589}
1590
1591static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1592                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
1593                            int const_arg2)
1594{
1595    int crop, sh;
1596
1597    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1598
1599    /* Ignore high bits of a potential constant arg2.  */
1600    if (type == TCG_TYPE_I32) {
1601        arg2 = (uint32_t)arg2;
1602    }
1603
1604    /* Handle common and trivial cases before handling anything else.  */
1605    if (arg2 == 0) {
1606        switch (cond) {
1607        case TCG_COND_EQ:
1608            tcg_out_setcond_eq0(s, type, arg0, arg1);
1609            return;
1610        case TCG_COND_NE:
1611            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1612                tcg_out_ext32u(s, TCG_REG_R0, arg1);
1613                arg1 = TCG_REG_R0;
1614            }
1615            tcg_out_setcond_ne0(s, arg0, arg1);
1616            return;
1617        case TCG_COND_GE:
1618            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1619            arg1 = arg0;
1620            /* FALLTHRU */
1621        case TCG_COND_LT:
1622            /* Extract the sign bit.  */
1623            if (type == TCG_TYPE_I32) {
1624                tcg_out_shri32(s, arg0, arg1, 31);
1625            } else {
1626                tcg_out_shri64(s, arg0, arg1, 63);
1627            }
1628            return;
1629        default:
1630            break;
1631        }
1632    }
1633
1634    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1635       All other cases below are also at least 3 insns, so speed up the
1636       code generator by not considering them and always using ISEL.  */
1637    if (have_isel) {
1638        int isel, tab;
1639
1640        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1641
1642        isel = tcg_to_isel[cond];
1643
1644        tcg_out_movi(s, type, arg0, 1);
1645        if (isel & 1) {
1646            /* arg0 = (bc ? 0 : 1) */
1647            tab = TAB(arg0, 0, arg0);
1648            isel &= ~1;
1649        } else {
1650            /* arg0 = (bc ? 1 : 0) */
1651            tcg_out_movi(s, type, TCG_REG_R0, 0);
1652            tab = TAB(arg0, arg0, TCG_REG_R0);
1653        }
1654        tcg_out32(s, isel | tab);
1655        return;
1656    }
1657
1658    switch (cond) {
1659    case TCG_COND_EQ:
1660        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1661        tcg_out_setcond_eq0(s, type, arg0, arg1);
1662        return;
1663
1664    case TCG_COND_NE:
1665        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1666        /* Discard the high bits only once, rather than both inputs.  */
1667        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1668            tcg_out_ext32u(s, TCG_REG_R0, arg1);
1669            arg1 = TCG_REG_R0;
1670        }
1671        tcg_out_setcond_ne0(s, arg0, arg1);
1672        return;
1673
1674    case TCG_COND_GT:
1675    case TCG_COND_GTU:
1676        sh = 30;
1677        crop = 0;
1678        goto crtest;
1679
1680    case TCG_COND_LT:
1681    case TCG_COND_LTU:
1682        sh = 29;
1683        crop = 0;
1684        goto crtest;
1685
1686    case TCG_COND_GE:
1687    case TCG_COND_GEU:
1688        sh = 31;
1689        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
1690        goto crtest;
1691
1692    case TCG_COND_LE:
1693    case TCG_COND_LEU:
1694        sh = 31;
1695        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
1696    crtest:
1697        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1698        if (crop) {
1699            tcg_out32(s, crop);
1700        }
1701        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1702        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1703        break;
1704
1705    default:
1706        g_assert_not_reached();
1707    }
1708}
1709
1710static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1711{
1712    if (l->has_value) {
1713        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1714    } else {
1715        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1716    }
1717    tcg_out32(s, bc);
1718}
1719
1720static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1721                           TCGArg arg1, TCGArg arg2, int const_arg2,
1722                           TCGLabel *l, TCGType type)
1723{
1724    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1725    tcg_out_bc(s, tcg_to_bc[cond], l);
1726}
1727
1728static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1729                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1730                            TCGArg v2, bool const_c2)
1731{
1732    /* If for some reason both inputs are zero, don't produce bad code.  */
1733    if (v1 == 0 && v2 == 0) {
1734        tcg_out_movi(s, type, dest, 0);
1735        return;
1736    }
1737
1738    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1739
1740    if (have_isel) {
1741        int isel = tcg_to_isel[cond];
1742
1743        /* Swap the V operands if the operation indicates inversion.  */
1744        if (isel & 1) {
1745            int t = v1;
1746            v1 = v2;
1747            v2 = t;
1748            isel &= ~1;
1749        }
1750        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1751        if (v2 == 0) {
1752            tcg_out_movi(s, type, TCG_REG_R0, 0);
1753        }
1754        tcg_out32(s, isel | TAB(dest, v1, v2));
1755    } else {
1756        if (dest == v2) {
1757            cond = tcg_invert_cond(cond);
1758            v2 = v1;
1759        } else if (dest != v1) {
1760            if (v1 == 0) {
1761                tcg_out_movi(s, type, dest, 0);
1762            } else {
1763                tcg_out_mov(s, type, dest, v1);
1764            }
1765        }
1766        /* Branch forward over one insn */
1767        tcg_out32(s, tcg_to_bc[cond] | 8);
1768        if (v2 == 0) {
1769            tcg_out_movi(s, type, dest, 0);
1770        } else {
1771            tcg_out_mov(s, type, dest, v2);
1772        }
1773    }
1774}
1775
1776static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
1777                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
1778{
1779    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
1780        tcg_out32(s, opc | RA(a0) | RS(a1));
1781    } else {
1782        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
1783        /* Note that the only other valid constant for a2 is 0.  */
1784        if (have_isel) {
1785            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
1786            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
1787        } else if (!const_a2 && a0 == a2) {
1788            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
1789            tcg_out32(s, opc | RA(a0) | RS(a1));
1790        } else {
1791            tcg_out32(s, opc | RA(a0) | RS(a1));
1792            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
1793            if (const_a2) {
1794                tcg_out_movi(s, type, a0, 0);
1795            } else {
1796                tcg_out_mov(s, type, a0, a2);
1797            }
1798        }
1799    }
1800}
1801
1802static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1803                         const int *const_args)
1804{
1805    static const struct { uint8_t bit1, bit2; } bits[] = {
1806        [TCG_COND_LT ] = { CR_LT, CR_LT },
1807        [TCG_COND_LE ] = { CR_LT, CR_GT },
1808        [TCG_COND_GT ] = { CR_GT, CR_GT },
1809        [TCG_COND_GE ] = { CR_GT, CR_LT },
1810        [TCG_COND_LTU] = { CR_LT, CR_LT },
1811        [TCG_COND_LEU] = { CR_LT, CR_GT },
1812        [TCG_COND_GTU] = { CR_GT, CR_GT },
1813        [TCG_COND_GEU] = { CR_GT, CR_LT },
1814    };
1815
1816    TCGCond cond = args[4], cond2;
1817    TCGArg al, ah, bl, bh;
1818    int blconst, bhconst;
1819    int op, bit1, bit2;
1820
1821    al = args[0];
1822    ah = args[1];
1823    bl = args[2];
1824    bh = args[3];
1825    blconst = const_args[2];
1826    bhconst = const_args[3];
1827
1828    switch (cond) {
1829    case TCG_COND_EQ:
1830        op = CRAND;
1831        goto do_equality;
1832    case TCG_COND_NE:
1833        op = CRNAND;
1834    do_equality:
1835        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1836        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1837        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1838        break;
1839
1840    case TCG_COND_LT:
1841    case TCG_COND_LE:
1842    case TCG_COND_GT:
1843    case TCG_COND_GE:
1844    case TCG_COND_LTU:
1845    case TCG_COND_LEU:
1846    case TCG_COND_GTU:
1847    case TCG_COND_GEU:
1848        bit1 = bits[cond].bit1;
1849        bit2 = bits[cond].bit2;
1850        op = (bit1 != bit2 ? CRANDC : CRAND);
1851        cond2 = tcg_unsigned_cond(cond);
1852
1853        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1854        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1855        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1856        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1857        break;
1858
1859    default:
1860        g_assert_not_reached();
1861    }
1862}
1863
1864static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1865                             const int *const_args)
1866{
1867    tcg_out_cmp2(s, args + 1, const_args + 1);
1868    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1869    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
1870}
1871
1872static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
1873                             const int *const_args)
1874{
1875    tcg_out_cmp2(s, args, const_args);
1876    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1877}
1878
1879static void tcg_out_mb(TCGContext *s, TCGArg a0)
1880{
1881    uint32_t insn;
1882
1883    if (a0 & TCG_MO_ST_LD) {
1884        insn = HWSYNC;
1885    } else {
1886        insn = LWSYNC;
1887    }
1888
1889    tcg_out32(s, insn);
1890}
1891
1892static void tcg_out_call_int(TCGContext *s, int lk,
1893                             const tcg_insn_unit *target)
1894{
1895#ifdef _CALL_AIX
1896    /* Look through the descriptor.  If the branch is in range, and we
1897       don't have to spend too much effort on building the toc.  */
1898    const void *tgt = ((const void * const *)target)[0];
1899    uintptr_t toc = ((const uintptr_t *)target)[1];
1900    intptr_t diff = tcg_pcrel_diff(s, tgt);
1901
1902    if (in_range_b(diff) && toc == (uint32_t)toc) {
1903        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1904        tcg_out_b(s, lk, tgt);
1905    } else {
1906        /* Fold the low bits of the constant into the addresses below.  */
1907        intptr_t arg = (intptr_t)target;
1908        int ofs = (int16_t)arg;
1909
1910        if (ofs + 8 < 0x8000) {
1911            arg -= ofs;
1912        } else {
1913            ofs = 0;
1914        }
1915        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1916        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1917        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1918        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1919        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1920    }
1921#elif defined(_CALL_ELF) && _CALL_ELF == 2
1922    intptr_t diff;
1923
1924    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1925       address, which the callee uses to compute its TOC address.  */
1926    /* FIXME: when the branch is in range, we could avoid r12 load if we
1927       knew that the destination uses the same TOC, and what its local
1928       entry point offset is.  */
1929    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1930
1931    diff = tcg_pcrel_diff(s, target);
1932    if (in_range_b(diff)) {
1933        tcg_out_b(s, lk, target);
1934    } else {
1935        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1936        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1937    }
1938#else
1939    tcg_out_b(s, lk, target);
1940#endif
1941}
1942
1943static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1944                         const TCGHelperInfo *info)
1945{
1946    tcg_out_call_int(s, LK, target);
1947}
1948
1949static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
1950    [MO_UB] = LBZX,
1951    [MO_UW] = LHZX,
1952    [MO_UL] = LWZX,
1953    [MO_UQ] = LDX,
1954    [MO_SW] = LHAX,
1955    [MO_SL] = LWAX,
1956    [MO_BSWAP | MO_UB] = LBZX,
1957    [MO_BSWAP | MO_UW] = LHBRX,
1958    [MO_BSWAP | MO_UL] = LWBRX,
1959    [MO_BSWAP | MO_UQ] = LDBRX,
1960};
1961
1962static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
1963    [MO_UB] = STBX,
1964    [MO_UW] = STHX,
1965    [MO_UL] = STWX,
1966    [MO_UQ] = STDX,
1967    [MO_BSWAP | MO_UB] = STBX,
1968    [MO_BSWAP | MO_UW] = STHBRX,
1969    [MO_BSWAP | MO_UL] = STWBRX,
1970    [MO_BSWAP | MO_UQ] = STDBRX,
1971};
1972
1973static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
1974{
1975    if (arg < 0) {
1976        arg = TCG_REG_TMP1;
1977    }
1978    tcg_out32(s, MFSPR | RT(arg) | LR);
1979    return arg;
1980}
1981
1982/*
1983 * For the purposes of ppc32 sorting 4 input registers into 4 argument
1984 * registers, there is an outside chance we would require 3 temps.
1985 */
1986static const TCGLdstHelperParam ldst_helper_param = {
1987    .ra_gen = ldst_ra_gen,
1988    .ntmp = 3,
1989    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
1990};
1991
1992static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1993{
1994    MemOp opc = get_memop(lb->oi);
1995
1996    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1997        return false;
1998    }
1999
2000    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
2001    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
2002    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
2003
2004    tcg_out_b(s, 0, lb->raddr);
2005    return true;
2006}
2007
2008static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2009{
2010    MemOp opc = get_memop(lb->oi);
2011
2012    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2013        return false;
2014    }
2015
2016    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
2017    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
2018
2019    tcg_out_b(s, 0, lb->raddr);
2020    return true;
2021}
2022
2023typedef struct {
2024    TCGReg base;
2025    TCGReg index;
2026    TCGAtomAlign aa;
2027} HostAddress;
2028
2029bool tcg_target_has_memory_bswap(MemOp memop)
2030{
2031    TCGAtomAlign aa;
2032
2033    if ((memop & MO_SIZE) <= MO_64) {
2034        return true;
2035    }
2036
2037    /*
2038     * Reject 16-byte memop with 16-byte atomicity,
2039     * but do allow a pair of 64-bit operations.
2040     */
2041    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2042    return aa.atom <= MO_64;
2043}
2044
/*
 * We expect to use a 16-bit negative offset from ENV.
 * The value is parenthesized so the macro expands to a single primary
 * expression wherever it is used.
 */
#define MIN_TLB_MASK_TABLE_OFS  (-32768)
2047
2048/*
2049 * For softmmu, perform the TLB load and compare.
2050 * For useronly, perform any required alignment tests.
2051 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2052 * is required and fill in @h with the host address for the fast path.
2053 */
2054static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2055                                           TCGReg addrlo, TCGReg addrhi,
2056                                           MemOpIdx oi, bool is_ld)
2057{
2058    TCGType addr_type = s->addr_type;
2059    TCGLabelQemuLdst *ldst = NULL;
2060    MemOp opc = get_memop(oi);
2061    MemOp a_bits, s_bits;
2062
2063    /*
2064     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2065     *
2066     * Before 3.0, "An access that is not atomic is performed as a set of
2067     * smaller disjoint atomic accesses. In general, the number and alignment
2068     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
2069     *
2070     * As of 3.0, "the non-atomic access is performed as described in
2071     * the corresponding list", which matches MO_ATOM_SUBALIGN.
2072     */
2073    s_bits = opc & MO_SIZE;
2074    h->aa = atom_and_align_for_opc(s, opc,
2075                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
2076                                                 : MO_ATOM_IFALIGN,
2077                                   s_bits == MO_128);
2078    a_bits = h->aa.align;
2079
2080#ifdef CONFIG_SOFTMMU
2081    int mem_index = get_mmuidx(oi);
2082    int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2083                        : offsetof(CPUTLBEntry, addr_write);
2084    int fast_off = tlb_mask_table_ofs(s, mem_index);
2085    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2086    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2087
2088    ldst = new_ldst_label(s);
2089    ldst->is_ld = is_ld;
2090    ldst->oi = oi;
2091    ldst->addrlo_reg = addrlo;
2092    ldst->addrhi_reg = addrhi;
2093
2094    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2095    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2096    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2097
2098    /* Extract the page index, shifted into place for tlb index.  */
2099    if (TCG_TARGET_REG_BITS == 32) {
2100        tcg_out_shri32(s, TCG_REG_R0, addrlo,
2101                       s->page_bits - CPU_TLB_ENTRY_BITS);
2102    } else {
2103        tcg_out_shri64(s, TCG_REG_R0, addrlo,
2104                       s->page_bits - CPU_TLB_ENTRY_BITS);
2105    }
2106    tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2107
2108    /*
2109     * Load the (low part) TLB comparator into TMP2.
2110     * For 64-bit host, always load the entire 64-bit slot for simplicity.
2111     * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2112     */
2113    if (TCG_TARGET_REG_BITS == 64) {
2114        if (cmp_off == 0) {
2115            tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
2116        } else {
2117            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2118            tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
2119        }
2120    } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
2121        tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
2122    } else {
2123        tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2124        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2125                   cmp_off + 4 * HOST_BIG_ENDIAN);
2126    }
2127
2128    /*
2129     * Load the TLB addend for use on the fast path.
2130     * Do this asap to minimize any load use delay.
2131     */
2132    if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2133        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2134                   offsetof(CPUTLBEntry, addend));
2135    }
2136
2137    /* Clear the non-page, non-alignment bits from the address in R0. */
2138    if (TCG_TARGET_REG_BITS == 32) {
2139        /*
2140         * We don't support unaligned accesses on 32-bits.
2141         * Preserve the bottom bits and thus trigger a comparison
2142         * failure on unaligned accesses.
2143         */
2144        if (a_bits < s_bits) {
2145            a_bits = s_bits;
2146        }
2147        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2148                    (32 - a_bits) & 31, 31 - s->page_bits);
2149    } else {
2150        TCGReg t = addrlo;
2151
2152        /*
2153         * If the access is unaligned, we need to make sure we fail if we
2154         * cross a page boundary.  The trick is to add the access size-1
2155         * to the address before masking the low bits.  That will make the
2156         * address overflow to the next page if we cross a page boundary,
2157         * which will then force a mismatch of the TLB compare.
2158         */
2159        if (a_bits < s_bits) {
2160            unsigned a_mask = (1 << a_bits) - 1;
2161            unsigned s_mask = (1 << s_bits) - 1;
2162            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2163            t = TCG_REG_R0;
2164        }
2165
2166        /* Mask the address for the requested alignment.  */
2167        if (addr_type == TCG_TYPE_I32) {
2168            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2169                        (32 - a_bits) & 31, 31 - s->page_bits);
2170        } else if (a_bits == 0) {
2171            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2172        } else {
2173            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2174                        64 - s->page_bits, s->page_bits - a_bits);
2175            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
2176        }
2177    }
2178
2179    if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
2180        /* Low part comparison into cr7. */
2181        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2182                    0, 7, TCG_TYPE_I32);
2183
2184        /* Load the high part TLB comparator into TMP2.  */
2185        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2186                   cmp_off + 4 * !HOST_BIG_ENDIAN);
2187
2188        /* Load addend, deferred for this case. */
2189        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2190                   offsetof(CPUTLBEntry, addend));
2191
2192        /* High part comparison into cr6. */
2193        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
2194
2195        /* Combine comparisons into cr7. */
2196        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2197    } else {
2198        /* Full comparison into cr7. */
2199        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, addr_type);
2200    }
2201
2202    /* Load a pointer into the current opcode w/conditional branch-link. */
2203    ldst->label_ptr[0] = s->code_ptr;
2204    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2205
2206    h->base = TCG_REG_TMP1;
2207#else
2208    if (a_bits) {
2209        ldst = new_ldst_label(s);
2210        ldst->is_ld = is_ld;
2211        ldst->oi = oi;
2212        ldst->addrlo_reg = addrlo;
2213        ldst->addrhi_reg = addrhi;
2214
2215        /* We are expecting a_bits to max out at 7, much lower than ANDI. */
2216        tcg_debug_assert(a_bits < 16);
2217        tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
2218
2219        ldst->label_ptr[0] = s->code_ptr;
2220        tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2221    }
2222
2223    h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2224#endif
2225
2226    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2227        /* Zero-extend the guest address for use in the host address. */
2228        tcg_out_ext32u(s, TCG_REG_R0, addrlo);
2229        h->index = TCG_REG_R0;
2230    } else {
2231        h->index = addrlo;
2232    }
2233
2234    return ldst;
2235}
2236
2237static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2238                            TCGReg addrlo, TCGReg addrhi,
2239                            MemOpIdx oi, TCGType data_type)
2240{
2241    MemOp opc = get_memop(oi);
2242    TCGLabelQemuLdst *ldst;
2243    HostAddress h;
2244
2245    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
2246
2247    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2248        if (opc & MO_BSWAP) {
2249            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2250            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2251            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
2252        } else if (h.base != 0) {
2253            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2254            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
2255            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
2256        } else if (h.index == datahi) {
2257            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2258            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2259        } else {
2260            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2261            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2262        }
2263    } else {
2264        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2265        if (!have_isa_2_06 && insn == LDBRX) {
2266            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2267            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2268            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
2269            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2270        } else if (insn) {
2271            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2272        } else {
2273            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2274            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2275            tcg_out_movext(s, TCG_TYPE_REG, datalo,
2276                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
2277        }
2278    }
2279
2280    if (ldst) {
2281        ldst->type = data_type;
2282        ldst->datalo_reg = datalo;
2283        ldst->datahi_reg = datahi;
2284        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2285    }
2286}
2287
2288static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2289                            TCGReg addrlo, TCGReg addrhi,
2290                            MemOpIdx oi, TCGType data_type)
2291{
2292    MemOp opc = get_memop(oi);
2293    TCGLabelQemuLdst *ldst;
2294    HostAddress h;
2295
2296    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
2297
2298    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2299        if (opc & MO_BSWAP) {
2300            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2301            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2302            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
2303        } else if (h.base != 0) {
2304            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2305            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
2306            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
2307        } else {
2308            tcg_out32(s, STW | TAI(datahi, h.index, 0));
2309            tcg_out32(s, STW | TAI(datalo, h.index, 4));
2310        }
2311    } else {
2312        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2313        if (!have_isa_2_06 && insn == STDBRX) {
2314            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2315            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4));
2316            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2317            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1));
2318        } else {
2319            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
2320        }
2321    }
2322
2323    if (ldst) {
2324        ldst->type = data_type;
2325        ldst->datalo_reg = datalo;
2326        ldst->datahi_reg = datahi;
2327        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2328    }
2329}
2330
2331static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
2332                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
2333{
2334    TCGLabelQemuLdst *ldst;
2335    HostAddress h;
2336    bool need_bswap;
2337    uint32_t insn;
2338    TCGReg index;
2339
2340    ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
2341
2342    /* Compose the final address, as LQ/STQ have no indexing. */
2343    index = h.index;
2344    if (h.base != 0) {
2345        index = TCG_REG_TMP1;
2346        tcg_out32(s, ADD | TAB(index, h.base, h.index));
2347    }
2348    need_bswap = get_memop(oi) & MO_BSWAP;
2349
2350    if (h.aa.atom == MO_128) {
2351        tcg_debug_assert(!need_bswap);
2352        tcg_debug_assert(datalo & 1);
2353        tcg_debug_assert(datahi == datalo - 1);
2354        insn = is_ld ? LQ : STQ;
2355        tcg_out32(s, insn | TAI(datahi, index, 0));
2356    } else {
2357        TCGReg d1, d2;
2358
2359        if (HOST_BIG_ENDIAN ^ need_bswap) {
2360            d1 = datahi, d2 = datalo;
2361        } else {
2362            d1 = datalo, d2 = datahi;
2363        }
2364
2365        if (need_bswap) {
2366            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
2367            insn = is_ld ? LDBRX : STDBRX;
2368            tcg_out32(s, insn | TAB(d1, 0, index));
2369            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
2370        } else {
2371            insn = is_ld ? LD : STD;
2372            tcg_out32(s, insn | TAI(d1, index, 0));
2373            tcg_out32(s, insn | TAI(d2, index, 8));
2374        }
2375    }
2376
2377    if (ldst) {
2378        ldst->type = TCG_TYPE_I128;
2379        ldst->datalo_reg = datalo;
2380        ldst->datahi_reg = datahi;
2381        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2382    }
2383}
2384
2385static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2386{
2387    int i;
2388    for (i = 0; i < count; ++i) {
2389        p[i] = NOP;
2390    }
2391}
2392
2393/* Parameters for function call generation, used in tcg.c.  */
2394#define TCG_TARGET_STACK_ALIGN       16
2395
2396#ifdef _CALL_AIX
2397# define LINK_AREA_SIZE                (6 * SZR)
2398# define LR_OFFSET                     (1 * SZR)
2399# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2400#elif defined(_CALL_DARWIN)
2401# define LINK_AREA_SIZE                (6 * SZR)
2402# define LR_OFFSET                     (2 * SZR)
2403#elif TCG_TARGET_REG_BITS == 64
2404# if defined(_CALL_ELF) && _CALL_ELF == 2
2405#  define LINK_AREA_SIZE               (4 * SZR)
2406#  define LR_OFFSET                    (1 * SZR)
2407# endif
2408#else /* TCG_TARGET_REG_BITS == 32 */
2409# if defined(_CALL_SYSV)
2410#  define LINK_AREA_SIZE               (2 * SZR)
2411#  define LR_OFFSET                    (1 * SZR)
2412# endif
2413#endif
2414#ifndef LR_OFFSET
2415# error "Unhandled abi"
2416#endif
2417#ifndef TCG_TARGET_CALL_STACK_OFFSET
2418# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2419#endif
2420
2421#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2422#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2423
2424#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2425                     + TCG_STATIC_CALL_ARGS_SIZE    \
2426                     + CPU_TEMP_BUF_SIZE            \
2427                     + REG_SAVE_SIZE                \
2428                     + TCG_TARGET_STACK_ALIGN - 1)  \
2429                    & -TCG_TARGET_STACK_ALIGN)
2430
2431#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2432
2433static void tcg_target_qemu_prologue(TCGContext *s)
2434{
2435    int i;
2436
2437#ifdef _CALL_AIX
2438    const void **desc = (const void **)s->code_ptr;
2439    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2440    desc[1] = 0;                            /* environment pointer */
2441    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2442#endif
2443
2444    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2445                  CPU_TEMP_BUF_SIZE);
2446
2447    /* Prologue */
2448    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2449    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2450              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2451
2452    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2453        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2454                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2455    }
2456    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2457
2458#ifndef CONFIG_SOFTMMU
2459    if (guest_base) {
2460        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2461        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2462    }
2463#endif
2464
2465    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2466    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2467    if (USE_REG_TB) {
2468        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2469    }
2470    tcg_out32(s, BCCTR | BO_ALWAYS);
2471
2472    /* Epilogue */
2473    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2474
2475    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2476    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2477        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2478                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2479    }
2480    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2481    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2482    tcg_out32(s, BCLR | BO_ALWAYS);
2483}
2484
2485static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2486{
2487    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2488    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2489}
2490
2491static void tcg_out_goto_tb(TCGContext *s, int which)
2492{
2493    uintptr_t ptr = get_jmp_target_addr(s, which);
2494
2495    if (USE_REG_TB) {
2496        ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
2497        tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
2498
2499        /* Direct branch will be patched by tb_target_set_jmp_target. */
2500        set_jmp_insn_offset(s, which);
2501        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2502
2503        /* When branch is out of range, fall through to indirect. */
2504        tcg_out32(s, BCCTR | BO_ALWAYS);
2505
2506        /* For the unlinked case, need to reset TCG_REG_TB.  */
2507        set_jmp_reset_offset(s, which);
2508        tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
2509                         -tcg_current_code_size(s));
2510    } else {
2511        /* Direct branch will be patched by tb_target_set_jmp_target. */
2512        set_jmp_insn_offset(s, which);
2513        tcg_out32(s, NOP);
2514
2515        /* When branch is out of range, fall through to indirect. */
2516        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
2517        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
2518        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2519        tcg_out32(s, BCCTR | BO_ALWAYS);
2520        set_jmp_reset_offset(s, which);
2521    }
2522}
2523
2524void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2525                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2526{
2527    uintptr_t addr = tb->jmp_target_addr[n];
2528    intptr_t diff = addr - jmp_rx;
2529    tcg_insn_unit insn;
2530
2531    if (in_range_b(diff)) {
2532        insn = B | (diff & 0x3fffffc);
2533    } else if (USE_REG_TB) {
2534        insn = MTSPR | RS(TCG_REG_TB) | CTR;
2535    } else {
2536        insn = NOP;
2537    }
2538
2539    qatomic_set((uint32_t *)jmp_rw, insn);
2540    flush_idcache_range(jmp_rx, jmp_rw, 4);
2541}
2542
2543static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2544                       const TCGArg args[TCG_MAX_OP_ARGS],
2545                       const int const_args[TCG_MAX_OP_ARGS])
2546{
2547    TCGArg a0, a1, a2;
2548
2549    switch (opc) {
2550    case INDEX_op_goto_ptr:
2551        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2552        if (USE_REG_TB) {
2553            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2554        }
2555        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2556        tcg_out32(s, BCCTR | BO_ALWAYS);
2557        break;
2558    case INDEX_op_br:
2559        {
2560            TCGLabel *l = arg_label(args[0]);
2561            uint32_t insn = B;
2562
2563            if (l->has_value) {
2564                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2565                                       l->u.value_ptr);
2566            } else {
2567                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2568            }
2569            tcg_out32(s, insn);
2570        }
2571        break;
2572    case INDEX_op_ld8u_i32:
2573    case INDEX_op_ld8u_i64:
2574        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2575        break;
2576    case INDEX_op_ld8s_i32:
2577    case INDEX_op_ld8s_i64:
2578        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2579        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
2580        break;
2581    case INDEX_op_ld16u_i32:
2582    case INDEX_op_ld16u_i64:
2583        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2584        break;
2585    case INDEX_op_ld16s_i32:
2586    case INDEX_op_ld16s_i64:
2587        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2588        break;
2589    case INDEX_op_ld_i32:
2590    case INDEX_op_ld32u_i64:
2591        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2592        break;
2593    case INDEX_op_ld32s_i64:
2594        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2595        break;
2596    case INDEX_op_ld_i64:
2597        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2598        break;
2599    case INDEX_op_st8_i32:
2600    case INDEX_op_st8_i64:
2601        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2602        break;
2603    case INDEX_op_st16_i32:
2604    case INDEX_op_st16_i64:
2605        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2606        break;
2607    case INDEX_op_st_i32:
2608    case INDEX_op_st32_i64:
2609        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2610        break;
2611    case INDEX_op_st_i64:
2612        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2613        break;
2614
2615    case INDEX_op_add_i32:
2616        a0 = args[0], a1 = args[1], a2 = args[2];
2617        if (const_args[2]) {
2618        do_addi_32:
2619            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2620        } else {
2621            tcg_out32(s, ADD | TAB(a0, a1, a2));
2622        }
2623        break;
2624    case INDEX_op_sub_i32:
2625        a0 = args[0], a1 = args[1], a2 = args[2];
2626        if (const_args[1]) {
2627            if (const_args[2]) {
2628                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2629            } else {
2630                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2631            }
2632        } else if (const_args[2]) {
2633            a2 = -a2;
2634            goto do_addi_32;
2635        } else {
2636            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2637        }
2638        break;
2639
2640    case INDEX_op_and_i32:
2641        a0 = args[0], a1 = args[1], a2 = args[2];
2642        if (const_args[2]) {
2643            tcg_out_andi32(s, a0, a1, a2);
2644        } else {
2645            tcg_out32(s, AND | SAB(a1, a0, a2));
2646        }
2647        break;
2648    case INDEX_op_and_i64:
2649        a0 = args[0], a1 = args[1], a2 = args[2];
2650        if (const_args[2]) {
2651            tcg_out_andi64(s, a0, a1, a2);
2652        } else {
2653            tcg_out32(s, AND | SAB(a1, a0, a2));
2654        }
2655        break;
2656    case INDEX_op_or_i64:
2657    case INDEX_op_or_i32:
2658        a0 = args[0], a1 = args[1], a2 = args[2];
2659        if (const_args[2]) {
2660            tcg_out_ori32(s, a0, a1, a2);
2661        } else {
2662            tcg_out32(s, OR | SAB(a1, a0, a2));
2663        }
2664        break;
2665    case INDEX_op_xor_i64:
2666    case INDEX_op_xor_i32:
2667        a0 = args[0], a1 = args[1], a2 = args[2];
2668        if (const_args[2]) {
2669            tcg_out_xori32(s, a0, a1, a2);
2670        } else {
2671            tcg_out32(s, XOR | SAB(a1, a0, a2));
2672        }
2673        break;
2674    case INDEX_op_andc_i32:
2675        a0 = args[0], a1 = args[1], a2 = args[2];
2676        if (const_args[2]) {
2677            tcg_out_andi32(s, a0, a1, ~a2);
2678        } else {
2679            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2680        }
2681        break;
2682    case INDEX_op_andc_i64:
2683        a0 = args[0], a1 = args[1], a2 = args[2];
2684        if (const_args[2]) {
2685            tcg_out_andi64(s, a0, a1, ~a2);
2686        } else {
2687            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2688        }
2689        break;
2690    case INDEX_op_orc_i32:
2691        if (const_args[2]) {
2692            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2693            break;
2694        }
2695        /* FALLTHRU */
2696    case INDEX_op_orc_i64:
2697        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2698        break;
2699    case INDEX_op_eqv_i32:
2700        if (const_args[2]) {
2701            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2702            break;
2703        }
2704        /* FALLTHRU */
2705    case INDEX_op_eqv_i64:
2706        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2707        break;
2708    case INDEX_op_nand_i32:
2709    case INDEX_op_nand_i64:
2710        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2711        break;
2712    case INDEX_op_nor_i32:
2713    case INDEX_op_nor_i64:
2714        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2715        break;
2716
2717    case INDEX_op_clz_i32:
2718        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2719                      args[2], const_args[2]);
2720        break;
2721    case INDEX_op_ctz_i32:
2722        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2723                      args[2], const_args[2]);
2724        break;
2725    case INDEX_op_ctpop_i32:
2726        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2727        break;
2728
2729    case INDEX_op_clz_i64:
2730        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2731                      args[2], const_args[2]);
2732        break;
2733    case INDEX_op_ctz_i64:
2734        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2735                      args[2], const_args[2]);
2736        break;
2737    case INDEX_op_ctpop_i64:
2738        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2739        break;
2740
2741    case INDEX_op_mul_i32:
2742        a0 = args[0], a1 = args[1], a2 = args[2];
2743        if (const_args[2]) {
2744            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2745        } else {
2746            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2747        }
2748        break;
2749
2750    case INDEX_op_div_i32:
2751        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2752        break;
2753
2754    case INDEX_op_divu_i32:
2755        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2756        break;
2757
2758    case INDEX_op_rem_i32:
2759        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
2760        break;
2761
2762    case INDEX_op_remu_i32:
2763        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
2764        break;
2765
2766    case INDEX_op_shl_i32:
2767        if (const_args[2]) {
2768            /* Limit immediate shift count lest we create an illegal insn.  */
2769            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2770        } else {
2771            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2772        }
2773        break;
2774    case INDEX_op_shr_i32:
2775        if (const_args[2]) {
2776            /* Limit immediate shift count lest we create an illegal insn.  */
2777            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2778        } else {
2779            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2780        }
2781        break;
2782    case INDEX_op_sar_i32:
2783        if (const_args[2]) {
2784            tcg_out_sari32(s, args[0], args[1], args[2]);
2785        } else {
2786            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2787        }
2788        break;
2789    case INDEX_op_rotl_i32:
2790        if (const_args[2]) {
2791            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2792        } else {
2793            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2794                         | MB(0) | ME(31));
2795        }
2796        break;
2797    case INDEX_op_rotr_i32:
2798        if (const_args[2]) {
2799            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2800        } else {
2801            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2802            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2803                         | MB(0) | ME(31));
2804        }
2805        break;
2806
2807    case INDEX_op_brcond_i32:
2808        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2809                       arg_label(args[3]), TCG_TYPE_I32);
2810        break;
2811    case INDEX_op_brcond_i64:
2812        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2813                       arg_label(args[3]), TCG_TYPE_I64);
2814        break;
2815    case INDEX_op_brcond2_i32:
2816        tcg_out_brcond2(s, args, const_args);
2817        break;
2818
2819    case INDEX_op_neg_i32:
2820    case INDEX_op_neg_i64:
2821        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2822        break;
2823
2824    case INDEX_op_not_i32:
2825    case INDEX_op_not_i64:
2826        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2827        break;
2828
2829    case INDEX_op_add_i64:
2830        a0 = args[0], a1 = args[1], a2 = args[2];
2831        if (const_args[2]) {
2832        do_addi_64:
2833            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2834        } else {
2835            tcg_out32(s, ADD | TAB(a0, a1, a2));
2836        }
2837        break;
2838    case INDEX_op_sub_i64:
2839        a0 = args[0], a1 = args[1], a2 = args[2];
2840        if (const_args[1]) {
2841            if (const_args[2]) {
2842                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2843            } else {
2844                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2845            }
2846        } else if (const_args[2]) {
2847            a2 = -a2;
2848            goto do_addi_64;
2849        } else {
2850            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2851        }
2852        break;
2853
2854    case INDEX_op_shl_i64:
2855        if (const_args[2]) {
2856            /* Limit immediate shift count lest we create an illegal insn.  */
2857            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2858        } else {
2859            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2860        }
2861        break;
2862    case INDEX_op_shr_i64:
2863        if (const_args[2]) {
2864            /* Limit immediate shift count lest we create an illegal insn.  */
2865            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2866        } else {
2867            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2868        }
2869        break;
2870    case INDEX_op_sar_i64:
2871        if (const_args[2]) {
2872            tcg_out_sari64(s, args[0], args[1], args[2]);
2873        } else {
2874            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2875        }
2876        break;
2877    case INDEX_op_rotl_i64:
2878        if (const_args[2]) {
2879            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2880        } else {
2881            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2882        }
2883        break;
2884    case INDEX_op_rotr_i64:
2885        if (const_args[2]) {
2886            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2887        } else {
2888            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2889            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2890        }
2891        break;
2892
2893    case INDEX_op_mul_i64:
2894        a0 = args[0], a1 = args[1], a2 = args[2];
2895        if (const_args[2]) {
2896            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2897        } else {
2898            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2899        }
2900        break;
2901    case INDEX_op_div_i64:
2902        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2903        break;
2904    case INDEX_op_divu_i64:
2905        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2906        break;
2907    case INDEX_op_rem_i64:
2908        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
2909        break;
2910    case INDEX_op_remu_i64:
2911        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
2912        break;
2913
2914    case INDEX_op_qemu_ld_a64_i32:
2915        if (TCG_TARGET_REG_BITS == 32) {
2916            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
2917                            args[3], TCG_TYPE_I32);
2918            break;
2919        }
2920        /* fall through */
2921    case INDEX_op_qemu_ld_a32_i32:
2922        tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
2923        break;
2924    case INDEX_op_qemu_ld_a32_i64:
2925        if (TCG_TARGET_REG_BITS == 64) {
2926            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
2927                            args[2], TCG_TYPE_I64);
2928        } else {
2929            tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
2930                            args[3], TCG_TYPE_I64);
2931        }
2932        break;
2933    case INDEX_op_qemu_ld_a64_i64:
2934        if (TCG_TARGET_REG_BITS == 64) {
2935            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
2936                            args[2], TCG_TYPE_I64);
2937        } else {
2938            tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
2939                            args[4], TCG_TYPE_I64);
2940        }
2941        break;
2942    case INDEX_op_qemu_ld_a32_i128:
2943    case INDEX_op_qemu_ld_a64_i128:
2944        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
2945        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
2946        break;
2947
2948    case INDEX_op_qemu_st_a64_i32:
2949        if (TCG_TARGET_REG_BITS == 32) {
2950            tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
2951                            args[3], TCG_TYPE_I32);
2952            break;
2953        }
2954        /* fall through */
2955    case INDEX_op_qemu_st_a32_i32:
2956        tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
2957        break;
2958    case INDEX_op_qemu_st_a32_i64:
2959        if (TCG_TARGET_REG_BITS == 64) {
2960            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
2961                            args[2], TCG_TYPE_I64);
2962        } else {
2963            tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
2964                            args[3], TCG_TYPE_I64);
2965        }
2966        break;
2967    case INDEX_op_qemu_st_a64_i64:
2968        if (TCG_TARGET_REG_BITS == 64) {
2969            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
2970                            args[2], TCG_TYPE_I64);
2971        } else {
2972            tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
2973                            args[4], TCG_TYPE_I64);
2974        }
2975        break;
2976    case INDEX_op_qemu_st_a32_i128:
2977    case INDEX_op_qemu_st_a64_i128:
2978        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
2979        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
2980        break;
2981
2982    case INDEX_op_setcond_i32:
2983        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2984                        const_args[2]);
2985        break;
2986    case INDEX_op_setcond_i64:
2987        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2988                        const_args[2]);
2989        break;
2990    case INDEX_op_setcond2_i32:
2991        tcg_out_setcond2(s, args, const_args);
2992        break;
2993
2994    case INDEX_op_bswap16_i32:
2995    case INDEX_op_bswap16_i64:
2996        tcg_out_bswap16(s, args[0], args[1], args[2]);
2997        break;
2998    case INDEX_op_bswap32_i32:
2999        tcg_out_bswap32(s, args[0], args[1], 0);
3000        break;
3001    case INDEX_op_bswap32_i64:
3002        tcg_out_bswap32(s, args[0], args[1], args[2]);
3003        break;
3004    case INDEX_op_bswap64_i64:
3005        tcg_out_bswap64(s, args[0], args[1]);
3006        break;
3007
3008    case INDEX_op_deposit_i32:
3009        if (const_args[2]) {
3010            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3011            tcg_out_andi32(s, args[0], args[0], ~mask);
3012        } else {
3013            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3014                        32 - args[3] - args[4], 31 - args[3]);
3015        }
3016        break;
3017    case INDEX_op_deposit_i64:
3018        if (const_args[2]) {
3019            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3020            tcg_out_andi64(s, args[0], args[0], ~mask);
3021        } else {
3022            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3023                        64 - args[3] - args[4]);
3024        }
3025        break;
3026
3027    case INDEX_op_extract_i32:
3028        tcg_out_rlw(s, RLWINM, args[0], args[1],
3029                    32 - args[2], 32 - args[3], 31);
3030        break;
3031    case INDEX_op_extract_i64:
3032        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3033        break;
3034
3035    case INDEX_op_movcond_i32:
3036        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3037                        args[3], args[4], const_args[2]);
3038        break;
3039    case INDEX_op_movcond_i64:
3040        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3041                        args[3], args[4], const_args[2]);
3042        break;
3043
3044#if TCG_TARGET_REG_BITS == 64
3045    case INDEX_op_add2_i64:
3046#else
3047    case INDEX_op_add2_i32:
3048#endif
3049        /* Note that the CA bit is defined based on the word size of the
3050           environment.  So in 64-bit mode it's always carry-out of bit 63.
3051           The fallback code using deposit works just as well for 32-bit.  */
3052        a0 = args[0], a1 = args[1];
3053        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3054            a0 = TCG_REG_R0;
3055        }
3056        if (const_args[4]) {
3057            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3058        } else {
3059            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3060        }
3061        if (const_args[5]) {
3062            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3063        } else {
3064            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3065        }
3066        if (a0 != args[0]) {
3067            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3068        }
3069        break;
3070
3071#if TCG_TARGET_REG_BITS == 64
3072    case INDEX_op_sub2_i64:
3073#else
3074    case INDEX_op_sub2_i32:
3075#endif
3076        a0 = args[0], a1 = args[1];
3077        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3078            a0 = TCG_REG_R0;
3079        }
3080        if (const_args[2]) {
3081            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3082        } else {
3083            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3084        }
3085        if (const_args[3]) {
3086            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3087        } else {
3088            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3089        }
3090        if (a0 != args[0]) {
3091            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3092        }
3093        break;
3094
3095    case INDEX_op_muluh_i32:
3096        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3097        break;
3098    case INDEX_op_mulsh_i32:
3099        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3100        break;
3101    case INDEX_op_muluh_i64:
3102        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3103        break;
3104    case INDEX_op_mulsh_i64:
3105        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3106        break;
3107
3108    case INDEX_op_mb:
3109        tcg_out_mb(s, args[0]);
3110        break;
3111
3112    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3113    case INDEX_op_mov_i64:
3114    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3115    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3116    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3117    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
3118    case INDEX_op_ext8s_i64:
3119    case INDEX_op_ext8u_i32:
3120    case INDEX_op_ext8u_i64:
3121    case INDEX_op_ext16s_i32:
3122    case INDEX_op_ext16s_i64:
3123    case INDEX_op_ext16u_i32:
3124    case INDEX_op_ext16u_i64:
3125    case INDEX_op_ext32s_i64:
3126    case INDEX_op_ext32u_i64:
3127    case INDEX_op_ext_i32_i64:
3128    case INDEX_op_extu_i32_i64:
3129    case INDEX_op_extrl_i64_i32:
3130    default:
3131        g_assert_not_reached();
3132    }
3133}
3134
3135int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3136{
3137    switch (opc) {
3138    case INDEX_op_and_vec:
3139    case INDEX_op_or_vec:
3140    case INDEX_op_xor_vec:
3141    case INDEX_op_andc_vec:
3142    case INDEX_op_not_vec:
3143    case INDEX_op_nor_vec:
3144    case INDEX_op_eqv_vec:
3145    case INDEX_op_nand_vec:
3146        return 1;
3147    case INDEX_op_orc_vec:
3148        return have_isa_2_07;
3149    case INDEX_op_add_vec:
3150    case INDEX_op_sub_vec:
3151    case INDEX_op_smax_vec:
3152    case INDEX_op_smin_vec:
3153    case INDEX_op_umax_vec:
3154    case INDEX_op_umin_vec:
3155    case INDEX_op_shlv_vec:
3156    case INDEX_op_shrv_vec:
3157    case INDEX_op_sarv_vec:
3158    case INDEX_op_rotlv_vec:
3159        return vece <= MO_32 || have_isa_2_07;
3160    case INDEX_op_ssadd_vec:
3161    case INDEX_op_sssub_vec:
3162    case INDEX_op_usadd_vec:
3163    case INDEX_op_ussub_vec:
3164        return vece <= MO_32;
3165    case INDEX_op_cmp_vec:
3166    case INDEX_op_shli_vec:
3167    case INDEX_op_shri_vec:
3168    case INDEX_op_sari_vec:
3169    case INDEX_op_rotli_vec:
3170        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3171    case INDEX_op_neg_vec:
3172        return vece >= MO_32 && have_isa_3_00;
3173    case INDEX_op_mul_vec:
3174        switch (vece) {
3175        case MO_8:
3176        case MO_16:
3177            return -1;
3178        case MO_32:
3179            return have_isa_2_07 ? 1 : -1;
3180        case MO_64:
3181            return have_isa_3_10;
3182        }
3183        return 0;
3184    case INDEX_op_bitsel_vec:
3185        return have_vsx;
3186    case INDEX_op_rotrv_vec:
3187        return -1;
3188    default:
3189        return 0;
3190    }
3191}
3192
3193static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3194                            TCGReg dst, TCGReg src)
3195{
3196    tcg_debug_assert(dst >= TCG_REG_V0);
3197
3198    /* Splat from integer reg allowed via constraints for v3.00.  */
3199    if (src < TCG_REG_V0) {
3200        tcg_debug_assert(have_isa_3_00);
3201        switch (vece) {
3202        case MO_64:
3203            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3204            return true;
3205        case MO_32:
3206            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3207            return true;
3208        default:
3209            /* Fail, so that we fall back on either dupm or mov+dup.  */
3210            return false;
3211        }
3212    }
3213
3214    /*
3215     * Recall we use (or emulate) VSX integer loads, so the integer is
3216     * right justified within the left (zero-index) double-word.
3217     */
3218    switch (vece) {
3219    case MO_8:
3220        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3221        break;
3222    case MO_16:
3223        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3224        break;
3225    case MO_32:
3226        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3227        break;
3228    case MO_64:
3229        if (have_vsx) {
3230            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3231            break;
3232        }
3233        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3234        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3235        break;
3236    default:
3237        g_assert_not_reached();
3238    }
3239    return true;
3240}
3241
3242static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3243                             TCGReg out, TCGReg base, intptr_t offset)
3244{
3245    int elt;
3246
3247    tcg_debug_assert(out >= TCG_REG_V0);
3248    switch (vece) {
3249    case MO_8:
3250        if (have_isa_3_00) {
3251            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3252        } else {
3253            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3254        }
3255        elt = extract32(offset, 0, 4);
3256#if !HOST_BIG_ENDIAN
3257        elt ^= 15;
3258#endif
3259        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3260        break;
3261    case MO_16:
3262        tcg_debug_assert((offset & 1) == 0);
3263        if (have_isa_3_00) {
3264            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3265        } else {
3266            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3267        }
3268        elt = extract32(offset, 1, 3);
3269#if !HOST_BIG_ENDIAN
3270        elt ^= 7;
3271#endif
3272        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3273        break;
3274    case MO_32:
3275        if (have_isa_3_00) {
3276            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3277            break;
3278        }
3279        tcg_debug_assert((offset & 3) == 0);
3280        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3281        elt = extract32(offset, 2, 2);
3282#if !HOST_BIG_ENDIAN
3283        elt ^= 3;
3284#endif
3285        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3286        break;
3287    case MO_64:
3288        if (have_vsx) {
3289            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3290            break;
3291        }
3292        tcg_debug_assert((offset & 7) == 0);
3293        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3294        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3295        elt = extract32(offset, 3, 1);
3296#if !HOST_BIG_ENDIAN
3297        elt = !elt;
3298#endif
3299        if (elt) {
3300            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3301        } else {
3302            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3303        }
3304        break;
3305    default:
3306        g_assert_not_reached();
3307    }
3308    return true;
3309}
3310
3311static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3312                           unsigned vecl, unsigned vece,
3313                           const TCGArg args[TCG_MAX_OP_ARGS],
3314                           const int const_args[TCG_MAX_OP_ARGS])
3315{
3316    static const uint32_t
3317        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3318        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3319        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3320        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3321        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3322        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3323        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3324        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3325        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3326        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3327        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3328        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3329        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3330        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3331        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3332        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3333        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3334        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3335        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3336        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3337        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3338        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3339        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3340        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3341        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3342
3343    TCGType type = vecl + TCG_TYPE_V64;
3344    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3345    uint32_t insn;
3346
3347    switch (opc) {
3348    case INDEX_op_ld_vec:
3349        tcg_out_ld(s, type, a0, a1, a2);
3350        return;
3351    case INDEX_op_st_vec:
3352        tcg_out_st(s, type, a0, a1, a2);
3353        return;
3354    case INDEX_op_dupm_vec:
3355        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3356        return;
3357
3358    case INDEX_op_add_vec:
3359        insn = add_op[vece];
3360        break;
3361    case INDEX_op_sub_vec:
3362        insn = sub_op[vece];
3363        break;
3364    case INDEX_op_neg_vec:
3365        insn = neg_op[vece];
3366        a2 = a1;
3367        a1 = 0;
3368        break;
3369    case INDEX_op_mul_vec:
3370        insn = mul_op[vece];
3371        break;
3372    case INDEX_op_ssadd_vec:
3373        insn = ssadd_op[vece];
3374        break;
3375    case INDEX_op_sssub_vec:
3376        insn = sssub_op[vece];
3377        break;
3378    case INDEX_op_usadd_vec:
3379        insn = usadd_op[vece];
3380        break;
3381    case INDEX_op_ussub_vec:
3382        insn = ussub_op[vece];
3383        break;
3384    case INDEX_op_smin_vec:
3385        insn = smin_op[vece];
3386        break;
3387    case INDEX_op_umin_vec:
3388        insn = umin_op[vece];
3389        break;
3390    case INDEX_op_smax_vec:
3391        insn = smax_op[vece];
3392        break;
3393    case INDEX_op_umax_vec:
3394        insn = umax_op[vece];
3395        break;
3396    case INDEX_op_shlv_vec:
3397        insn = shlv_op[vece];
3398        break;
3399    case INDEX_op_shrv_vec:
3400        insn = shrv_op[vece];
3401        break;
3402    case INDEX_op_sarv_vec:
3403        insn = sarv_op[vece];
3404        break;
3405    case INDEX_op_and_vec:
3406        insn = VAND;
3407        break;
3408    case INDEX_op_or_vec:
3409        insn = VOR;
3410        break;
3411    case INDEX_op_xor_vec:
3412        insn = VXOR;
3413        break;
3414    case INDEX_op_andc_vec:
3415        insn = VANDC;
3416        break;
3417    case INDEX_op_not_vec:
3418        insn = VNOR;
3419        a2 = a1;
3420        break;
3421    case INDEX_op_orc_vec:
3422        insn = VORC;
3423        break;
3424    case INDEX_op_nand_vec:
3425        insn = VNAND;
3426        break;
3427    case INDEX_op_nor_vec:
3428        insn = VNOR;
3429        break;
3430    case INDEX_op_eqv_vec:
3431        insn = VEQV;
3432        break;
3433
3434    case INDEX_op_cmp_vec:
3435        switch (args[3]) {
3436        case TCG_COND_EQ:
3437            insn = eq_op[vece];
3438            break;
3439        case TCG_COND_NE:
3440            insn = ne_op[vece];
3441            break;
3442        case TCG_COND_GT:
3443            insn = gts_op[vece];
3444            break;
3445        case TCG_COND_GTU:
3446            insn = gtu_op[vece];
3447            break;
3448        default:
3449            g_assert_not_reached();
3450        }
3451        break;
3452
3453    case INDEX_op_bitsel_vec:
3454        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3455        return;
3456
3457    case INDEX_op_dup2_vec:
3458        assert(TCG_TARGET_REG_BITS == 32);
3459        /* With inputs a1 = xLxx, a2 = xHxx  */
3460        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3461        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3462        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3463        return;
3464
3465    case INDEX_op_ppc_mrgh_vec:
3466        insn = mrgh_op[vece];
3467        break;
3468    case INDEX_op_ppc_mrgl_vec:
3469        insn = mrgl_op[vece];
3470        break;
3471    case INDEX_op_ppc_muleu_vec:
3472        insn = muleu_op[vece];
3473        break;
3474    case INDEX_op_ppc_mulou_vec:
3475        insn = mulou_op[vece];
3476        break;
3477    case INDEX_op_ppc_pkum_vec:
3478        insn = pkum_op[vece];
3479        break;
3480    case INDEX_op_rotlv_vec:
3481        insn = rotl_op[vece];
3482        break;
3483    case INDEX_op_ppc_msum_vec:
3484        tcg_debug_assert(vece == MO_16);
3485        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3486        return;
3487
3488    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3489    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3490    default:
3491        g_assert_not_reached();
3492    }
3493
3494    tcg_debug_assert(insn != 0);
3495    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3496}
3497
3498static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3499                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3500{
3501    TCGv_vec t1;
3502
3503    if (vece == MO_32) {
3504        /*
3505         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3506         * So using negative numbers gets us the 4th bit easily.
3507         */
3508        imm = sextract32(imm, 0, 5);
3509    } else {
3510        imm &= (8 << vece) - 1;
3511    }
3512
3513    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
3514    t1 = tcg_constant_vec(type, MO_8, imm);
3515    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3516              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3517}
3518
3519static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3520                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3521{
3522    bool need_swap = false, need_inv = false;
3523
3524    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3525
3526    switch (cond) {
3527    case TCG_COND_EQ:
3528    case TCG_COND_GT:
3529    case TCG_COND_GTU:
3530        break;
3531    case TCG_COND_NE:
3532        if (have_isa_3_00 && vece <= MO_32) {
3533            break;
3534        }
3535        /* fall through */
3536    case TCG_COND_LE:
3537    case TCG_COND_LEU:
3538        need_inv = true;
3539        break;
3540    case TCG_COND_LT:
3541    case TCG_COND_LTU:
3542        need_swap = true;
3543        break;
3544    case TCG_COND_GE:
3545    case TCG_COND_GEU:
3546        need_swap = need_inv = true;
3547        break;
3548    default:
3549        g_assert_not_reached();
3550    }
3551
3552    if (need_inv) {
3553        cond = tcg_invert_cond(cond);
3554    }
3555    if (need_swap) {
3556        TCGv_vec t1;
3557        t1 = v1, v1 = v2, v2 = t1;
3558        cond = tcg_swap_cond(cond);
3559    }
3560
3561    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3562              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3563
3564    if (need_inv) {
3565        tcg_gen_not_vec(vece, v0, v0);
3566    }
3567}
3568
3569static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3570                           TCGv_vec v1, TCGv_vec v2)
3571{
3572    TCGv_vec t1 = tcg_temp_new_vec(type);
3573    TCGv_vec t2 = tcg_temp_new_vec(type);
3574    TCGv_vec c0, c16;
3575
3576    switch (vece) {
3577    case MO_8:
3578    case MO_16:
3579        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3580                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3581        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3582                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3583        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3584                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3585        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3586                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3587        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3588                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3589	break;
3590
3591    case MO_32:
3592        tcg_debug_assert(!have_isa_2_07);
3593        /*
3594         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3595         * So using -16 is a quick way to represent 16.
3596         */
3597        c16 = tcg_constant_vec(type, MO_8, -16);
3598        c0 = tcg_constant_vec(type, MO_8, 0);
3599
3600        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3601                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3602        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3603                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3604        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3605                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3606        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3607                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3608        tcg_gen_add_vec(MO_32, v0, t1, t2);
3609        break;
3610
3611    default:
3612        g_assert_not_reached();
3613    }
3614    tcg_temp_free_vec(t1);
3615    tcg_temp_free_vec(t2);
3616}
3617
3618void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3619                       TCGArg a0, ...)
3620{
3621    va_list va;
3622    TCGv_vec v0, v1, v2, t0;
3623    TCGArg a2;
3624
3625    va_start(va, a0);
3626    v0 = temp_tcgv_vec(arg_temp(a0));
3627    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3628    a2 = va_arg(va, TCGArg);
3629
3630    switch (opc) {
3631    case INDEX_op_shli_vec:
3632        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3633        break;
3634    case INDEX_op_shri_vec:
3635        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3636        break;
3637    case INDEX_op_sari_vec:
3638        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3639        break;
3640    case INDEX_op_rotli_vec:
3641        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3642        break;
3643    case INDEX_op_cmp_vec:
3644        v2 = temp_tcgv_vec(arg_temp(a2));
3645        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3646        break;
3647    case INDEX_op_mul_vec:
3648        v2 = temp_tcgv_vec(arg_temp(a2));
3649        expand_vec_mul(type, vece, v0, v1, v2);
3650        break;
3651    case INDEX_op_rotlv_vec:
3652        v2 = temp_tcgv_vec(arg_temp(a2));
3653        t0 = tcg_temp_new_vec(type);
3654        tcg_gen_neg_vec(vece, t0, v2);
3655        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3656        tcg_temp_free_vec(t0);
3657        break;
3658    default:
3659        g_assert_not_reached();
3660    }
3661    va_end(va);
3662}
3663
3664static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3665{
3666    switch (op) {
3667    case INDEX_op_goto_ptr:
3668        return C_O0_I1(r);
3669
3670    case INDEX_op_ld8u_i32:
3671    case INDEX_op_ld8s_i32:
3672    case INDEX_op_ld16u_i32:
3673    case INDEX_op_ld16s_i32:
3674    case INDEX_op_ld_i32:
3675    case INDEX_op_ctpop_i32:
3676    case INDEX_op_neg_i32:
3677    case INDEX_op_not_i32:
3678    case INDEX_op_ext8s_i32:
3679    case INDEX_op_ext16s_i32:
3680    case INDEX_op_bswap16_i32:
3681    case INDEX_op_bswap32_i32:
3682    case INDEX_op_extract_i32:
3683    case INDEX_op_ld8u_i64:
3684    case INDEX_op_ld8s_i64:
3685    case INDEX_op_ld16u_i64:
3686    case INDEX_op_ld16s_i64:
3687    case INDEX_op_ld32u_i64:
3688    case INDEX_op_ld32s_i64:
3689    case INDEX_op_ld_i64:
3690    case INDEX_op_ctpop_i64:
3691    case INDEX_op_neg_i64:
3692    case INDEX_op_not_i64:
3693    case INDEX_op_ext8s_i64:
3694    case INDEX_op_ext16s_i64:
3695    case INDEX_op_ext32s_i64:
3696    case INDEX_op_ext_i32_i64:
3697    case INDEX_op_extu_i32_i64:
3698    case INDEX_op_bswap16_i64:
3699    case INDEX_op_bswap32_i64:
3700    case INDEX_op_bswap64_i64:
3701    case INDEX_op_extract_i64:
3702        return C_O1_I1(r, r);
3703
3704    case INDEX_op_st8_i32:
3705    case INDEX_op_st16_i32:
3706    case INDEX_op_st_i32:
3707    case INDEX_op_st8_i64:
3708    case INDEX_op_st16_i64:
3709    case INDEX_op_st32_i64:
3710    case INDEX_op_st_i64:
3711        return C_O0_I2(r, r);
3712
3713    case INDEX_op_add_i32:
3714    case INDEX_op_and_i32:
3715    case INDEX_op_or_i32:
3716    case INDEX_op_xor_i32:
3717    case INDEX_op_andc_i32:
3718    case INDEX_op_orc_i32:
3719    case INDEX_op_eqv_i32:
3720    case INDEX_op_shl_i32:
3721    case INDEX_op_shr_i32:
3722    case INDEX_op_sar_i32:
3723    case INDEX_op_rotl_i32:
3724    case INDEX_op_rotr_i32:
3725    case INDEX_op_setcond_i32:
3726    case INDEX_op_and_i64:
3727    case INDEX_op_andc_i64:
3728    case INDEX_op_shl_i64:
3729    case INDEX_op_shr_i64:
3730    case INDEX_op_sar_i64:
3731    case INDEX_op_rotl_i64:
3732    case INDEX_op_rotr_i64:
3733    case INDEX_op_setcond_i64:
3734        return C_O1_I2(r, r, ri);
3735
3736    case INDEX_op_mul_i32:
3737    case INDEX_op_mul_i64:
3738        return C_O1_I2(r, r, rI);
3739
3740    case INDEX_op_div_i32:
3741    case INDEX_op_divu_i32:
3742    case INDEX_op_rem_i32:
3743    case INDEX_op_remu_i32:
3744    case INDEX_op_nand_i32:
3745    case INDEX_op_nor_i32:
3746    case INDEX_op_muluh_i32:
3747    case INDEX_op_mulsh_i32:
3748    case INDEX_op_orc_i64:
3749    case INDEX_op_eqv_i64:
3750    case INDEX_op_nand_i64:
3751    case INDEX_op_nor_i64:
3752    case INDEX_op_div_i64:
3753    case INDEX_op_divu_i64:
3754    case INDEX_op_rem_i64:
3755    case INDEX_op_remu_i64:
3756    case INDEX_op_mulsh_i64:
3757    case INDEX_op_muluh_i64:
3758        return C_O1_I2(r, r, r);
3759
3760    case INDEX_op_sub_i32:
3761        return C_O1_I2(r, rI, ri);
3762    case INDEX_op_add_i64:
3763        return C_O1_I2(r, r, rT);
3764    case INDEX_op_or_i64:
3765    case INDEX_op_xor_i64:
3766        return C_O1_I2(r, r, rU);
3767    case INDEX_op_sub_i64:
3768        return C_O1_I2(r, rI, rT);
3769    case INDEX_op_clz_i32:
3770    case INDEX_op_ctz_i32:
3771    case INDEX_op_clz_i64:
3772    case INDEX_op_ctz_i64:
3773        return C_O1_I2(r, r, rZW);
3774
3775    case INDEX_op_brcond_i32:
3776    case INDEX_op_brcond_i64:
3777        return C_O0_I2(r, ri);
3778
3779    case INDEX_op_movcond_i32:
3780    case INDEX_op_movcond_i64:
3781        return C_O1_I4(r, r, ri, rZ, rZ);
3782    case INDEX_op_deposit_i32:
3783    case INDEX_op_deposit_i64:
3784        return C_O1_I2(r, 0, rZ);
3785    case INDEX_op_brcond2_i32:
3786        return C_O0_I4(r, r, ri, ri);
3787    case INDEX_op_setcond2_i32:
3788        return C_O1_I4(r, r, r, ri, ri);
3789    case INDEX_op_add2_i64:
3790    case INDEX_op_add2_i32:
3791        return C_O2_I4(r, r, r, r, rI, rZM);
3792    case INDEX_op_sub2_i64:
3793    case INDEX_op_sub2_i32:
3794        return C_O2_I4(r, r, rI, rZM, r, r);
3795
3796    case INDEX_op_qemu_ld_a32_i32:
3797        return C_O1_I1(r, r);
3798    case INDEX_op_qemu_ld_a64_i32:
3799        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
3800    case INDEX_op_qemu_ld_a32_i64:
3801        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
3802    case INDEX_op_qemu_ld_a64_i64:
3803        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
3804
3805    case INDEX_op_qemu_st_a32_i32:
3806        return C_O0_I2(r, r);
3807    case INDEX_op_qemu_st_a64_i32:
3808        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
3809    case INDEX_op_qemu_st_a32_i64:
3810        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
3811    case INDEX_op_qemu_st_a64_i64:
3812        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
3813
3814    case INDEX_op_qemu_ld_a32_i128:
3815    case INDEX_op_qemu_ld_a64_i128:
3816        return C_O2_I1(o, m, r);
3817    case INDEX_op_qemu_st_a32_i128:
3818    case INDEX_op_qemu_st_a64_i128:
3819        return C_O0_I3(o, m, r);
3820
3821    case INDEX_op_add_vec:
3822    case INDEX_op_sub_vec:
3823    case INDEX_op_mul_vec:
3824    case INDEX_op_and_vec:
3825    case INDEX_op_or_vec:
3826    case INDEX_op_xor_vec:
3827    case INDEX_op_andc_vec:
3828    case INDEX_op_orc_vec:
3829    case INDEX_op_nor_vec:
3830    case INDEX_op_eqv_vec:
3831    case INDEX_op_nand_vec:
3832    case INDEX_op_cmp_vec:
3833    case INDEX_op_ssadd_vec:
3834    case INDEX_op_sssub_vec:
3835    case INDEX_op_usadd_vec:
3836    case INDEX_op_ussub_vec:
3837    case INDEX_op_smax_vec:
3838    case INDEX_op_smin_vec:
3839    case INDEX_op_umax_vec:
3840    case INDEX_op_umin_vec:
3841    case INDEX_op_shlv_vec:
3842    case INDEX_op_shrv_vec:
3843    case INDEX_op_sarv_vec:
3844    case INDEX_op_rotlv_vec:
3845    case INDEX_op_rotrv_vec:
3846    case INDEX_op_ppc_mrgh_vec:
3847    case INDEX_op_ppc_mrgl_vec:
3848    case INDEX_op_ppc_muleu_vec:
3849    case INDEX_op_ppc_mulou_vec:
3850    case INDEX_op_ppc_pkum_vec:
3851    case INDEX_op_dup2_vec:
3852        return C_O1_I2(v, v, v);
3853
3854    case INDEX_op_not_vec:
3855    case INDEX_op_neg_vec:
3856        return C_O1_I1(v, v);
3857
3858    case INDEX_op_dup_vec:
3859        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
3860
3861    case INDEX_op_ld_vec:
3862    case INDEX_op_dupm_vec:
3863        return C_O1_I1(v, r);
3864
3865    case INDEX_op_st_vec:
3866        return C_O0_I2(v, r);
3867
3868    case INDEX_op_bitsel_vec:
3869    case INDEX_op_ppc_msum_vec:
3870        return C_O1_I3(v, v, v, v);
3871
3872    default:
3873        g_assert_not_reached();
3874    }
3875}
3876
3877static void tcg_target_init(TCGContext *s)
3878{
3879    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3880    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3881    if (have_altivec) {
3882        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3883        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3884    }
3885
3886    tcg_target_call_clobber_regs = 0;
3887    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3888    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3889    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3890    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3891    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3892    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3893    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3894    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3895    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3896    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3897    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3898    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3899
3900    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3901    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3902    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3903    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3904    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3905    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3906    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3907    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3908    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3909    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3910    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3911    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3912    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3913    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3914    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3915    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3916    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3917    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3918    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3919    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3920
3921    s->reserved_regs = 0;
3922    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3923    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3924#if defined(_CALL_SYSV)
3925    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3926#endif
3927#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3928    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3929#endif
3930    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3931    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3932    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3933    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3934    if (USE_REG_TB) {
3935        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3936    }
3937}
3938
3939#ifdef __ELF__
3940typedef struct {
3941    DebugFrameCIE cie;
3942    DebugFrameFDEHeader fde;
3943    uint8_t fde_def_cfa[4];
3944    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3945} DebugFrame;
3946
3947/* We're expecting a 2 byte uleb128 encoded value.  */
3948QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3949
/*
 * ELF machine identifier matching the host register width.
 * NOTE(review): not referenced in this part of the file; presumably
 * consumed by the generic JIT registration code -- confirm.
 */
#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif
3955
3956static DebugFrame debug_frame = {
3957    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3958    .cie.id = -1,
3959    .cie.version = 1,
3960    .cie.code_align = 1,
3961    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3962    .cie.return_column = 65,
3963
3964    /* Total FDE size does not include the "len" member.  */
3965    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3966
3967    .fde_def_cfa = {
3968        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
3969        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3970        (FRAME_SIZE >> 7)
3971    },
3972    .fde_reg_ofs = {
3973        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
3974        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
3975    }
3976};
3977
3978void tcg_register_jit(const void *buf, size_t buf_size)
3979{
3980    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3981    int i;
3982
3983    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
3984        p[0] = 0x80 + tcg_target_callee_save_regs[i];
3985        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
3986    }
3987
3988    debug_frame.fde.func_start = (uintptr_t)buf;
3989    debug_frame.fde.func_len = buf_size;
3990
3991    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3992}
3993#endif /* __ELF__ */
/*
 * Retire these macro names at the end of the file so they do not remain
 * defined in whatever includes it.
 * NOTE(review): their definitions are outside this part of the file;
 * presumably they are instruction-encoding helpers -- confirm.
 */
#undef VMULEUB
#undef VMULEUH
#undef VMULEUW
#undef VMULOUB
#undef VMULOUH
#undef VMULOUW
#undef VMSUMUHM
4001