xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision 2cfb3b6c)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26#include "../tcg-pool.c.inc"
27#include "../tcg-ldst.c.inc"
28
/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
 *
 * When the compiler provided none of _CALL_SYSV, _CALL_DARWIN, _CALL_AIX
 * or _CALL_ELF, one is chosen here: __APPLE__ selects _CALL_DARWIN, an
 * ELF build with 32-bit TCG registers selects _CALL_SYSV, and anything
 * else stops the build with "Unknown ABI".
 */
#if !defined(_CALL_SYSV) && \
    !defined(_CALL_DARWIN) && \
    !defined(_CALL_AIX) && \
    !defined(_CALL_ELF)
# if defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif
46
/*
 * Argument and return layout selectors for helper calls.
 * 32-bit arguments: TCG_CALL_ARG_EXTEND when registers are 64 bits wide,
 * TCG_CALL_ARG_NORMAL otherwise.
 */
#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
#endif
/* 64-bit arguments: TCG_CALL_ARG_EVEN under _CALL_SYSV, else normal. */
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
#endif
/* Note sysv arg alignment applies only to 2-word types, not more. */
#define TCG_TARGET_CALL_ARG_I128   TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_RET_I128   TCG_CALL_RET_NORMAL
60
/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

/* Vector scratch registers: V0 and V1. */
#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

/*
 * R31 is the base register for TB-relative addressing; USE_REG_TB is
 * true only when TCG registers are 64 bits wide.
 */
#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register, in bytes.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)
82
/*
 * Backend-specific constant-constraint bits.  Each one is tested against
 * a candidate value by tcg_target_const_match(): signed/unsigned 16- and
 * 32-bit range, exactly zero, exactly -1, and "equal to the word size"
 * (32 for TCG_TYPE_I32, otherwise 64).
 */
#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

/*
 * Register-set bitmasks: the low 32 bits are the general registers and
 * the high 32 bits the vector registers.
 */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/*
 * Register sets for guest memory loads and stores.  With CONFIG_SOFTMMU,
 * loads exclude R3-R5 and stores exclude R3-R6; otherwise both exclude
 * only R3.
 * NOTE(review): presumably the excluded registers are needed for the
 * slow-path helper call arguments -- confirm against the qemu_ld/st code.
 */
#ifdef CONFIG_SOFTMMU
#define ALL_QLOAD_REGS \
    (ALL_GENERAL_REGS & \
     ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
#define ALL_QSTORE_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
                          (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
#else
#define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
#define ALL_QSTORE_REGS ALL_QLOAD_REGS
#endif
105
/*
 * Host CPU feature state.  Nothing in this part of the file assigns
 * these; NOTE(review): presumably they are filled in by the backend's
 * init code -- confirm.  have_isel is private to this file, the others
 * have external linkage.
 */
TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

/* Register number used for the guest base when CONFIG_SOFTMMU is unset. */
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif
114
/*
 * Printable register names, indexed by register number: 32 general
 * registers followed by 32 vector registers.  Only built when
 * CONFIG_DEBUG_TCG is defined.  Each name fits in 4 bytes including
 * the terminating NUL.
 */
#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif
127
128static const int tcg_target_reg_alloc_order[] = {
129    TCG_REG_R14,  /* call saved registers */
130    TCG_REG_R15,
131    TCG_REG_R16,
132    TCG_REG_R17,
133    TCG_REG_R18,
134    TCG_REG_R19,
135    TCG_REG_R20,
136    TCG_REG_R21,
137    TCG_REG_R22,
138    TCG_REG_R23,
139    TCG_REG_R24,
140    TCG_REG_R25,
141    TCG_REG_R26,
142    TCG_REG_R27,
143    TCG_REG_R28,
144    TCG_REG_R29,
145    TCG_REG_R30,
146    TCG_REG_R31,
147    TCG_REG_R12,  /* call clobbered, non-arguments */
148    TCG_REG_R11,
149    TCG_REG_R2,
150    TCG_REG_R13,
151    TCG_REG_R10,  /* call clobbered, arguments */
152    TCG_REG_R9,
153    TCG_REG_R8,
154    TCG_REG_R7,
155    TCG_REG_R6,
156    TCG_REG_R5,
157    TCG_REG_R4,
158    TCG_REG_R3,
159
160    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
161    TCG_REG_V2,   /* call clobbered, vectors */
162    TCG_REG_V3,
163    TCG_REG_V4,
164    TCG_REG_V5,
165    TCG_REG_V6,
166    TCG_REG_V7,
167    TCG_REG_V8,
168    TCG_REG_V9,
169    TCG_REG_V10,
170    TCG_REG_V11,
171    TCG_REG_V12,
172    TCG_REG_V13,
173    TCG_REG_V14,
174    TCG_REG_V15,
175    TCG_REG_V16,
176    TCG_REG_V17,
177    TCG_REG_V18,
178    TCG_REG_V19,
179};
180
181static const int tcg_target_call_iarg_regs[] = {
182    TCG_REG_R3,
183    TCG_REG_R4,
184    TCG_REG_R5,
185    TCG_REG_R6,
186    TCG_REG_R7,
187    TCG_REG_R8,
188    TCG_REG_R9,
189    TCG_REG_R10
190};
191
192static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
193{
194    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
195    tcg_debug_assert(slot >= 0 && slot <= 1);
196    return TCG_REG_R3 + slot;
197}
198
199static const int tcg_target_callee_save_regs[] = {
200#ifdef _CALL_DARWIN
201    TCG_REG_R11,
202#endif
203    TCG_REG_R14,
204    TCG_REG_R15,
205    TCG_REG_R16,
206    TCG_REG_R17,
207    TCG_REG_R18,
208    TCG_REG_R19,
209    TCG_REG_R20,
210    TCG_REG_R21,
211    TCG_REG_R22,
212    TCG_REG_R23,
213    TCG_REG_R24,
214    TCG_REG_R25,
215    TCG_REG_R26,
216    TCG_REG_R27, /* currently used for the global env */
217    TCG_REG_R28,
218    TCG_REG_R29,
219    TCG_REG_R30,
220    TCG_REG_R31
221};
222
223static inline bool in_range_b(tcg_target_long target)
224{
225    return target == sextract64(target, 0, 26);
226}
227
228static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
229			       const tcg_insn_unit *target)
230{
231    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
232    tcg_debug_assert(in_range_b(disp));
233    return disp & 0x3fffffc;
234}
235
236static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
237{
238    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
239    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
240
241    if (in_range_b(disp)) {
242        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
243        return true;
244    }
245    return false;
246}
247
248static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
249			       const tcg_insn_unit *target)
250{
251    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
252    tcg_debug_assert(disp == (int16_t) disp);
253    return disp & 0xfffc;
254}
255
256static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
257{
258    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
259    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
260
261    if (disp == (int16_t) disp) {
262        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
263        return true;
264    }
265    return false;
266}
267
268/* test if a constant matches the constraint */
269static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
270{
271    if (ct & TCG_CT_CONST) {
272        return 1;
273    }
274
275    /* The only 32-bit constraint we use aside from
276       TCG_CT_CONST is TCG_CT_CONST_S16.  */
277    if (type == TCG_TYPE_I32) {
278        val = (int32_t)val;
279    }
280
281    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
282        return 1;
283    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
284        return 1;
285    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
286        return 1;
287    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
288        return 1;
289    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
290        return 1;
291    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
292        return 1;
293    } else if ((ct & TCG_CT_CONST_WSZ)
294               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
295        return 1;
296    }
297    return 0;
298}
299
/*
 * Instruction-word builders.  OPCD places a primary opcode at bit 26.
 * The XOnn/MDnn/VX4 forms OR a fixed primary opcode (19, 30, 31, 58, 62
 * or 4) with an extended opcode, shifted as each definition shows.
 */
#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))
308
/*
 * Integer instruction opcodes.  Each macro is the instruction word with
 * all register and immediate fields zero; callers OR in the operand
 * fields.  Mnemonics follow the Power ISA.
 */

/* Branches and byte/halfword/word loads and stores. */
#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

/* Doubleword stores. */
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

/* Doubleword loads and LWA/LWAX. */
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

/* Arithmetic, logical and compare with a 16-bit immediate. */
#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

/* Word load/store with update. */
#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

/* 32-bit rotate-and-mask. */
#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

/* 64-bit rotate-and-mask. */
#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

/* Branch to LR/CTR and condition-register logic (primary opcode 19). */
#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

/* Register-register integer operations (primary opcode 31). */
#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
/* MFOCRF is MFCR with bit 20 set. */
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

/* Doubleword multiply, divide and modulo. */
#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

/* Indexed byte/halfword/word loads and stores. */
#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

/* Memory barriers.  LWSYNC is HWSYNC with bit 21 set. */
#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

/*
 * Special-purpose register field for MFSPR/MTSPR: the two 5-bit halves
 * are packed as (a << 5) | b and placed at bit 11.
 */
#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

/* 32-bit shifts by register. */
#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

/* 64-bit shifts by register, and SRADI (shift by immediate). */
#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

/* Byte-reverse halfword/word/doubleword. */
#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

/* TRAP is TW with all five TO bits set. */
#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */
456
/*
 * Vector and VSX load opcodes.  For the VSX forms the low bit(s) noted
 * as "force tx=1" are pre-set in the opcode value.
 */
#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

/* Vector and VSX store opcodes, with sx pre-set where noted. */
#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */
474
/*
 * Vector arithmetic/logic opcodes (primary opcode 4 via VX4) and the
 * VSX permute/select/move opcodes.  A trailing version comment gives
 * the ISA level the instruction is annotated with; entries without one
 * carry no annotation in this table.
 */

/* Vector add: saturating (signed/unsigned) and modulo forms. */
#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

/* Vector subtract: saturating (signed/unsigned) and modulo forms. */
#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

/* Vector negate: one base opcode, selected by the value at bit 16. */
#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

/* Vector min/max. */
#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

/* Vector compares. */
#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

/* Vector shifts and rotates. */
#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

/* Vector multiplies and multiply-sum. */
#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

/* Vector merges. */
#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

/* Vector packs. */
#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

/* Vector bitwise logic. */
#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

/* Vector splats, from an element or from an immediate. */
#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

/* VSX permute/select/splat, with the register-extension bits pre-set. */
#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

/* Moves between general and vector-scalar registers. */
#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
598
/*
 * Operand field placement.  Each macro shifts its argument to the bit
 * position of the named instruction field; none of these mask the
 * argument, so callers must pass values that fit the field.
 */
#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
/* One-hot field mask: b == 0 selects bit 19, b == 7 selects bit 12. */
#define FXM(b) (1 << (19 - (b)))

/*
 * Vector register fields.  The register number is reduced modulo 32
 * before being placed in the field.
 */
#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

/* Link bit of a branch instruction. */
#define LK    1

/*
 * Common operand combinations: three registers (TAB/SAB) or two
 * registers plus an immediate truncated to 16 bits (TAI/SAI).
 */
#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

/*
 * Condition-register addressing.  BF places a field number at bit 23;
 * the others compute the bit index (c + 4 * n) for bit c of field n and
 * place it in the named operand position.
 */
#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

/* BO field values used for conditional and unconditional branches. */
#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)
633
/*
 * Bit positions within one 4-bit condition-register field, used as the
 * `c` argument of BI(), BT(), BA(), BB() and BC_().
 */
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
640
/*
 * Conditional-branch instruction template for each TCG condition.
 * Every entry is a BC that tests one bit of condition-register field 7:
 * BO_COND_TRUE branches when the bit is set, BO_COND_FALSE when it is
 * clear.  LE and GE are expressed as "not GT" and "not LT".
 *
 * Signed and unsigned conditions share the same template.
 * NOTE(review): presumably the compare instruction emitted beforehand
 * selects signedness -- confirm against the compare emitter.
 */
static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
653
/*
 * ISEL instruction template for each TCG condition, selecting on one
 * bit of condition-register field 7.
 *
 * The low bit here is set if the RA and RB fields must be inverted.
 * It is set for exactly the conditions that tcg_to_bc expresses with
 * BO_COND_FALSE (NE, GE, LE, GEU, LEU).  It is a flag for the caller,
 * not part of the ISEL encoding, so the caller has to strip it.
 */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};
667
668static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
669                        intptr_t value, intptr_t addend)
670{
671    const tcg_insn_unit *target;
672    int16_t lo;
673    int32_t hi;
674
675    value += addend;
676    target = (const tcg_insn_unit *)value;
677
678    switch (type) {
679    case R_PPC_REL14:
680        return reloc_pc14(code_ptr, target);
681    case R_PPC_REL24:
682        return reloc_pc24(code_ptr, target);
683    case R_PPC_ADDR16:
684        /*
685         * We are (slightly) abusing this relocation type.  In particular,
686         * assert that the low 2 bits are zero, and do not modify them.
687         * That way we can use this with LD et al that have opcode bits
688         * in the low 2 bits of the insn.
689         */
690        if ((value & 3) || value != (int16_t)value) {
691            return false;
692        }
693        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
694        break;
695    case R_PPC_ADDR32:
696        /*
697         * We are abusing this relocation type.  Again, this points to
698         * a pair of insns, lis + load.  This is an absolute address
699         * relocation for PPC32 so the lis cannot be removed.
700         */
701        lo = value;
702        hi = value - lo;
703        if (hi + lo != value) {
704            return false;
705        }
706        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
707        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
708        break;
709    default:
710        g_assert_not_reached();
711    }
712    return true;
713}
714
/*
 * Forward declaration: tcg_out_movi_int() below calls this before any
 * definition is visible.
 */
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);
717
718static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
719{
720    if (ret == arg) {
721        return true;
722    }
723    switch (type) {
724    case TCG_TYPE_I64:
725        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
726        /* fallthru */
727    case TCG_TYPE_I32:
728        if (ret < TCG_REG_V0) {
729            if (arg < TCG_REG_V0) {
730                tcg_out32(s, OR | SAB(arg, ret, arg));
731                break;
732            } else if (have_isa_2_07) {
733                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
734                          | VRT(arg) | RA(ret));
735                break;
736            } else {
737                /* Altivec does not support vector->integer moves.  */
738                return false;
739            }
740        } else if (arg < TCG_REG_V0) {
741            if (have_isa_2_07) {
742                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
743                          | VRT(ret) | RA(arg));
744                break;
745            } else {
746                /* Altivec does not support integer->vector moves.  */
747                return false;
748            }
749        }
750        /* fallthru */
751    case TCG_TYPE_V64:
752    case TCG_TYPE_V128:
753        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
754        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
755        break;
756    default:
757        g_assert_not_reached();
758    }
759    return true;
760}
761
762static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
763                               int sh, int mb)
764{
765    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
766    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
767    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
768    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
769}
770
771static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
772                               int sh, int mb, int me)
773{
774    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
775}
776
777static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src)
778{
779    tcg_out32(s, EXTSB | RA(dst) | RS(src));
780}
781
782static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src)
783{
784    tcg_out32(s, EXTSH | RA(dst) | RS(src));
785}
786
787static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
788{
789    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
790}
791
792static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
793{
794    tcg_out32(s, EXTSW | RA(dst) | RS(src));
795}
796
797static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
798{
799    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
800}
801
802static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
803{
804    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
805}
806
807static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
808{
809    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
810}
811
812static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
813{
814    /* Limit immediate shift count lest we create an illegal insn.  */
815    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
816}
817
818static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
819{
820    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
821}
822
823static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
824{
825    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
826}
827
828static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
829{
830    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
831}
832
833static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
834{
835    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
836
837    if (have_isa_3_10) {
838        tcg_out32(s, BRH | RA(dst) | RS(src));
839        if (flags & TCG_BSWAP_OS) {
840            tcg_out_ext16s(s, dst, dst);
841        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
842            tcg_out_ext16u(s, dst, dst);
843        }
844        return;
845    }
846
847    /*
848     * In the following,
849     *   dep(a, b, m) -> (a & ~m) | (b & m)
850     *
851     * Begin with:                              src = xxxxabcd
852     */
853    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
854    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
855    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
856    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
857
858    if (flags & TCG_BSWAP_OS) {
859        tcg_out_ext16s(s, dst, tmp);
860    } else {
861        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
862    }
863}
864
865static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
866{
867    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
868
869    if (have_isa_3_10) {
870        tcg_out32(s, BRW | RA(dst) | RS(src));
871        if (flags & TCG_BSWAP_OS) {
872            tcg_out_ext32s(s, dst, dst);
873        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
874            tcg_out_ext32u(s, dst, dst);
875        }
876        return;
877    }
878
879    /*
880     * Stolen from gcc's builtin_bswap32.
881     * In the following,
882     *   dep(a, b, m) -> (a & ~m) | (b & m)
883     *
884     * Begin with:                              src = xxxxabcd
885     */
886    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
887    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
888    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
889    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
890    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
891    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
892
893    if (flags & TCG_BSWAP_OS) {
894        tcg_out_ext32s(s, dst, tmp);
895    } else {
896        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
897    }
898}
899
/*
 * Emit code that byte-swaps the 64-bit value in @src into @dst.
 *
 * With have_isa_3_10 a single BRD is emitted.  Otherwise the result is
 * assembled from 32-bit rotate-and-insert steps using two working
 * registers, t0 (accumulates the result) and t1 (holds src rotated by
 * 32).  One of them is always R0:
 *   dst != src:  t0 = dst, t1 = R0
 *   dst == src:  t0 = R0,  t1 = dst (src itself is overwritten, but
 *                only by the last step that reads it)
 */
static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    /* Emits nothing when t0 is already dst (tcg_out_mov skips ret == arg). */
    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}
937
938/* Emit a move into ret of arg, if it can be done in one insn.  */
939static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
940{
941    if (arg == (int16_t)arg) {
942        tcg_out32(s, ADDI | TAI(ret, 0, arg));
943        return true;
944    }
945    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
946        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
947        return true;
948    }
949    return false;
950}
951
/*
 * Emit code that loads the integer constant @arg into general register
 * @ret, using the first form that applies:
 *   1. one insn (tcg_out_movi_one);
 *   2. ADDI relative to TCG_REG_TB when the TB-relative distance fits
 *      in 16 bits;
 *   3. two insns for values that fit in 32 bits (signed or unsigned);
 *   4. a 16-bit immediate followed by a mask or a shift;
 *   5. a TB-relative address within 32 bits (tcg_out_mem_long);
 *   6. a load from the constant pool;
 *   7. an explicit build from the high and low 32-bit halves.
 *
 * @type must be TCG_TYPE_I32 unless TCG registers are 64 bits wide; on
 * a 64-bit host an I32 value is sign-extended from 32 bits first.
 * @in_prologue disables every form that depends on TCG_REG_TB (2, 5, 6);
 * those forms are also unavailable when USE_REG_TB is false.
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    /* tb_diff is computed unconditionally and reused by the 2GB case below. */
    tb_diff = tcg_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    /* Unsigned 32-bit value whose bit 15 is clear: ADDI then ORIS. */
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        /* True when tmp is a contiguous run of low one bits (2^k - 1). */
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    /* First try: a 16-bit value shifted left by its trailing zero count. */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    /* Second try: a one-insn value shifted right by the leading zero count. */
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        /* The LD displacement is emitted as 0 and patched via R_PPC_ADDR16. */
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       tcg_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    /*
     * Last resort: load the high 32 bits, shift them into place, then OR
     * in the two low halfwords that are non-zero.
     * NOTE(review): the shift is written as ">> 31 >> 1", presumably so
     * that it stays well-defined where tcg_target_long is 32 bits wide.
     */
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
1041
1042static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
1043                             TCGReg ret, int64_t val)
1044{
1045    uint32_t load_insn;
1046    int rel, low;
1047    intptr_t add;
1048
1049    switch (vece) {
1050    case MO_8:
1051        low = (int8_t)val;
1052        if (low >= -16 && low < 16) {
1053            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
1054            return;
1055        }
1056        if (have_isa_3_00) {
1057            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
1058            return;
1059        }
1060        break;
1061
1062    case MO_16:
1063        low = (int16_t)val;
1064        if (low >= -16 && low < 16) {
1065            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
1066            return;
1067        }
1068        break;
1069
1070    case MO_32:
1071        low = (int32_t)val;
1072        if (low >= -16 && low < 16) {
1073            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
1074            return;
1075        }
1076        break;
1077    }
1078
1079    /*
1080     * Otherwise we must load the value from the constant pool.
1081     */
1082    if (USE_REG_TB) {
1083        rel = R_PPC_ADDR16;
1084        add = tcg_tbrel_diff(s, NULL);
1085    } else {
1086        rel = R_PPC_ADDR32;
1087        add = 0;
1088    }
1089
1090    if (have_vsx) {
1091        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
1092        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
1093        if (TCG_TARGET_REG_BITS == 64) {
1094            new_pool_label(s, val, rel, s->code_ptr, add);
1095        } else {
1096            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
1097        }
1098    } else {
1099        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
1100        if (TCG_TARGET_REG_BITS == 64) {
1101            new_pool_l2(s, rel, s->code_ptr, add, val, val);
1102        } else {
1103            new_pool_l4(s, rel, s->code_ptr, add,
1104                        val >> 32, val, val >> 32, val);
1105        }
1106    }
1107
1108    if (USE_REG_TB) {
1109        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
1110        load_insn |= RA(TCG_REG_TB);
1111    } else {
1112        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
1113        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
1114    }
1115    tcg_out32(s, load_insn);
1116}
1117
1118static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1119                         tcg_target_long arg)
1120{
1121    switch (type) {
1122    case TCG_TYPE_I32:
1123    case TCG_TYPE_I64:
1124        tcg_debug_assert(ret < TCG_REG_V0);
1125        tcg_out_movi_int(s, type, ret, arg, false);
1126        break;
1127
1128    default:
1129        g_assert_not_reached();
1130    }
1131}
1132
1133static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1134                             tcg_target_long imm)
1135{
1136    /* This function is only used for passing structs by reference. */
1137    g_assert_not_reached();
1138}
1139
/*
 * Decide whether the 32-bit constant C is a single contiguous run of
 * one bits, i.e. one of
 *       0....01....1
 *       1....10....0
 *       0..01..10..0
 * All-zeros and all-ones are rejected.
 *
 * On success store the mask begin/end positions (as computed with clz32)
 * in *MB and *ME and return true; otherwise return false and leave the
 * outputs untouched.
 */
static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t low_bit, carried;

    if (c == 0 || c == -1) {
        return false;
    }

    /*
     * Adding the lowest set bit to C clears the lowest run of ones and
     * carries into the bit above it.  If C was one run, the sum has at
     * most a single bit set.
     */
    low_bit = c & -c;
    carried = c + low_bit;
    if ((carried & (carried - 1)) != 0) {
        return false;
    }

    *me = clz32(low_bit);
    if (carried) {
        *mb = clz32(carried & -carried) + 1;
    } else {
        /* The carry left the word: the run extends to the top bit. */
        *mb = 0;
    }
    return true;
}
1163
/*
 * Decide whether the 64-bit constant C is a run of ones anchored at the
 * top (1..10..0) or at the bottom (0..01..1) of the word.
 *
 * On success store the mask bounds in *MB and *ME and return true;
 * return false for zero and for any other pattern.
 */
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t low_bit;

    if (!c) {
        return false;
    }

    low_bit = c & -c;
    if (c == -low_bit) {
        /* 1..10..0: every bit from the lowest set bit upward is set. */
        *mb = 0;
        *me = clz64(low_bit);
    } else if (low_bit == 1 && !(c & (c + 1))) {
        /* 0..01..1: bit 0 is set and C + 1 shares no bit with C. */
        *mb = clz64(c + 1) + 1;
        *me = 63;
    } else {
        return false;
    }
    return true;
}
1187
1188static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1189{
1190    int mb, me;
1191
1192    if (mask_operand(c, &mb, &me)) {
1193        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1194    } else if ((c & 0xffff) == c) {
1195        tcg_out32(s, ANDI | SAI(src, dst, c));
1196        return;
1197    } else if ((c & 0xffff0000) == c) {
1198        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1199        return;
1200    } else {
1201        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1202        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1203    }
1204}
1205
1206static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1207{
1208    int mb, me;
1209
1210    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1211    if (mask64_operand(c, &mb, &me)) {
1212        if (mb == 0) {
1213            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1214        } else {
1215            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1216        }
1217    } else if ((c & 0xffff) == c) {
1218        tcg_out32(s, ANDI | SAI(src, dst, c));
1219        return;
1220    } else if ((c & 0xffff0000) == c) {
1221        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1222        return;
1223    } else {
1224        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1225        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1226    }
1227}
1228
1229static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1230                           int op_lo, int op_hi)
1231{
1232    if (c >> 16) {
1233        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1234        src = dst;
1235    }
1236    if (c & 0xffff) {
1237        tcg_out32(s, op_lo | SAI(src, dst, c));
1238        src = dst;
1239    }
1240}
1241
1242static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1243{
1244    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
1245}
1246
1247static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1248{
1249    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
1250}
1251
1252static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
1253{
1254    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1255    if (in_range_b(disp)) {
1256        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1257    } else {
1258        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1259        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1260        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1261    }
1262}
1263
1264static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
1265                             TCGReg base, tcg_target_long offset)
1266{
1267    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
1268    bool is_int_store = false;
1269    TCGReg rs = TCG_REG_TMP1;
1270
1271    switch (opi) {
1272    case LD: case LWA:
1273        align = 3;
1274        /* FALLTHRU */
1275    default:
1276        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
1277            rs = rt;
1278            break;
1279        }
1280        break;
1281    case LXSD:
1282    case STXSD:
1283        align = 3;
1284        break;
1285    case LXV:
1286    case STXV:
1287        align = 15;
1288        break;
1289    case STD:
1290        align = 3;
1291        /* FALLTHRU */
1292    case STB: case STH: case STW:
1293        is_int_store = true;
1294        break;
1295    }
1296
1297    /* For unaligned, or very large offsets, use the indexed form.  */
1298    if (offset & align || offset != (int32_t)offset || opi == 0) {
1299        if (rs == base) {
1300            rs = TCG_REG_R0;
1301        }
1302        tcg_debug_assert(!is_int_store || rs != rt);
1303        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
1304        tcg_out32(s, opx | TAB(rt & 31, base, rs));
1305        return;
1306    }
1307
1308    l0 = (int16_t)offset;
1309    offset = (offset - l0) >> 16;
1310    l1 = (int16_t)offset;
1311
1312    if (l1 < 0 && orig >= 0) {
1313        extra = 0x4000;
1314        l1 = (int16_t)(offset - 0x4000);
1315    }
1316    if (l1) {
1317        tcg_out32(s, ADDIS | TAI(rs, base, l1));
1318        base = rs;
1319    }
1320    if (extra) {
1321        tcg_out32(s, ADDIS | TAI(rs, base, extra));
1322        base = rs;
1323    }
1324    if (opi != ADDI || base != rt || l0 != 0) {
1325        tcg_out32(s, opi | TAI(rt & 31, base, l0));
1326    }
1327}
1328
1329static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
1330                           TCGReg va, TCGReg vb, int shb)
1331{
1332    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
1333}
1334
1335static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1336                       TCGReg base, intptr_t offset)
1337{
1338    int shift;
1339
1340    switch (type) {
1341    case TCG_TYPE_I32:
1342        if (ret < TCG_REG_V0) {
1343            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
1344            break;
1345        }
1346        if (have_isa_2_07 && have_vsx) {
1347            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
1348            break;
1349        }
1350        tcg_debug_assert((offset & 3) == 0);
1351        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
1352        shift = (offset - 4) & 0xc;
1353        if (shift) {
1354            tcg_out_vsldoi(s, ret, ret, ret, shift);
1355        }
1356        break;
1357    case TCG_TYPE_I64:
1358        if (ret < TCG_REG_V0) {
1359            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1360            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
1361            break;
1362        }
1363        /* fallthru */
1364    case TCG_TYPE_V64:
1365        tcg_debug_assert(ret >= TCG_REG_V0);
1366        if (have_vsx) {
1367            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
1368                             ret, base, offset);
1369            break;
1370        }
1371        tcg_debug_assert((offset & 7) == 0);
1372        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
1373        if (offset & 8) {
1374            tcg_out_vsldoi(s, ret, ret, ret, 8);
1375        }
1376        break;
1377    case TCG_TYPE_V128:
1378        tcg_debug_assert(ret >= TCG_REG_V0);
1379        tcg_debug_assert((offset & 15) == 0);
1380        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
1381                         LVX, ret, base, offset);
1382        break;
1383    default:
1384        g_assert_not_reached();
1385    }
1386}
1387
1388static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1389                              TCGReg base, intptr_t offset)
1390{
1391    int shift;
1392
1393    switch (type) {
1394    case TCG_TYPE_I32:
1395        if (arg < TCG_REG_V0) {
1396            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1397            break;
1398        }
1399        if (have_isa_2_07 && have_vsx) {
1400            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1401            break;
1402        }
1403        assert((offset & 3) == 0);
1404        tcg_debug_assert((offset & 3) == 0);
1405        shift = (offset - 4) & 0xc;
1406        if (shift) {
1407            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1408            arg = TCG_VEC_TMP1;
1409        }
1410        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1411        break;
1412    case TCG_TYPE_I64:
1413        if (arg < TCG_REG_V0) {
1414            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1415            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1416            break;
1417        }
1418        /* fallthru */
1419    case TCG_TYPE_V64:
1420        tcg_debug_assert(arg >= TCG_REG_V0);
1421        if (have_vsx) {
1422            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1423                             STXSDX, arg, base, offset);
1424            break;
1425        }
1426        tcg_debug_assert((offset & 7) == 0);
1427        if (offset & 8) {
1428            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1429            arg = TCG_VEC_TMP1;
1430        }
1431        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1432        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1433        break;
1434    case TCG_TYPE_V128:
1435        tcg_debug_assert(arg >= TCG_REG_V0);
1436        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1437                         STVX, arg, base, offset);
1438        break;
1439    default:
1440        g_assert_not_reached();
1441    }
1442}
1443
1444static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1445                               TCGReg base, intptr_t ofs)
1446{
1447    return false;
1448}
1449
1450static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1451                        int const_arg2, int cr, TCGType type)
1452{
1453    int imm;
1454    uint32_t op;
1455
1456    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1457
1458    /* Simplify the comparisons below wrt CMPI.  */
1459    if (type == TCG_TYPE_I32) {
1460        arg2 = (int32_t)arg2;
1461    }
1462
1463    switch (cond) {
1464    case TCG_COND_EQ:
1465    case TCG_COND_NE:
1466        if (const_arg2) {
1467            if ((int16_t) arg2 == arg2) {
1468                op = CMPI;
1469                imm = 1;
1470                break;
1471            } else if ((uint16_t) arg2 == arg2) {
1472                op = CMPLI;
1473                imm = 1;
1474                break;
1475            }
1476        }
1477        op = CMPL;
1478        imm = 0;
1479        break;
1480
1481    case TCG_COND_LT:
1482    case TCG_COND_GE:
1483    case TCG_COND_LE:
1484    case TCG_COND_GT:
1485        if (const_arg2) {
1486            if ((int16_t) arg2 == arg2) {
1487                op = CMPI;
1488                imm = 1;
1489                break;
1490            }
1491        }
1492        op = CMP;
1493        imm = 0;
1494        break;
1495
1496    case TCG_COND_LTU:
1497    case TCG_COND_GEU:
1498    case TCG_COND_LEU:
1499    case TCG_COND_GTU:
1500        if (const_arg2) {
1501            if ((uint16_t) arg2 == arg2) {
1502                op = CMPLI;
1503                imm = 1;
1504                break;
1505            }
1506        }
1507        op = CMPL;
1508        imm = 0;
1509        break;
1510
1511    default:
1512        tcg_abort();
1513    }
1514    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1515
1516    if (imm) {
1517        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1518    } else {
1519        if (const_arg2) {
1520            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1521            arg2 = TCG_REG_R0;
1522        }
1523        tcg_out32(s, op | RA(arg1) | RB(arg2));
1524    }
1525}
1526
1527static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1528                                TCGReg dst, TCGReg src)
1529{
1530    if (type == TCG_TYPE_I32) {
1531        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1532        tcg_out_shri32(s, dst, dst, 5);
1533    } else {
1534        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1535        tcg_out_shri64(s, dst, dst, 6);
1536    }
1537}
1538
1539static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
1540{
1541    /* X != 0 implies X + -1 generates a carry.  Extra addition
1542       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
1543    if (dst != src) {
1544        tcg_out32(s, ADDIC | TAI(dst, src, -1));
1545        tcg_out32(s, SUBFE | TAB(dst, dst, src));
1546    } else {
1547        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1548        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1549    }
1550}
1551
1552static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1553                                  bool const_arg2)
1554{
1555    if (const_arg2) {
1556        if ((uint32_t)arg2 == arg2) {
1557            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1558        } else {
1559            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1560            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1561        }
1562    } else {
1563        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1564    }
1565    return TCG_REG_R0;
1566}
1567
1568static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1569                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
1570                            int const_arg2)
1571{
1572    int crop, sh;
1573
1574    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1575
1576    /* Ignore high bits of a potential constant arg2.  */
1577    if (type == TCG_TYPE_I32) {
1578        arg2 = (uint32_t)arg2;
1579    }
1580
1581    /* Handle common and trivial cases before handling anything else.  */
1582    if (arg2 == 0) {
1583        switch (cond) {
1584        case TCG_COND_EQ:
1585            tcg_out_setcond_eq0(s, type, arg0, arg1);
1586            return;
1587        case TCG_COND_NE:
1588            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1589                tcg_out_ext32u(s, TCG_REG_R0, arg1);
1590                arg1 = TCG_REG_R0;
1591            }
1592            tcg_out_setcond_ne0(s, arg0, arg1);
1593            return;
1594        case TCG_COND_GE:
1595            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1596            arg1 = arg0;
1597            /* FALLTHRU */
1598        case TCG_COND_LT:
1599            /* Extract the sign bit.  */
1600            if (type == TCG_TYPE_I32) {
1601                tcg_out_shri32(s, arg0, arg1, 31);
1602            } else {
1603                tcg_out_shri64(s, arg0, arg1, 63);
1604            }
1605            return;
1606        default:
1607            break;
1608        }
1609    }
1610
1611    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1612       All other cases below are also at least 3 insns, so speed up the
1613       code generator by not considering them and always using ISEL.  */
1614    if (have_isel) {
1615        int isel, tab;
1616
1617        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1618
1619        isel = tcg_to_isel[cond];
1620
1621        tcg_out_movi(s, type, arg0, 1);
1622        if (isel & 1) {
1623            /* arg0 = (bc ? 0 : 1) */
1624            tab = TAB(arg0, 0, arg0);
1625            isel &= ~1;
1626        } else {
1627            /* arg0 = (bc ? 1 : 0) */
1628            tcg_out_movi(s, type, TCG_REG_R0, 0);
1629            tab = TAB(arg0, arg0, TCG_REG_R0);
1630        }
1631        tcg_out32(s, isel | tab);
1632        return;
1633    }
1634
1635    switch (cond) {
1636    case TCG_COND_EQ:
1637        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1638        tcg_out_setcond_eq0(s, type, arg0, arg1);
1639        return;
1640
1641    case TCG_COND_NE:
1642        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1643        /* Discard the high bits only once, rather than both inputs.  */
1644        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1645            tcg_out_ext32u(s, TCG_REG_R0, arg1);
1646            arg1 = TCG_REG_R0;
1647        }
1648        tcg_out_setcond_ne0(s, arg0, arg1);
1649        return;
1650
1651    case TCG_COND_GT:
1652    case TCG_COND_GTU:
1653        sh = 30;
1654        crop = 0;
1655        goto crtest;
1656
1657    case TCG_COND_LT:
1658    case TCG_COND_LTU:
1659        sh = 29;
1660        crop = 0;
1661        goto crtest;
1662
1663    case TCG_COND_GE:
1664    case TCG_COND_GEU:
1665        sh = 31;
1666        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
1667        goto crtest;
1668
1669    case TCG_COND_LE:
1670    case TCG_COND_LEU:
1671        sh = 31;
1672        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
1673    crtest:
1674        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1675        if (crop) {
1676            tcg_out32(s, crop);
1677        }
1678        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1679        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1680        break;
1681
1682    default:
1683        tcg_abort();
1684    }
1685}
1686
1687static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1688{
1689    if (l->has_value) {
1690        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1691    } else {
1692        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1693    }
1694    tcg_out32(s, bc);
1695}
1696
1697static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1698                           TCGArg arg1, TCGArg arg2, int const_arg2,
1699                           TCGLabel *l, TCGType type)
1700{
1701    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1702    tcg_out_bc(s, tcg_to_bc[cond], l);
1703}
1704
1705static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1706                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1707                            TCGArg v2, bool const_c2)
1708{
1709    /* If for some reason both inputs are zero, don't produce bad code.  */
1710    if (v1 == 0 && v2 == 0) {
1711        tcg_out_movi(s, type, dest, 0);
1712        return;
1713    }
1714
1715    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1716
1717    if (have_isel) {
1718        int isel = tcg_to_isel[cond];
1719
1720        /* Swap the V operands if the operation indicates inversion.  */
1721        if (isel & 1) {
1722            int t = v1;
1723            v1 = v2;
1724            v2 = t;
1725            isel &= ~1;
1726        }
1727        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1728        if (v2 == 0) {
1729            tcg_out_movi(s, type, TCG_REG_R0, 0);
1730        }
1731        tcg_out32(s, isel | TAB(dest, v1, v2));
1732    } else {
1733        if (dest == v2) {
1734            cond = tcg_invert_cond(cond);
1735            v2 = v1;
1736        } else if (dest != v1) {
1737            if (v1 == 0) {
1738                tcg_out_movi(s, type, dest, 0);
1739            } else {
1740                tcg_out_mov(s, type, dest, v1);
1741            }
1742        }
1743        /* Branch forward over one insn */
1744        tcg_out32(s, tcg_to_bc[cond] | 8);
1745        if (v2 == 0) {
1746            tcg_out_movi(s, type, dest, 0);
1747        } else {
1748            tcg_out_mov(s, type, dest, v2);
1749        }
1750    }
1751}
1752
1753static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
1754                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
1755{
1756    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
1757        tcg_out32(s, opc | RA(a0) | RS(a1));
1758    } else {
1759        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
1760        /* Note that the only other valid constant for a2 is 0.  */
1761        if (have_isel) {
1762            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
1763            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
1764        } else if (!const_a2 && a0 == a2) {
1765            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
1766            tcg_out32(s, opc | RA(a0) | RS(a1));
1767        } else {
1768            tcg_out32(s, opc | RA(a0) | RS(a1));
1769            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
1770            if (const_a2) {
1771                tcg_out_movi(s, type, a0, 0);
1772            } else {
1773                tcg_out_mov(s, type, a0, a2);
1774            }
1775        }
1776    }
1777}
1778
1779static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1780                         const int *const_args)
1781{
1782    static const struct { uint8_t bit1, bit2; } bits[] = {
1783        [TCG_COND_LT ] = { CR_LT, CR_LT },
1784        [TCG_COND_LE ] = { CR_LT, CR_GT },
1785        [TCG_COND_GT ] = { CR_GT, CR_GT },
1786        [TCG_COND_GE ] = { CR_GT, CR_LT },
1787        [TCG_COND_LTU] = { CR_LT, CR_LT },
1788        [TCG_COND_LEU] = { CR_LT, CR_GT },
1789        [TCG_COND_GTU] = { CR_GT, CR_GT },
1790        [TCG_COND_GEU] = { CR_GT, CR_LT },
1791    };
1792
1793    TCGCond cond = args[4], cond2;
1794    TCGArg al, ah, bl, bh;
1795    int blconst, bhconst;
1796    int op, bit1, bit2;
1797
1798    al = args[0];
1799    ah = args[1];
1800    bl = args[2];
1801    bh = args[3];
1802    blconst = const_args[2];
1803    bhconst = const_args[3];
1804
1805    switch (cond) {
1806    case TCG_COND_EQ:
1807        op = CRAND;
1808        goto do_equality;
1809    case TCG_COND_NE:
1810        op = CRNAND;
1811    do_equality:
1812        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1813        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1814        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1815        break;
1816
1817    case TCG_COND_LT:
1818    case TCG_COND_LE:
1819    case TCG_COND_GT:
1820    case TCG_COND_GE:
1821    case TCG_COND_LTU:
1822    case TCG_COND_LEU:
1823    case TCG_COND_GTU:
1824    case TCG_COND_GEU:
1825        bit1 = bits[cond].bit1;
1826        bit2 = bits[cond].bit2;
1827        op = (bit1 != bit2 ? CRANDC : CRAND);
1828        cond2 = tcg_unsigned_cond(cond);
1829
1830        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1831        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1832        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1833        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1834        break;
1835
1836    default:
1837        tcg_abort();
1838    }
1839}
1840
1841static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1842                             const int *const_args)
1843{
1844    tcg_out_cmp2(s, args + 1, const_args + 1);
1845    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1846    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
1847}
1848
1849static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
1850                             const int *const_args)
1851{
1852    tcg_out_cmp2(s, args, const_args);
1853    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1854}
1855
1856static void tcg_out_mb(TCGContext *s, TCGArg a0)
1857{
1858    uint32_t insn;
1859
1860    if (a0 & TCG_MO_ST_LD) {
1861        insn = HWSYNC;
1862    } else {
1863        insn = LWSYNC;
1864    }
1865
1866    tcg_out32(s, insn);
1867}
1868
1869static void tcg_out_call_int(TCGContext *s, int lk,
1870                             const tcg_insn_unit *target)
1871{
1872#ifdef _CALL_AIX
1873    /* Look through the descriptor.  If the branch is in range, and we
1874       don't have to spend too much effort on building the toc.  */
1875    const void *tgt = ((const void * const *)target)[0];
1876    uintptr_t toc = ((const uintptr_t *)target)[1];
1877    intptr_t diff = tcg_pcrel_diff(s, tgt);
1878
1879    if (in_range_b(diff) && toc == (uint32_t)toc) {
1880        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1881        tcg_out_b(s, lk, tgt);
1882    } else {
1883        /* Fold the low bits of the constant into the addresses below.  */
1884        intptr_t arg = (intptr_t)target;
1885        int ofs = (int16_t)arg;
1886
1887        if (ofs + 8 < 0x8000) {
1888            arg -= ofs;
1889        } else {
1890            ofs = 0;
1891        }
1892        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1893        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1894        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1895        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1896        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1897    }
1898#elif defined(_CALL_ELF) && _CALL_ELF == 2
1899    intptr_t diff;
1900
1901    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1902       address, which the callee uses to compute its TOC address.  */
1903    /* FIXME: when the branch is in range, we could avoid r12 load if we
1904       knew that the destination uses the same TOC, and what its local
1905       entry point offset is.  */
1906    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1907
1908    diff = tcg_pcrel_diff(s, target);
1909    if (in_range_b(diff)) {
1910        tcg_out_b(s, lk, target);
1911    } else {
1912        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1913        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1914    }
1915#else
1916    tcg_out_b(s, lk, target);
1917#endif
1918}
1919
1920static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1921                         const TCGHelperInfo *info)
1922{
1923    tcg_out_call_int(s, LK, target);
1924}
1925
1926static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
1927    [MO_UB] = LBZX,
1928    [MO_UW] = LHZX,
1929    [MO_UL] = LWZX,
1930    [MO_UQ] = LDX,
1931    [MO_SW] = LHAX,
1932    [MO_SL] = LWAX,
1933    [MO_BSWAP | MO_UB] = LBZX,
1934    [MO_BSWAP | MO_UW] = LHBRX,
1935    [MO_BSWAP | MO_UL] = LWBRX,
1936    [MO_BSWAP | MO_UQ] = LDBRX,
1937};
1938
1939static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
1940    [MO_UB] = STBX,
1941    [MO_UW] = STHX,
1942    [MO_UL] = STWX,
1943    [MO_UQ] = STDX,
1944    [MO_BSWAP | MO_UB] = STBX,
1945    [MO_BSWAP | MO_UW] = STHBRX,
1946    [MO_BSWAP | MO_UL] = STWBRX,
1947    [MO_BSWAP | MO_UQ] = STDBRX,
1948};
1949
1950static const uint32_t qemu_exts_opc[4] = {
1951    EXTSB, EXTSH, EXTSW, 0
1952};
1953
1954#if defined (CONFIG_SOFTMMU)
1955/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
1956 *                                 int mmu_idx, uintptr_t ra)
1957 */
1958static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1959    [MO_UB]   = helper_ret_ldub_mmu,
1960    [MO_LEUW] = helper_le_lduw_mmu,
1961    [MO_LEUL] = helper_le_ldul_mmu,
1962    [MO_LEUQ] = helper_le_ldq_mmu,
1963    [MO_BEUW] = helper_be_lduw_mmu,
1964    [MO_BEUL] = helper_be_ldul_mmu,
1965    [MO_BEUQ] = helper_be_ldq_mmu,
1966};
1967
1968/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
1969 *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
1970 */
1971static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1972    [MO_UB]   = helper_ret_stb_mmu,
1973    [MO_LEUW] = helper_le_stw_mmu,
1974    [MO_LEUL] = helper_le_stl_mmu,
1975    [MO_LEUQ] = helper_le_stq_mmu,
1976    [MO_BEUW] = helper_be_stw_mmu,
1977    [MO_BEUL] = helper_be_stl_mmu,
1978    [MO_BEUQ] = helper_be_stq_mmu,
1979};
1980
1981/* We expect to use a 16-bit negative offset from ENV.  */
1982QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1983QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1984
1985/* Perform the TLB load and compare.  Places the result of the comparison
1986   in CR7, loads the addend of the TLB into R3, and returns the register
1987   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
1988
1989static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
1990                               TCGReg addrlo, TCGReg addrhi,
1991                               int mem_index, bool is_read)
1992{
1993    int cmp_off
1994        = (is_read
1995           ? offsetof(CPUTLBEntry, addr_read)
1996           : offsetof(CPUTLBEntry, addr_write));
1997    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1998    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1999    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2000    unsigned s_bits = opc & MO_SIZE;
2001    unsigned a_bits = get_alignment_bits(opc);
2002
2003    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2004    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
2005    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
2006
2007    /* Extract the page index, shifted into place for tlb index.  */
2008    if (TCG_TARGET_REG_BITS == 32) {
2009        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
2010                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
2011    } else {
2012        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
2013                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
2014    }
2015    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
2016
2017    /* Load the TLB comparator.  */
2018    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
2019        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
2020                        ? LWZUX : LDUX);
2021        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
2022    } else {
2023        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
2024        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2025            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
2026            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
2027        } else {
2028            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
2029        }
2030    }
2031
2032    /* Load the TLB addend for use on the fast path.  Do this asap
2033       to minimize any load use delay.  */
2034    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
2035               offsetof(CPUTLBEntry, addend));
2036
2037    /* Clear the non-page, non-alignment bits from the address */
2038    if (TCG_TARGET_REG_BITS == 32) {
2039        /* We don't support unaligned accesses on 32-bits.
2040         * Preserve the bottom bits and thus trigger a comparison
2041         * failure on unaligned accesses.
2042         */
2043        if (a_bits < s_bits) {
2044            a_bits = s_bits;
2045        }
2046        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2047                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
2048    } else {
2049        TCGReg t = addrlo;
2050
2051        /* If the access is unaligned, we need to make sure we fail if we
2052         * cross a page boundary.  The trick is to add the access size-1
2053         * to the address before masking the low bits.  That will make the
2054         * address overflow to the next page if we cross a page boundary,
2055         * which will then force a mismatch of the TLB compare.
2056         */
2057        if (a_bits < s_bits) {
2058            unsigned a_mask = (1 << a_bits) - 1;
2059            unsigned s_mask = (1 << s_bits) - 1;
2060            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2061            t = TCG_REG_R0;
2062        }
2063
2064        /* Mask the address for the requested alignment.  */
2065        if (TARGET_LONG_BITS == 32) {
2066            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2067                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
2068            /* Zero-extend the address for use in the final address.  */
2069            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
2070            addrlo = TCG_REG_R4;
2071        } else if (a_bits == 0) {
2072            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
2073        } else {
2074            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2075                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
2076            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
2077        }
2078    }
2079
2080    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2081        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
2082                    0, 7, TCG_TYPE_I32);
2083        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
2084        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2085    } else {
2086        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
2087                    0, 7, TCG_TYPE_TL);
2088    }
2089
2090    return addrlo;
2091}
2092
2093/* Record the context of a call to the out of line helper code for the slow
2094   path for a load or store, so that we can later generate the correct
2095   helper code.  */
2096static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
2097                                TCGReg datalo_reg, TCGReg datahi_reg,
2098                                TCGReg addrlo_reg, TCGReg addrhi_reg,
2099                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
2100{
2101    TCGLabelQemuLdst *label = new_ldst_label(s);
2102
2103    label->is_ld = is_ld;
2104    label->oi = oi;
2105    label->datalo_reg = datalo_reg;
2106    label->datahi_reg = datahi_reg;
2107    label->addrlo_reg = addrlo_reg;
2108    label->addrhi_reg = addrhi_reg;
2109    label->raddr = tcg_splitwx_to_rx(raddr);
2110    label->label_ptr[0] = lptr;
2111}
2112
2113static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2114{
2115    MemOpIdx oi = lb->oi;
2116    MemOp opc = get_memop(oi);
2117    TCGReg hi, lo, arg = TCG_REG_R3;
2118
2119    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2120        return false;
2121    }
2122
2123    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2124
2125    lo = lb->addrlo_reg;
2126    hi = lb->addrhi_reg;
2127    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2128        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2129        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2130        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2131    } else {
2132        /* If the address needed to be zero-extended, we'll have already
2133           placed it in R4.  The only remaining case is 64-bit guest.  */
2134        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2135    }
2136
2137    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2138    tcg_out32(s, MFSPR | RT(arg) | LR);
2139
2140    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2141
2142    lo = lb->datalo_reg;
2143    hi = lb->datahi_reg;
2144    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2145        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2146        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2147    } else if (opc & MO_SIGN) {
2148        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2149        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2150    } else {
2151        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2152    }
2153
2154    tcg_out_b(s, 0, lb->raddr);
2155    return true;
2156}
2157
2158static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2159{
2160    MemOpIdx oi = lb->oi;
2161    MemOp opc = get_memop(oi);
2162    MemOp s_bits = opc & MO_SIZE;
2163    TCGReg hi, lo, arg = TCG_REG_R3;
2164
2165    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2166        return false;
2167    }
2168
2169    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2170
2171    lo = lb->addrlo_reg;
2172    hi = lb->addrhi_reg;
2173    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2174        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2175        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2176        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2177    } else {
2178        /* If the address needed to be zero-extended, we'll have already
2179           placed it in R4.  The only remaining case is 64-bit guest.  */
2180        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2181    }
2182
2183    lo = lb->datalo_reg;
2184    hi = lb->datahi_reg;
2185    if (TCG_TARGET_REG_BITS == 32) {
2186        switch (s_bits) {
2187        case MO_64:
2188            arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2189            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2190            /* FALLTHRU */
2191        case MO_32:
2192            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2193            break;
2194        default:
2195            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2196            break;
2197        }
2198    } else {
2199        if (s_bits == MO_64) {
2200            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2201        } else {
2202            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2203        }
2204    }
2205
2206    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2207    tcg_out32(s, MFSPR | RT(arg) | LR);
2208
2209    tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2210
2211    tcg_out_b(s, 0, lb->raddr);
2212    return true;
2213}
2214#else
2215
2216static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
2217                                   TCGReg addrhi, unsigned a_bits)
2218{
2219    unsigned a_mask = (1 << a_bits) - 1;
2220    TCGLabelQemuLdst *label = new_ldst_label(s);
2221
2222    label->is_ld = is_ld;
2223    label->addrlo_reg = addrlo;
2224    label->addrhi_reg = addrhi;
2225
2226    /* We are expecting a_bits to max out at 7, much lower than ANDI. */
2227    tcg_debug_assert(a_bits < 16);
2228    tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
2229
2230    label->label_ptr[0] = s->code_ptr;
2231    tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2232
2233    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
2234}
2235
2236static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
2237{
2238    if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2239        return false;
2240    }
2241
2242    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2243        TCGReg arg = TCG_REG_R4;
2244
2245        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2246        if (l->addrlo_reg != arg) {
2247            tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
2248            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
2249        } else if (l->addrhi_reg != arg + 1) {
2250            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
2251            tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
2252        } else {
2253            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
2254            tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
2255            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
2256        }
2257    } else {
2258        tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
2259    }
2260    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
2261
2262    /* "Tail call" to the helper, with the return address back inline. */
2263    tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
2264                                          : helper_unaligned_st));
2265    return true;
2266}
2267
2268static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2269{
2270    return tcg_out_fail_alignment(s, l);
2271}
2272
2273static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2274{
2275    return tcg_out_fail_alignment(s, l);
2276}
2277
2278#endif /* SOFTMMU */
2279
2280static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2281{
2282    TCGReg datalo, datahi, addrlo, rbase;
2283    TCGReg addrhi __attribute__((unused));
2284    MemOpIdx oi;
2285    MemOp opc, s_bits;
2286#ifdef CONFIG_SOFTMMU
2287    int mem_index;
2288    tcg_insn_unit *label_ptr;
2289#else
2290    unsigned a_bits;
2291#endif
2292
2293    datalo = *args++;
2294    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2295    addrlo = *args++;
2296    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2297    oi = *args++;
2298    opc = get_memop(oi);
2299    s_bits = opc & MO_SIZE;
2300
2301#ifdef CONFIG_SOFTMMU
2302    mem_index = get_mmuidx(oi);
2303    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2304
2305    /* Load a pointer into the current opcode w/conditional branch-link. */
2306    label_ptr = s->code_ptr;
2307    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2308
2309    rbase = TCG_REG_R3;
2310#else  /* !CONFIG_SOFTMMU */
2311    a_bits = get_alignment_bits(opc);
2312    if (a_bits) {
2313        tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
2314    }
2315    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2316    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2317        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2318        addrlo = TCG_REG_TMP1;
2319    }
2320#endif
2321
2322    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2323        if (opc & MO_BSWAP) {
2324            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2325            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2326            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2327        } else if (rbase != 0) {
2328            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2329            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2330            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2331        } else if (addrlo == datahi) {
2332            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2333            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2334        } else {
2335            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2336            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2337        }
2338    } else {
2339        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2340        if (!have_isa_2_06 && insn == LDBRX) {
2341            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2342            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2343            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2344            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2345        } else if (insn) {
2346            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2347        } else {
2348            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2349            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2350            insn = qemu_exts_opc[s_bits];
2351            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2352        }
2353    }
2354
2355#ifdef CONFIG_SOFTMMU
2356    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2357                        s->code_ptr, label_ptr);
2358#endif
2359}
2360
2361static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2362{
2363    TCGReg datalo, datahi, addrlo, rbase;
2364    TCGReg addrhi __attribute__((unused));
2365    MemOpIdx oi;
2366    MemOp opc, s_bits;
2367#ifdef CONFIG_SOFTMMU
2368    int mem_index;
2369    tcg_insn_unit *label_ptr;
2370#else
2371    unsigned a_bits;
2372#endif
2373
2374    datalo = *args++;
2375    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2376    addrlo = *args++;
2377    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2378    oi = *args++;
2379    opc = get_memop(oi);
2380    s_bits = opc & MO_SIZE;
2381
2382#ifdef CONFIG_SOFTMMU
2383    mem_index = get_mmuidx(oi);
2384    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2385
2386    /* Load a pointer into the current opcode w/conditional branch-link. */
2387    label_ptr = s->code_ptr;
2388    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2389
2390    rbase = TCG_REG_R3;
2391#else  /* !CONFIG_SOFTMMU */
2392    a_bits = get_alignment_bits(opc);
2393    if (a_bits) {
2394        tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
2395    }
2396    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2397    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2398        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2399        addrlo = TCG_REG_TMP1;
2400    }
2401#endif
2402
2403    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2404        if (opc & MO_BSWAP) {
2405            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2406            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2407            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2408        } else if (rbase != 0) {
2409            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2410            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2411            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2412        } else {
2413            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2414            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2415        }
2416    } else {
2417        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2418        if (!have_isa_2_06 && insn == STDBRX) {
2419            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2420            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2421            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2422            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2423        } else {
2424            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2425        }
2426    }
2427
2428#ifdef CONFIG_SOFTMMU
2429    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2430                        s->code_ptr, label_ptr);
2431#endif
2432}
2433
2434static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2435{
2436    int i;
2437    for (i = 0; i < count; ++i) {
2438        p[i] = NOP;
2439    }
2440}
2441
2442/* Parameters for function call generation, used in tcg.c.  */
2443#define TCG_TARGET_STACK_ALIGN       16
2444
2445#ifdef _CALL_AIX
2446# define LINK_AREA_SIZE                (6 * SZR)
2447# define LR_OFFSET                     (1 * SZR)
2448# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2449#elif defined(_CALL_DARWIN)
2450# define LINK_AREA_SIZE                (6 * SZR)
2451# define LR_OFFSET                     (2 * SZR)
2452#elif TCG_TARGET_REG_BITS == 64
2453# if defined(_CALL_ELF) && _CALL_ELF == 2
2454#  define LINK_AREA_SIZE               (4 * SZR)
2455#  define LR_OFFSET                    (1 * SZR)
2456# endif
2457#else /* TCG_TARGET_REG_BITS == 32 */
2458# if defined(_CALL_SYSV)
2459#  define LINK_AREA_SIZE               (2 * SZR)
2460#  define LR_OFFSET                    (1 * SZR)
2461# endif
2462#endif
2463#ifndef LR_OFFSET
2464# error "Unhandled abi"
2465#endif
2466#ifndef TCG_TARGET_CALL_STACK_OFFSET
2467# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2468#endif
2469
2470#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2471#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2472
2473#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2474                     + TCG_STATIC_CALL_ARGS_SIZE    \
2475                     + CPU_TEMP_BUF_SIZE            \
2476                     + REG_SAVE_SIZE                \
2477                     + TCG_TARGET_STACK_ALIGN - 1)  \
2478                    & -TCG_TARGET_STACK_ALIGN)
2479
2480#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2481
2482static void tcg_target_qemu_prologue(TCGContext *s)
2483{
2484    int i;
2485
2486#ifdef _CALL_AIX
2487    const void **desc = (const void **)s->code_ptr;
2488    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2489    desc[1] = 0;                            /* environment pointer */
2490    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2491#endif
2492
2493    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2494                  CPU_TEMP_BUF_SIZE);
2495
2496    /* Prologue */
2497    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2498    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2499              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2500
2501    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2502        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2503                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2504    }
2505    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2506
2507#ifndef CONFIG_SOFTMMU
2508    if (guest_base) {
2509        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2510        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2511    }
2512#endif
2513
2514    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2515    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2516    if (USE_REG_TB) {
2517        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2518    }
2519    tcg_out32(s, BCCTR | BO_ALWAYS);
2520
2521    /* Epilogue */
2522    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2523
2524    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2525    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2526        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2527                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2528    }
2529    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2530    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2531    tcg_out32(s, BCLR | BO_ALWAYS);
2532}
2533
2534static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2535{
2536    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2537    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2538}
2539
2540static void tcg_out_goto_tb(TCGContext *s, int which)
2541{
2542    uintptr_t ptr = get_jmp_target_addr(s, which);
2543
2544    if (USE_REG_TB) {
2545        ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
2546        tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
2547
2548        /* Direct branch will be patched by tb_target_set_jmp_target. */
2549        set_jmp_insn_offset(s, which);
2550        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2551
2552        /* When branch is out of range, fall through to indirect. */
2553        tcg_out32(s, BCCTR | BO_ALWAYS);
2554
2555        /* For the unlinked case, need to reset TCG_REG_TB.  */
2556        set_jmp_reset_offset(s, which);
2557        tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
2558                         -tcg_current_code_size(s));
2559    } else {
2560        /* Direct branch will be patched by tb_target_set_jmp_target. */
2561        set_jmp_insn_offset(s, which);
2562        tcg_out32(s, NOP);
2563
2564        /* When branch is out of range, fall through to indirect. */
2565        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
2566        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
2567        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2568        tcg_out32(s, BCCTR | BO_ALWAYS);
2569        set_jmp_reset_offset(s, which);
2570    }
2571}
2572
2573void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2574                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2575{
2576    uintptr_t addr = tb->jmp_target_addr[n];
2577    intptr_t diff = addr - jmp_rx;
2578    tcg_insn_unit insn;
2579
2580    if (in_range_b(diff)) {
2581        insn = B | (diff & 0x3fffffc);
2582    } else if (USE_REG_TB) {
2583        insn = MTSPR | RS(TCG_REG_TB) | CTR;
2584    } else {
2585        insn = NOP;
2586    }
2587
2588    qatomic_set((uint32_t *)jmp_rw, insn);
2589    flush_idcache_range(jmp_rx, jmp_rw, 4);
2590}
2591
2592static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2593                       const TCGArg args[TCG_MAX_OP_ARGS],
2594                       const int const_args[TCG_MAX_OP_ARGS])
2595{
2596    TCGArg a0, a1, a2;
2597
2598    switch (opc) {
2599    case INDEX_op_goto_ptr:
2600        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2601        if (USE_REG_TB) {
2602            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2603        }
2604        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2605        tcg_out32(s, BCCTR | BO_ALWAYS);
2606        break;
2607    case INDEX_op_br:
2608        {
2609            TCGLabel *l = arg_label(args[0]);
2610            uint32_t insn = B;
2611
2612            if (l->has_value) {
2613                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2614                                       l->u.value_ptr);
2615            } else {
2616                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2617            }
2618            tcg_out32(s, insn);
2619        }
2620        break;
2621    case INDEX_op_ld8u_i32:
2622    case INDEX_op_ld8u_i64:
2623        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2624        break;
2625    case INDEX_op_ld8s_i32:
2626    case INDEX_op_ld8s_i64:
2627        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2628        tcg_out_ext8s(s, args[0], args[0]);
2629        break;
2630    case INDEX_op_ld16u_i32:
2631    case INDEX_op_ld16u_i64:
2632        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2633        break;
2634    case INDEX_op_ld16s_i32:
2635    case INDEX_op_ld16s_i64:
2636        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2637        break;
2638    case INDEX_op_ld_i32:
2639    case INDEX_op_ld32u_i64:
2640        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2641        break;
2642    case INDEX_op_ld32s_i64:
2643        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2644        break;
2645    case INDEX_op_ld_i64:
2646        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2647        break;
2648    case INDEX_op_st8_i32:
2649    case INDEX_op_st8_i64:
2650        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2651        break;
2652    case INDEX_op_st16_i32:
2653    case INDEX_op_st16_i64:
2654        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2655        break;
2656    case INDEX_op_st_i32:
2657    case INDEX_op_st32_i64:
2658        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2659        break;
2660    case INDEX_op_st_i64:
2661        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2662        break;
2663
2664    case INDEX_op_add_i32:
2665        a0 = args[0], a1 = args[1], a2 = args[2];
2666        if (const_args[2]) {
2667        do_addi_32:
2668            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2669        } else {
2670            tcg_out32(s, ADD | TAB(a0, a1, a2));
2671        }
2672        break;
2673    case INDEX_op_sub_i32:
2674        a0 = args[0], a1 = args[1], a2 = args[2];
2675        if (const_args[1]) {
2676            if (const_args[2]) {
2677                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2678            } else {
2679                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2680            }
2681        } else if (const_args[2]) {
2682            a2 = -a2;
2683            goto do_addi_32;
2684        } else {
2685            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2686        }
2687        break;
2688
2689    case INDEX_op_and_i32:
2690        a0 = args[0], a1 = args[1], a2 = args[2];
2691        if (const_args[2]) {
2692            tcg_out_andi32(s, a0, a1, a2);
2693        } else {
2694            tcg_out32(s, AND | SAB(a1, a0, a2));
2695        }
2696        break;
2697    case INDEX_op_and_i64:
2698        a0 = args[0], a1 = args[1], a2 = args[2];
2699        if (const_args[2]) {
2700            tcg_out_andi64(s, a0, a1, a2);
2701        } else {
2702            tcg_out32(s, AND | SAB(a1, a0, a2));
2703        }
2704        break;
2705    case INDEX_op_or_i64:
2706    case INDEX_op_or_i32:
2707        a0 = args[0], a1 = args[1], a2 = args[2];
2708        if (const_args[2]) {
2709            tcg_out_ori32(s, a0, a1, a2);
2710        } else {
2711            tcg_out32(s, OR | SAB(a1, a0, a2));
2712        }
2713        break;
2714    case INDEX_op_xor_i64:
2715    case INDEX_op_xor_i32:
2716        a0 = args[0], a1 = args[1], a2 = args[2];
2717        if (const_args[2]) {
2718            tcg_out_xori32(s, a0, a1, a2);
2719        } else {
2720            tcg_out32(s, XOR | SAB(a1, a0, a2));
2721        }
2722        break;
2723    case INDEX_op_andc_i32:
2724        a0 = args[0], a1 = args[1], a2 = args[2];
2725        if (const_args[2]) {
2726            tcg_out_andi32(s, a0, a1, ~a2);
2727        } else {
2728            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2729        }
2730        break;
2731    case INDEX_op_andc_i64:
2732        a0 = args[0], a1 = args[1], a2 = args[2];
2733        if (const_args[2]) {
2734            tcg_out_andi64(s, a0, a1, ~a2);
2735        } else {
2736            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2737        }
2738        break;
2739    case INDEX_op_orc_i32:
2740        if (const_args[2]) {
2741            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2742            break;
2743        }
2744        /* FALLTHRU */
2745    case INDEX_op_orc_i64:
2746        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2747        break;
2748    case INDEX_op_eqv_i32:
2749        if (const_args[2]) {
2750            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2751            break;
2752        }
2753        /* FALLTHRU */
2754    case INDEX_op_eqv_i64:
2755        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2756        break;
2757    case INDEX_op_nand_i32:
2758    case INDEX_op_nand_i64:
2759        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2760        break;
2761    case INDEX_op_nor_i32:
2762    case INDEX_op_nor_i64:
2763        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2764        break;
2765
2766    case INDEX_op_clz_i32:
2767        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2768                      args[2], const_args[2]);
2769        break;
2770    case INDEX_op_ctz_i32:
2771        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2772                      args[2], const_args[2]);
2773        break;
2774    case INDEX_op_ctpop_i32:
2775        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2776        break;
2777
2778    case INDEX_op_clz_i64:
2779        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2780                      args[2], const_args[2]);
2781        break;
2782    case INDEX_op_ctz_i64:
2783        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2784                      args[2], const_args[2]);
2785        break;
2786    case INDEX_op_ctpop_i64:
2787        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2788        break;
2789
2790    case INDEX_op_mul_i32:
2791        a0 = args[0], a1 = args[1], a2 = args[2];
2792        if (const_args[2]) {
2793            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2794        } else {
2795            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2796        }
2797        break;
2798
2799    case INDEX_op_div_i32:
2800        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2801        break;
2802
2803    case INDEX_op_divu_i32:
2804        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2805        break;
2806
2807    case INDEX_op_rem_i32:
2808        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
2809        break;
2810
2811    case INDEX_op_remu_i32:
2812        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
2813        break;
2814
2815    case INDEX_op_shl_i32:
2816        if (const_args[2]) {
2817            /* Limit immediate shift count lest we create an illegal insn.  */
2818            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2819        } else {
2820            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2821        }
2822        break;
2823    case INDEX_op_shr_i32:
2824        if (const_args[2]) {
2825            /* Limit immediate shift count lest we create an illegal insn.  */
2826            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2827        } else {
2828            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2829        }
2830        break;
2831    case INDEX_op_sar_i32:
2832        if (const_args[2]) {
2833            tcg_out_sari32(s, args[0], args[1], args[2]);
2834        } else {
2835            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2836        }
2837        break;
2838    case INDEX_op_rotl_i32:
2839        if (const_args[2]) {
2840            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2841        } else {
2842            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2843                         | MB(0) | ME(31));
2844        }
2845        break;
2846    case INDEX_op_rotr_i32:
2847        if (const_args[2]) {
2848            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2849        } else {
2850            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2851            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2852                         | MB(0) | ME(31));
2853        }
2854        break;
2855
2856    case INDEX_op_brcond_i32:
2857        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2858                       arg_label(args[3]), TCG_TYPE_I32);
2859        break;
2860    case INDEX_op_brcond_i64:
2861        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2862                       arg_label(args[3]), TCG_TYPE_I64);
2863        break;
2864    case INDEX_op_brcond2_i32:
2865        tcg_out_brcond2(s, args, const_args);
2866        break;
2867
2868    case INDEX_op_neg_i32:
2869    case INDEX_op_neg_i64:
2870        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2871        break;
2872
2873    case INDEX_op_not_i32:
2874    case INDEX_op_not_i64:
2875        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2876        break;
2877
2878    case INDEX_op_add_i64:
2879        a0 = args[0], a1 = args[1], a2 = args[2];
2880        if (const_args[2]) {
2881        do_addi_64:
2882            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2883        } else {
2884            tcg_out32(s, ADD | TAB(a0, a1, a2));
2885        }
2886        break;
2887    case INDEX_op_sub_i64:
2888        a0 = args[0], a1 = args[1], a2 = args[2];
2889        if (const_args[1]) {
2890            if (const_args[2]) {
2891                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2892            } else {
2893                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2894            }
2895        } else if (const_args[2]) {
2896            a2 = -a2;
2897            goto do_addi_64;
2898        } else {
2899            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2900        }
2901        break;
2902
2903    case INDEX_op_shl_i64:
2904        if (const_args[2]) {
2905            /* Limit immediate shift count lest we create an illegal insn.  */
2906            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2907        } else {
2908            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2909        }
2910        break;
2911    case INDEX_op_shr_i64:
2912        if (const_args[2]) {
2913            /* Limit immediate shift count lest we create an illegal insn.  */
2914            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2915        } else {
2916            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2917        }
2918        break;
2919    case INDEX_op_sar_i64:
2920        if (const_args[2]) {
2921            tcg_out_sari64(s, args[0], args[1], args[2]);
2922        } else {
2923            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2924        }
2925        break;
2926    case INDEX_op_rotl_i64:
2927        if (const_args[2]) {
2928            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2929        } else {
2930            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2931        }
2932        break;
2933    case INDEX_op_rotr_i64:
2934        if (const_args[2]) {
2935            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2936        } else {
2937            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2938            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2939        }
2940        break;
2941
2942    case INDEX_op_mul_i64:
2943        a0 = args[0], a1 = args[1], a2 = args[2];
2944        if (const_args[2]) {
2945            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2946        } else {
2947            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2948        }
2949        break;
2950    case INDEX_op_div_i64:
2951        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2952        break;
2953    case INDEX_op_divu_i64:
2954        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2955        break;
2956    case INDEX_op_rem_i64:
2957        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
2958        break;
2959    case INDEX_op_remu_i64:
2960        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
2961        break;
2962
2963    case INDEX_op_qemu_ld_i32:
2964        tcg_out_qemu_ld(s, args, false);
2965        break;
2966    case INDEX_op_qemu_ld_i64:
2967        tcg_out_qemu_ld(s, args, true);
2968        break;
2969    case INDEX_op_qemu_st_i32:
2970        tcg_out_qemu_st(s, args, false);
2971        break;
2972    case INDEX_op_qemu_st_i64:
2973        tcg_out_qemu_st(s, args, true);
2974        break;
2975
2976    case INDEX_op_ext8s_i32:
2977    case INDEX_op_ext8s_i64:
2978        tcg_out_ext8s(s, args[0], args[1]);
2979        break;
2980    case INDEX_op_ext16s_i32:
2981    case INDEX_op_ext16s_i64:
2982        tcg_out_ext16s(s, args[0], args[1]);
2983        break;
2984    case INDEX_op_ext_i32_i64:
2985    case INDEX_op_ext32s_i64:
2986        tcg_out_ext32s(s, args[0], args[1]);
2987        break;
2988    case INDEX_op_extu_i32_i64:
2989        tcg_out_ext32u(s, args[0], args[1]);
2990        break;
2991
2992    case INDEX_op_setcond_i32:
2993        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2994                        const_args[2]);
2995        break;
2996    case INDEX_op_setcond_i64:
2997        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2998                        const_args[2]);
2999        break;
3000    case INDEX_op_setcond2_i32:
3001        tcg_out_setcond2(s, args, const_args);
3002        break;
3003
3004    case INDEX_op_bswap16_i32:
3005    case INDEX_op_bswap16_i64:
3006        tcg_out_bswap16(s, args[0], args[1], args[2]);
3007        break;
3008    case INDEX_op_bswap32_i32:
3009        tcg_out_bswap32(s, args[0], args[1], 0);
3010        break;
3011    case INDEX_op_bswap32_i64:
3012        tcg_out_bswap32(s, args[0], args[1], args[2]);
3013        break;
3014    case INDEX_op_bswap64_i64:
3015        tcg_out_bswap64(s, args[0], args[1]);
3016        break;
3017
3018    case INDEX_op_deposit_i32:
3019        if (const_args[2]) {
3020            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3021            tcg_out_andi32(s, args[0], args[0], ~mask);
3022        } else {
3023            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3024                        32 - args[3] - args[4], 31 - args[3]);
3025        }
3026        break;
3027    case INDEX_op_deposit_i64:
3028        if (const_args[2]) {
3029            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3030            tcg_out_andi64(s, args[0], args[0], ~mask);
3031        } else {
3032            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3033                        64 - args[3] - args[4]);
3034        }
3035        break;
3036
3037    case INDEX_op_extract_i32:
3038        tcg_out_rlw(s, RLWINM, args[0], args[1],
3039                    32 - args[2], 32 - args[3], 31);
3040        break;
3041    case INDEX_op_extract_i64:
3042        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3043        break;
3044
3045    case INDEX_op_movcond_i32:
3046        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3047                        args[3], args[4], const_args[2]);
3048        break;
3049    case INDEX_op_movcond_i64:
3050        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3051                        args[3], args[4], const_args[2]);
3052        break;
3053
3054#if TCG_TARGET_REG_BITS == 64
3055    case INDEX_op_add2_i64:
3056#else
3057    case INDEX_op_add2_i32:
3058#endif
3059        /* Note that the CA bit is defined based on the word size of the
3060           environment.  So in 64-bit mode it's always carry-out of bit 63.
3061           The fallback code using deposit works just as well for 32-bit.  */
3062        a0 = args[0], a1 = args[1];
3063        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3064            a0 = TCG_REG_R0;
3065        }
3066        if (const_args[4]) {
3067            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3068        } else {
3069            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3070        }
3071        if (const_args[5]) {
3072            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3073        } else {
3074            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3075        }
3076        if (a0 != args[0]) {
3077            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3078        }
3079        break;
3080
3081#if TCG_TARGET_REG_BITS == 64
3082    case INDEX_op_sub2_i64:
3083#else
3084    case INDEX_op_sub2_i32:
3085#endif
3086        a0 = args[0], a1 = args[1];
3087        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3088            a0 = TCG_REG_R0;
3089        }
3090        if (const_args[2]) {
3091            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3092        } else {
3093            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3094        }
3095        if (const_args[3]) {
3096            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3097        } else {
3098            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3099        }
3100        if (a0 != args[0]) {
3101            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3102        }
3103        break;
3104
3105    case INDEX_op_muluh_i32:
3106        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3107        break;
3108    case INDEX_op_mulsh_i32:
3109        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3110        break;
3111    case INDEX_op_muluh_i64:
3112        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3113        break;
3114    case INDEX_op_mulsh_i64:
3115        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3116        break;
3117
3118    case INDEX_op_mb:
3119        tcg_out_mb(s, args[0]);
3120        break;
3121
3122    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3123    case INDEX_op_mov_i64:
3124    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3125    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3126    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3127    default:
3128        tcg_abort();
3129    }
3130}
3131
3132int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3133{
3134    switch (opc) {
3135    case INDEX_op_and_vec:
3136    case INDEX_op_or_vec:
3137    case INDEX_op_xor_vec:
3138    case INDEX_op_andc_vec:
3139    case INDEX_op_not_vec:
3140    case INDEX_op_nor_vec:
3141    case INDEX_op_eqv_vec:
3142    case INDEX_op_nand_vec:
3143        return 1;
3144    case INDEX_op_orc_vec:
3145        return have_isa_2_07;
3146    case INDEX_op_add_vec:
3147    case INDEX_op_sub_vec:
3148    case INDEX_op_smax_vec:
3149    case INDEX_op_smin_vec:
3150    case INDEX_op_umax_vec:
3151    case INDEX_op_umin_vec:
3152    case INDEX_op_shlv_vec:
3153    case INDEX_op_shrv_vec:
3154    case INDEX_op_sarv_vec:
3155    case INDEX_op_rotlv_vec:
3156        return vece <= MO_32 || have_isa_2_07;
3157    case INDEX_op_ssadd_vec:
3158    case INDEX_op_sssub_vec:
3159    case INDEX_op_usadd_vec:
3160    case INDEX_op_ussub_vec:
3161        return vece <= MO_32;
3162    case INDEX_op_cmp_vec:
3163    case INDEX_op_shli_vec:
3164    case INDEX_op_shri_vec:
3165    case INDEX_op_sari_vec:
3166    case INDEX_op_rotli_vec:
3167        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3168    case INDEX_op_neg_vec:
3169        return vece >= MO_32 && have_isa_3_00;
3170    case INDEX_op_mul_vec:
3171        switch (vece) {
3172        case MO_8:
3173        case MO_16:
3174            return -1;
3175        case MO_32:
3176            return have_isa_2_07 ? 1 : -1;
3177        case MO_64:
3178            return have_isa_3_10;
3179        }
3180        return 0;
3181    case INDEX_op_bitsel_vec:
3182        return have_vsx;
3183    case INDEX_op_rotrv_vec:
3184        return -1;
3185    default:
3186        return 0;
3187    }
3188}
3189
3190static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3191                            TCGReg dst, TCGReg src)
3192{
3193    tcg_debug_assert(dst >= TCG_REG_V0);
3194
3195    /* Splat from integer reg allowed via constraints for v3.00.  */
3196    if (src < TCG_REG_V0) {
3197        tcg_debug_assert(have_isa_3_00);
3198        switch (vece) {
3199        case MO_64:
3200            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3201            return true;
3202        case MO_32:
3203            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3204            return true;
3205        default:
3206            /* Fail, so that we fall back on either dupm or mov+dup.  */
3207            return false;
3208        }
3209    }
3210
3211    /*
3212     * Recall we use (or emulate) VSX integer loads, so the integer is
3213     * right justified within the left (zero-index) double-word.
3214     */
3215    switch (vece) {
3216    case MO_8:
3217        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3218        break;
3219    case MO_16:
3220        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3221        break;
3222    case MO_32:
3223        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3224        break;
3225    case MO_64:
3226        if (have_vsx) {
3227            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3228            break;
3229        }
3230        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3231        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3232        break;
3233    default:
3234        g_assert_not_reached();
3235    }
3236    return true;
3237}
3238
3239static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3240                             TCGReg out, TCGReg base, intptr_t offset)
3241{
3242    int elt;
3243
3244    tcg_debug_assert(out >= TCG_REG_V0);
3245    switch (vece) {
3246    case MO_8:
3247        if (have_isa_3_00) {
3248            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3249        } else {
3250            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3251        }
3252        elt = extract32(offset, 0, 4);
3253#if !HOST_BIG_ENDIAN
3254        elt ^= 15;
3255#endif
3256        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3257        break;
3258    case MO_16:
3259        tcg_debug_assert((offset & 1) == 0);
3260        if (have_isa_3_00) {
3261            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3262        } else {
3263            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3264        }
3265        elt = extract32(offset, 1, 3);
3266#if !HOST_BIG_ENDIAN
3267        elt ^= 7;
3268#endif
3269        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3270        break;
3271    case MO_32:
3272        if (have_isa_3_00) {
3273            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3274            break;
3275        }
3276        tcg_debug_assert((offset & 3) == 0);
3277        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3278        elt = extract32(offset, 2, 2);
3279#if !HOST_BIG_ENDIAN
3280        elt ^= 3;
3281#endif
3282        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3283        break;
3284    case MO_64:
3285        if (have_vsx) {
3286            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3287            break;
3288        }
3289        tcg_debug_assert((offset & 7) == 0);
3290        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3291        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3292        elt = extract32(offset, 3, 1);
3293#if !HOST_BIG_ENDIAN
3294        elt = !elt;
3295#endif
3296        if (elt) {
3297            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3298        } else {
3299            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3300        }
3301        break;
3302    default:
3303        g_assert_not_reached();
3304    }
3305    return true;
3306}
3307
3308static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3309                           unsigned vecl, unsigned vece,
3310                           const TCGArg args[TCG_MAX_OP_ARGS],
3311                           const int const_args[TCG_MAX_OP_ARGS])
3312{
3313    static const uint32_t
3314        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3315        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3316        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3317        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3318        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3319        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3320        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3321        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3322        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3323        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3324        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3325        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3326        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3327        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3328        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3329        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3330        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3331        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3332        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3333        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3334        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3335        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3336        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3337        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3338        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3339
3340    TCGType type = vecl + TCG_TYPE_V64;
3341    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3342    uint32_t insn;
3343
3344    switch (opc) {
3345    case INDEX_op_ld_vec:
3346        tcg_out_ld(s, type, a0, a1, a2);
3347        return;
3348    case INDEX_op_st_vec:
3349        tcg_out_st(s, type, a0, a1, a2);
3350        return;
3351    case INDEX_op_dupm_vec:
3352        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3353        return;
3354
3355    case INDEX_op_add_vec:
3356        insn = add_op[vece];
3357        break;
3358    case INDEX_op_sub_vec:
3359        insn = sub_op[vece];
3360        break;
3361    case INDEX_op_neg_vec:
3362        insn = neg_op[vece];
3363        a2 = a1;
3364        a1 = 0;
3365        break;
3366    case INDEX_op_mul_vec:
3367        insn = mul_op[vece];
3368        break;
3369    case INDEX_op_ssadd_vec:
3370        insn = ssadd_op[vece];
3371        break;
3372    case INDEX_op_sssub_vec:
3373        insn = sssub_op[vece];
3374        break;
3375    case INDEX_op_usadd_vec:
3376        insn = usadd_op[vece];
3377        break;
3378    case INDEX_op_ussub_vec:
3379        insn = ussub_op[vece];
3380        break;
3381    case INDEX_op_smin_vec:
3382        insn = smin_op[vece];
3383        break;
3384    case INDEX_op_umin_vec:
3385        insn = umin_op[vece];
3386        break;
3387    case INDEX_op_smax_vec:
3388        insn = smax_op[vece];
3389        break;
3390    case INDEX_op_umax_vec:
3391        insn = umax_op[vece];
3392        break;
3393    case INDEX_op_shlv_vec:
3394        insn = shlv_op[vece];
3395        break;
3396    case INDEX_op_shrv_vec:
3397        insn = shrv_op[vece];
3398        break;
3399    case INDEX_op_sarv_vec:
3400        insn = sarv_op[vece];
3401        break;
3402    case INDEX_op_and_vec:
3403        insn = VAND;
3404        break;
3405    case INDEX_op_or_vec:
3406        insn = VOR;
3407        break;
3408    case INDEX_op_xor_vec:
3409        insn = VXOR;
3410        break;
3411    case INDEX_op_andc_vec:
3412        insn = VANDC;
3413        break;
3414    case INDEX_op_not_vec:
3415        insn = VNOR;
3416        a2 = a1;
3417        break;
3418    case INDEX_op_orc_vec:
3419        insn = VORC;
3420        break;
3421    case INDEX_op_nand_vec:
3422        insn = VNAND;
3423        break;
3424    case INDEX_op_nor_vec:
3425        insn = VNOR;
3426        break;
3427    case INDEX_op_eqv_vec:
3428        insn = VEQV;
3429        break;
3430
3431    case INDEX_op_cmp_vec:
3432        switch (args[3]) {
3433        case TCG_COND_EQ:
3434            insn = eq_op[vece];
3435            break;
3436        case TCG_COND_NE:
3437            insn = ne_op[vece];
3438            break;
3439        case TCG_COND_GT:
3440            insn = gts_op[vece];
3441            break;
3442        case TCG_COND_GTU:
3443            insn = gtu_op[vece];
3444            break;
3445        default:
3446            g_assert_not_reached();
3447        }
3448        break;
3449
3450    case INDEX_op_bitsel_vec:
3451        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3452        return;
3453
3454    case INDEX_op_dup2_vec:
3455        assert(TCG_TARGET_REG_BITS == 32);
3456        /* With inputs a1 = xLxx, a2 = xHxx  */
3457        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3458        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3459        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3460        return;
3461
3462    case INDEX_op_ppc_mrgh_vec:
3463        insn = mrgh_op[vece];
3464        break;
3465    case INDEX_op_ppc_mrgl_vec:
3466        insn = mrgl_op[vece];
3467        break;
3468    case INDEX_op_ppc_muleu_vec:
3469        insn = muleu_op[vece];
3470        break;
3471    case INDEX_op_ppc_mulou_vec:
3472        insn = mulou_op[vece];
3473        break;
3474    case INDEX_op_ppc_pkum_vec:
3475        insn = pkum_op[vece];
3476        break;
3477    case INDEX_op_rotlv_vec:
3478        insn = rotl_op[vece];
3479        break;
3480    case INDEX_op_ppc_msum_vec:
3481        tcg_debug_assert(vece == MO_16);
3482        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3483        return;
3484
3485    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3486    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3487    default:
3488        g_assert_not_reached();
3489    }
3490
3491    tcg_debug_assert(insn != 0);
3492    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3493}
3494
3495static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3496                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3497{
3498    TCGv_vec t1;
3499
3500    if (vece == MO_32) {
3501        /*
3502         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3503         * So using negative numbers gets us the 4th bit easily.
3504         */
3505        imm = sextract32(imm, 0, 5);
3506    } else {
3507        imm &= (8 << vece) - 1;
3508    }
3509
3510    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
3511    t1 = tcg_constant_vec(type, MO_8, imm);
3512    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3513              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3514}
3515
3516static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3517                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3518{
3519    bool need_swap = false, need_inv = false;
3520
3521    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3522
3523    switch (cond) {
3524    case TCG_COND_EQ:
3525    case TCG_COND_GT:
3526    case TCG_COND_GTU:
3527        break;
3528    case TCG_COND_NE:
3529        if (have_isa_3_00 && vece <= MO_32) {
3530            break;
3531        }
3532        /* fall through */
3533    case TCG_COND_LE:
3534    case TCG_COND_LEU:
3535        need_inv = true;
3536        break;
3537    case TCG_COND_LT:
3538    case TCG_COND_LTU:
3539        need_swap = true;
3540        break;
3541    case TCG_COND_GE:
3542    case TCG_COND_GEU:
3543        need_swap = need_inv = true;
3544        break;
3545    default:
3546        g_assert_not_reached();
3547    }
3548
3549    if (need_inv) {
3550        cond = tcg_invert_cond(cond);
3551    }
3552    if (need_swap) {
3553        TCGv_vec t1;
3554        t1 = v1, v1 = v2, v2 = t1;
3555        cond = tcg_swap_cond(cond);
3556    }
3557
3558    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3559              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3560
3561    if (need_inv) {
3562        tcg_gen_not_vec(vece, v0, v0);
3563    }
3564}
3565
3566static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3567                           TCGv_vec v1, TCGv_vec v2)
3568{
3569    TCGv_vec t1 = tcg_temp_new_vec(type);
3570    TCGv_vec t2 = tcg_temp_new_vec(type);
3571    TCGv_vec c0, c16;
3572
3573    switch (vece) {
3574    case MO_8:
3575    case MO_16:
3576        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3577                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3578        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3579                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3580        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3581                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3582        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3583                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3584        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3585                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3586	break;
3587
3588    case MO_32:
3589        tcg_debug_assert(!have_isa_2_07);
3590        /*
3591         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3592         * So using -16 is a quick way to represent 16.
3593         */
3594        c16 = tcg_constant_vec(type, MO_8, -16);
3595        c0 = tcg_constant_vec(type, MO_8, 0);
3596
3597        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3598                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3599        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3600                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3601        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3602                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3603        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3604                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3605        tcg_gen_add_vec(MO_32, v0, t1, t2);
3606        break;
3607
3608    default:
3609        g_assert_not_reached();
3610    }
3611    tcg_temp_free_vec(t1);
3612    tcg_temp_free_vec(t2);
3613}
3614
3615void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3616                       TCGArg a0, ...)
3617{
3618    va_list va;
3619    TCGv_vec v0, v1, v2, t0;
3620    TCGArg a2;
3621
3622    va_start(va, a0);
3623    v0 = temp_tcgv_vec(arg_temp(a0));
3624    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3625    a2 = va_arg(va, TCGArg);
3626
3627    switch (opc) {
3628    case INDEX_op_shli_vec:
3629        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3630        break;
3631    case INDEX_op_shri_vec:
3632        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3633        break;
3634    case INDEX_op_sari_vec:
3635        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3636        break;
3637    case INDEX_op_rotli_vec:
3638        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3639        break;
3640    case INDEX_op_cmp_vec:
3641        v2 = temp_tcgv_vec(arg_temp(a2));
3642        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3643        break;
3644    case INDEX_op_mul_vec:
3645        v2 = temp_tcgv_vec(arg_temp(a2));
3646        expand_vec_mul(type, vece, v0, v1, v2);
3647        break;
3648    case INDEX_op_rotlv_vec:
3649        v2 = temp_tcgv_vec(arg_temp(a2));
3650        t0 = tcg_temp_new_vec(type);
3651        tcg_gen_neg_vec(vece, t0, v2);
3652        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3653        tcg_temp_free_vec(t0);
3654        break;
3655    default:
3656        g_assert_not_reached();
3657    }
3658    va_end(va);
3659}
3660
3661static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3662{
3663    switch (op) {
3664    case INDEX_op_goto_ptr:
3665        return C_O0_I1(r);
3666
3667    case INDEX_op_ld8u_i32:
3668    case INDEX_op_ld8s_i32:
3669    case INDEX_op_ld16u_i32:
3670    case INDEX_op_ld16s_i32:
3671    case INDEX_op_ld_i32:
3672    case INDEX_op_ctpop_i32:
3673    case INDEX_op_neg_i32:
3674    case INDEX_op_not_i32:
3675    case INDEX_op_ext8s_i32:
3676    case INDEX_op_ext16s_i32:
3677    case INDEX_op_bswap16_i32:
3678    case INDEX_op_bswap32_i32:
3679    case INDEX_op_extract_i32:
3680    case INDEX_op_ld8u_i64:
3681    case INDEX_op_ld8s_i64:
3682    case INDEX_op_ld16u_i64:
3683    case INDEX_op_ld16s_i64:
3684    case INDEX_op_ld32u_i64:
3685    case INDEX_op_ld32s_i64:
3686    case INDEX_op_ld_i64:
3687    case INDEX_op_ctpop_i64:
3688    case INDEX_op_neg_i64:
3689    case INDEX_op_not_i64:
3690    case INDEX_op_ext8s_i64:
3691    case INDEX_op_ext16s_i64:
3692    case INDEX_op_ext32s_i64:
3693    case INDEX_op_ext_i32_i64:
3694    case INDEX_op_extu_i32_i64:
3695    case INDEX_op_bswap16_i64:
3696    case INDEX_op_bswap32_i64:
3697    case INDEX_op_bswap64_i64:
3698    case INDEX_op_extract_i64:
3699        return C_O1_I1(r, r);
3700
3701    case INDEX_op_st8_i32:
3702    case INDEX_op_st16_i32:
3703    case INDEX_op_st_i32:
3704    case INDEX_op_st8_i64:
3705    case INDEX_op_st16_i64:
3706    case INDEX_op_st32_i64:
3707    case INDEX_op_st_i64:
3708        return C_O0_I2(r, r);
3709
3710    case INDEX_op_add_i32:
3711    case INDEX_op_and_i32:
3712    case INDEX_op_or_i32:
3713    case INDEX_op_xor_i32:
3714    case INDEX_op_andc_i32:
3715    case INDEX_op_orc_i32:
3716    case INDEX_op_eqv_i32:
3717    case INDEX_op_shl_i32:
3718    case INDEX_op_shr_i32:
3719    case INDEX_op_sar_i32:
3720    case INDEX_op_rotl_i32:
3721    case INDEX_op_rotr_i32:
3722    case INDEX_op_setcond_i32:
3723    case INDEX_op_and_i64:
3724    case INDEX_op_andc_i64:
3725    case INDEX_op_shl_i64:
3726    case INDEX_op_shr_i64:
3727    case INDEX_op_sar_i64:
3728    case INDEX_op_rotl_i64:
3729    case INDEX_op_rotr_i64:
3730    case INDEX_op_setcond_i64:
3731        return C_O1_I2(r, r, ri);
3732
3733    case INDEX_op_mul_i32:
3734    case INDEX_op_mul_i64:
3735        return C_O1_I2(r, r, rI);
3736
3737    case INDEX_op_div_i32:
3738    case INDEX_op_divu_i32:
3739    case INDEX_op_rem_i32:
3740    case INDEX_op_remu_i32:
3741    case INDEX_op_nand_i32:
3742    case INDEX_op_nor_i32:
3743    case INDEX_op_muluh_i32:
3744    case INDEX_op_mulsh_i32:
3745    case INDEX_op_orc_i64:
3746    case INDEX_op_eqv_i64:
3747    case INDEX_op_nand_i64:
3748    case INDEX_op_nor_i64:
3749    case INDEX_op_div_i64:
3750    case INDEX_op_divu_i64:
3751    case INDEX_op_rem_i64:
3752    case INDEX_op_remu_i64:
3753    case INDEX_op_mulsh_i64:
3754    case INDEX_op_muluh_i64:
3755        return C_O1_I2(r, r, r);
3756
3757    case INDEX_op_sub_i32:
3758        return C_O1_I2(r, rI, ri);
3759    case INDEX_op_add_i64:
3760        return C_O1_I2(r, r, rT);
3761    case INDEX_op_or_i64:
3762    case INDEX_op_xor_i64:
3763        return C_O1_I2(r, r, rU);
3764    case INDEX_op_sub_i64:
3765        return C_O1_I2(r, rI, rT);
3766    case INDEX_op_clz_i32:
3767    case INDEX_op_ctz_i32:
3768    case INDEX_op_clz_i64:
3769    case INDEX_op_ctz_i64:
3770        return C_O1_I2(r, r, rZW);
3771
3772    case INDEX_op_brcond_i32:
3773    case INDEX_op_brcond_i64:
3774        return C_O0_I2(r, ri);
3775
3776    case INDEX_op_movcond_i32:
3777    case INDEX_op_movcond_i64:
3778        return C_O1_I4(r, r, ri, rZ, rZ);
3779    case INDEX_op_deposit_i32:
3780    case INDEX_op_deposit_i64:
3781        return C_O1_I2(r, 0, rZ);
3782    case INDEX_op_brcond2_i32:
3783        return C_O0_I4(r, r, ri, ri);
3784    case INDEX_op_setcond2_i32:
3785        return C_O1_I4(r, r, r, ri, ri);
3786    case INDEX_op_add2_i64:
3787    case INDEX_op_add2_i32:
3788        return C_O2_I4(r, r, r, r, rI, rZM);
3789    case INDEX_op_sub2_i64:
3790    case INDEX_op_sub2_i32:
3791        return C_O2_I4(r, r, rI, rZM, r, r);
3792
3793    case INDEX_op_qemu_ld_i32:
3794        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3795                ? C_O1_I1(r, L)
3796                : C_O1_I2(r, L, L));
3797
3798    case INDEX_op_qemu_st_i32:
3799        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3800                ? C_O0_I2(S, S)
3801                : C_O0_I3(S, S, S));
3802
3803    case INDEX_op_qemu_ld_i64:
3804        return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
3805                : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
3806                : C_O2_I2(L, L, L, L));
3807
3808    case INDEX_op_qemu_st_i64:
3809        return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
3810                : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
3811                : C_O0_I4(S, S, S, S));
3812
3813    case INDEX_op_add_vec:
3814    case INDEX_op_sub_vec:
3815    case INDEX_op_mul_vec:
3816    case INDEX_op_and_vec:
3817    case INDEX_op_or_vec:
3818    case INDEX_op_xor_vec:
3819    case INDEX_op_andc_vec:
3820    case INDEX_op_orc_vec:
3821    case INDEX_op_nor_vec:
3822    case INDEX_op_eqv_vec:
3823    case INDEX_op_nand_vec:
3824    case INDEX_op_cmp_vec:
3825    case INDEX_op_ssadd_vec:
3826    case INDEX_op_sssub_vec:
3827    case INDEX_op_usadd_vec:
3828    case INDEX_op_ussub_vec:
3829    case INDEX_op_smax_vec:
3830    case INDEX_op_smin_vec:
3831    case INDEX_op_umax_vec:
3832    case INDEX_op_umin_vec:
3833    case INDEX_op_shlv_vec:
3834    case INDEX_op_shrv_vec:
3835    case INDEX_op_sarv_vec:
3836    case INDEX_op_rotlv_vec:
3837    case INDEX_op_rotrv_vec:
3838    case INDEX_op_ppc_mrgh_vec:
3839    case INDEX_op_ppc_mrgl_vec:
3840    case INDEX_op_ppc_muleu_vec:
3841    case INDEX_op_ppc_mulou_vec:
3842    case INDEX_op_ppc_pkum_vec:
3843    case INDEX_op_dup2_vec:
3844        return C_O1_I2(v, v, v);
3845
3846    case INDEX_op_not_vec:
3847    case INDEX_op_neg_vec:
3848        return C_O1_I1(v, v);
3849
3850    case INDEX_op_dup_vec:
3851        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
3852
3853    case INDEX_op_ld_vec:
3854    case INDEX_op_dupm_vec:
3855        return C_O1_I1(v, r);
3856
3857    case INDEX_op_st_vec:
3858        return C_O0_I2(v, r);
3859
3860    case INDEX_op_bitsel_vec:
3861    case INDEX_op_ppc_msum_vec:
3862        return C_O1_I3(v, v, v, v);
3863
3864    default:
3865        g_assert_not_reached();
3866    }
3867}
3868
3869static void tcg_target_init(TCGContext *s)
3870{
3871    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3872    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3873
3874    have_isa = tcg_isa_base;
3875    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3876        have_isa = tcg_isa_2_06;
3877    }
3878#ifdef PPC_FEATURE2_ARCH_2_07
3879    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3880        have_isa = tcg_isa_2_07;
3881    }
3882#endif
3883#ifdef PPC_FEATURE2_ARCH_3_00
3884    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3885        have_isa = tcg_isa_3_00;
3886    }
3887#endif
3888#ifdef PPC_FEATURE2_ARCH_3_10
3889    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
3890        have_isa = tcg_isa_3_10;
3891    }
3892#endif
3893
3894#ifdef PPC_FEATURE2_HAS_ISEL
3895    /* Prefer explicit instruction from the kernel. */
3896    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3897#else
3898    /* Fall back to knowing Power7 (2.06) has ISEL. */
3899    have_isel = have_isa_2_06;
3900#endif
3901
3902    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3903        have_altivec = true;
3904        /* We only care about the portion of VSX that overlaps Altivec. */
3905        if (hwcap & PPC_FEATURE_HAS_VSX) {
3906            have_vsx = true;
3907        }
3908    }
3909
3910    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3911    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3912    if (have_altivec) {
3913        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3914        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3915    }
3916
3917    tcg_target_call_clobber_regs = 0;
3918    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3919    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3920    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3921    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3922    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3923    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3924    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3925    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3926    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3927    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3928    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3929    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3930
3931    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3932    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3933    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3934    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3935    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3936    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3937    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3938    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3939    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3940    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3941    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3942    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3943    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3944    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3945    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3946    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3947    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3948    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3949    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3950    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3951
3952    s->reserved_regs = 0;
3953    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3954    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3955#if defined(_CALL_SYSV)
3956    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3957#endif
3958#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3959    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3960#endif
3961    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3962    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3963    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3964    if (USE_REG_TB) {
3965        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3966    }
3967}
3968
3969#ifdef __ELF__
/*
 * Layout of the unwind description passed to tcg_register_jit_int():
 * a CIE, an FDE header, and the FDE's call-frame instruction bytes.
 */
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    /* DW_CFA_def_cfa: opcode, register, then a 2-byte uleb128 offset. */
    uint8_t fde_def_cfa[4];
    /* 3 bytes for the LR rule, followed by 2 bytes per callee-saved reg. */
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;
3976
/*
 * We're expecting a 2 byte uleb128 encoded value: debug_frame.fde_def_cfa
 * stores FRAME_SIZE as two bytes of 7 payload bits each, so FRAME_SIZE
 * must stay below 1 << 14.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* ELF machine identifier, chosen by the host register width. */
#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif
3985
/*
 * Unwind information template for generated code.
 *
 * Everything that is known at build time is initialised here.  The
 * per-register entries after the first three bytes of fde_reg_ofs, as well
 * as fde.func_start and fde.func_len, are filled in by tcg_register_jit().
 */
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
    .cie.return_column = 65,                 /* same column as lr below */

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        /* The offset is stored divided by the data alignment (-SZR). */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};
4007
4008void tcg_register_jit(const void *buf, size_t buf_size)
4009{
4010    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4011    int i;
4012
4013    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4014        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4015        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4016    }
4017
4018    debug_frame.fde.func_start = (uintptr_t)buf;
4019    debug_frame.fde.func_len = buf_size;
4020
4021    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4022}
4023#endif /* __ELF__ */
/*
 * Remove these instruction-name macros at the end of the file.
 * NOTE(review): presumably they are defined earlier in this file and are
 * dropped so that they do not leak into the translation unit that includes
 * this .c.inc -- confirm against the definitions and the includer.
 */
#undef VMULEUB
#undef VMULEUH
#undef VMULEUW
#undef VMULOUB
#undef VMULOUH
#undef VMULOUW
#undef VMSUMUHM
4031