/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple Xcode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
 */
#if !defined(_CALL_SYSV) && \
    !defined(_CALL_DARWIN) && \
    !defined(_CALL_AIX) && \
    !defined(_CALL_ELF)
# if defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ALIGN_ARGS   1
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLOAD_REGS \
    (ALL_GENERAL_REGS & \
     ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
#define ALL_QSTORE_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
                          (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
#else
#define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
#define ALL_QSTORE_REGS ALL_QLOAD_REGS
#endif

TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4
};

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}
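
/*
 * Illustration: the branch displacement must fit in a signed 26-bit
 * field, i.e. direct branches reach roughly +/- 32 MiB:
 *   in_range_b(0x1fffffc)  -> true  (just below the positive limit)
 *   in_range_b(0x2000000)  -> false (bit 25 would sign-extend)
 */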

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
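
/*
 * Example encoding: "add r3, r4, r5" assembles as
 *   ADD | TAB(3, 4, 5)
 *     == OPCD(31) | (266 << 1) | RT(3) | RA(4) | RB(5)
 *     == 0x7c642a14
 */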

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
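
/*
 * Example: CR field 7 occupies CR bits 28..31, so BI(7, CR_EQ)
 * yields (CR_EQ + 7 * 4) == 30, selecting the cr7.eq bit.
 */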

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}
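
/*
 * Example: tcg_out_ext32u below emits RLDICL with sh = 0, mb = 32;
 * the 6-bit mb = 32 is stored split as (32 >> 5) | ((32 << 1) & 0x3f)
 * == 1 before the MB64 shift, per the rearrangement above.
 */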

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = tcg_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       tcg_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

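    /*
     * Last resort: build the constant piecewise, e.g. for
     * arg = 0x123456789abcdef0:
     *   lis  ret, 0x1234        (ADDIS)
     *   ori  ret, ret, 0x5678
     *   sldi ret, ret, 32
     *   oris ret, ret, 0x9abc
     *   ori  ret, ret, 0xdef0
     */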
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = tcg_tbrel_diff(s, NULL);
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}
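
/*
 * Example: c = 0x00ffff00 gives lsb = 0x100 and test = 0x01000000,
 * a power of two, so it is accepted with mb = 8, me = 23 -- exactly
 * the RLWINM mask covering bits 8..23, i.e. 0x00ffff00.
 */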

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}
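
/*
 * Example: c = 0xffffffff00000000 matches the 1..10..0 form with
 * lsb = 1ull << 32, so mb = 0 and me = clz64(lsb) = 31.
 */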

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
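
/*
 * Example: a doubleword load at offset 0x12345678 splits into
 * l1 = 0x1234, l0 = 0x5678 and emits
 *   addis tmp, base, 0x1234
 *   ld    rt, 0x5678(tmp)
 * The extra 0x4000 step above keeps a non-negative offset reachable
 * when sign-extension of l0 would otherwise push l1 negative.
 */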

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        tcg_abort();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
{
    /* X != 0 implies X + -1 generates a carry.  Extra addition
       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
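    /* Concretely: X = 5 -> ADDIC gives 4 with CA = 1, and SUBFE
       computes ~4 + 5 + 1 = 1;  X = 0 -> ADDIC gives -1 with CA = 0,
       and SUBFE computes ~(-1) + 0 + 0 = 0.  */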
    if (dst != src) {
        tcg_out32(s, ADDIC | TAI(dst, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, dst, src));
    } else {
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    }
}

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
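        /*
         * MFOCRF below copies cr7 into bits 28..31 of R0; rotating
         * left by sh then brings the tested bit (LT, GT, or the EQ
         * bit written by the CRNOR above) down to bit 31, which
         * RLWINM isolates.
         */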
1651        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1652        if (crop) {
1653            tcg_out32(s, crop);
1654        }
1655        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1656        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1657        break;
1658
1659    default:
1660        tcg_abort();
1661    }
1662}
1663
1664static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1665{
1666    if (l->has_value) {
1667        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1668    } else {
1669        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1670    }
1671    tcg_out32(s, bc);
1672}
1673
1674static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1675                           TCGArg arg1, TCGArg arg2, int const_arg2,
1676                           TCGLabel *l, TCGType type)
1677{
1678    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1679    tcg_out_bc(s, tcg_to_bc[cond], l);
1680}
1681
1682static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1683                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1684                            TCGArg v2, bool const_c2)
1685{
1686    /* If for some reason both inputs are zero, don't produce bad code.  */
1687    if (v1 == 0 && v2 == 0) {
1688        tcg_out_movi(s, type, dest, 0);
1689        return;
1690    }
1691
1692    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1693
1694    if (have_isel) {
1695        int isel = tcg_to_isel[cond];
1696
1697        /* Swap the V operands if the operation indicates inversion.  */
1698        if (isel & 1) {
1699            int t = v1;
1700            v1 = v2;
1701            v2 = t;
1702            isel &= ~1;
1703        }
1704        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1705        if (v2 == 0) {
1706            tcg_out_movi(s, type, TCG_REG_R0, 0);
1707        }
1708        tcg_out32(s, isel | TAB(dest, v1, v2));
1709    } else {
1710        if (dest == v2) {
1711            cond = tcg_invert_cond(cond);
1712            v2 = v1;
1713        } else if (dest != v1) {
1714            if (v1 == 0) {
1715                tcg_out_movi(s, type, dest, 0);
1716            } else {
1717                tcg_out_mov(s, type, dest, v1);
1718            }
1719        }
1720        /* Branch forward over one insn */
1721        tcg_out32(s, tcg_to_bc[cond] | 8);
1722        if (v2 == 0) {
1723            tcg_out_movi(s, type, dest, 0);
1724        } else {
1725            tcg_out_mov(s, type, dest, v2);
1726        }
1727    }
1728}
1729
1730static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
1731                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
1732{
1733    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
1734        tcg_out32(s, opc | RA(a0) | RS(a1));
1735    } else {
1736        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
1737        /* Note that the only other valid constant for a2 is 0.  */
1738        if (have_isel) {
1739            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
1740            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
1741        } else if (!const_a2 && a0 == a2) {
1742            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
1743            tcg_out32(s, opc | RA(a0) | RS(a1));
1744        } else {
1745            tcg_out32(s, opc | RA(a0) | RS(a1));
1746            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
1747            if (const_a2) {
1748                tcg_out_movi(s, type, a0, 0);
1749            } else {
1750                tcg_out_mov(s, type, a0, a2);
1751            }
1752        }
1753    }
1754}
1755
1756static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1757                         const int *const_args)
1758{
1759    static const struct { uint8_t bit1, bit2; } bits[] = {
1760        [TCG_COND_LT ] = { CR_LT, CR_LT },
1761        [TCG_COND_LE ] = { CR_LT, CR_GT },
1762        [TCG_COND_GT ] = { CR_GT, CR_GT },
1763        [TCG_COND_GE ] = { CR_GT, CR_LT },
1764        [TCG_COND_LTU] = { CR_LT, CR_LT },
1765        [TCG_COND_LEU] = { CR_LT, CR_GT },
1766        [TCG_COND_GTU] = { CR_GT, CR_GT },
1767        [TCG_COND_GEU] = { CR_GT, CR_LT },
1768    };
1769
1770    TCGCond cond = args[4], cond2;
1771    TCGArg al, ah, bl, bh;
1772    int blconst, bhconst;
1773    int op, bit1, bit2;
1774
1775    al = args[0];
1776    ah = args[1];
1777    bl = args[2];
1778    bh = args[3];
1779    blconst = const_args[2];
1780    bhconst = const_args[3];
1781
1782    switch (cond) {
1783    case TCG_COND_EQ:
1784        op = CRAND;
1785        goto do_equality;
1786    case TCG_COND_NE:
1787        op = CRNAND;
1788    do_equality:
1789        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1790        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1791        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1792        break;
1793
1794    case TCG_COND_LT:
1795    case TCG_COND_LE:
1796    case TCG_COND_GT:
1797    case TCG_COND_GE:
1798    case TCG_COND_LTU:
1799    case TCG_COND_LEU:
1800    case TCG_COND_GTU:
1801    case TCG_COND_GEU:
1802        bit1 = bits[cond].bit1;
1803        bit2 = bits[cond].bit2;
1804        op = (bit1 != bit2 ? CRANDC : CRAND);
1805        cond2 = tcg_unsigned_cond(cond);
1806
1807        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1808        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1809        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1810        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1811        break;
1812
1813    default:
1814        tcg_abort();
1815    }
1816}
1817
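/*
 * After tcg_out_cmp2 the result is in the EQ bit of CR7.  MFOCRF with
 * FXM(7) places CR field 7 in the low four bits of R0, and the rlwinm
 * rotates the EQ bit down into bit 31, yielding a clean 0/1 value.
 */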
1818static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1819                             const int *const_args)
1820{
1821    tcg_out_cmp2(s, args + 1, const_args + 1);
1822    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1823    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
1824}
1825
1826static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
1827                             const int *const_args)
1828{
1829    tcg_out_cmp2(s, args, const_args);
1830    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1831}
1832
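/*
 * lwsync orders load-load, load-store and store-store; only a barrier
 * that includes store-load ordering needs the full-strength hwsync.
 */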
1833static void tcg_out_mb(TCGContext *s, TCGArg a0)
1834{
1835    uint32_t insn;
1836
1837    if (a0 & TCG_MO_ST_LD) {
1838        insn = HWSYNC;
1839    } else {
1840        insn = LWSYNC;
1841    }
1842
1843    tcg_out32(s, insn);
1844}
1845
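/*
 * Patch the goto_tb jump in place.  On ppc64 the patched region is an
 * insn pair (addi+b when the branch displacement fits, else addis+addi
 * updating TCG_REG_TB), written with one aligned 8-byte store so that a
 * concurrently executing thread never sees a half-updated pair.  On
 * ppc32 only a single direct branch is rewritten.
 */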
1846void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1847                              uintptr_t jmp_rw, uintptr_t addr)
1848{
1849    if (TCG_TARGET_REG_BITS == 64) {
1850        tcg_insn_unit i1, i2;
1851        intptr_t tb_diff = addr - tc_ptr;
1852        intptr_t br_diff = addr - (jmp_rx + 4);
1853        uint64_t pair;
1854
1855        /* This case does not exercise the full range of the branch,
1856           but we still need to load the new value of TCG_REG_TB;
1857           it happens quite often.  */
1858        if (tb_diff == (int16_t)tb_diff) {
1859            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
1860            i2 = B | (br_diff & 0x3fffffc);
1861        } else {
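            /*
             * Split tb_diff for addis+addi; the low part is sign-extended
             * by addi, so carry that into the high part.  For example,
             * tb_diff = 0x12348000 splits into hi = 0x12350000 and
             * lo = -0x8000.
             */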
1862            intptr_t lo = (int16_t)tb_diff;
1863            intptr_t hi = (int32_t)(tb_diff - lo);
1864            assert(tb_diff == hi + lo);
1865            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
1866            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
1867        }
1868#if HOST_BIG_ENDIAN
1869        pair = (uint64_t)i1 << 32 | i2;
1870#else
1871        pair = (uint64_t)i2 << 32 | i1;
1872#endif
1873
1874        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
1875           within qatomic_set that would fail to build on a ppc32 host.  */
1876        qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
1877        flush_idcache_range(jmp_rx, jmp_rw, 8);
1878    } else {
1879        intptr_t diff = addr - jmp_rx;
1880        tcg_debug_assert(in_range_b(diff));
1881        qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
1882        flush_idcache_range(jmp_rx, jmp_rw, 4);
1883    }
1884}
1885
1886static void tcg_out_call_int(TCGContext *s, int lk,
1887                             const tcg_insn_unit *target)
1888{
1889#ifdef _CALL_AIX
1890    /* Look through the function descriptor.  Branch directly when the
1891       target is in range and the TOC value fits in 32 bits.  */
1892    const void *tgt = ((const void * const *)target)[0];
1893    uintptr_t toc = ((const uintptr_t *)target)[1];
1894    intptr_t diff = tcg_pcrel_diff(s, tgt);
1895
1896    if (in_range_b(diff) && toc == (uint32_t)toc) {
1897        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1898        tcg_out_b(s, lk, tgt);
1899    } else {
1900        /* Fold the low bits of the constant into the addresses below.  */
1901        intptr_t arg = (intptr_t)target;
1902        int ofs = (int16_t)arg;
1903
1904        if (ofs + 8 < 0x8000) {
1905            arg -= ofs;
1906        } else {
1907            ofs = 0;
1908        }
1909        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1910        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1911        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1912        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1913        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1914    }
1915#elif defined(_CALL_ELF) && _CALL_ELF == 2
1916    intptr_t diff;
1917
1918    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1919       address, which the callee uses to compute its TOC address.  */
1920    /* FIXME: when the branch is in range, we could avoid r12 load if we
1921       knew that the destination uses the same TOC, and what its local
1922       entry point offset is.  */
1923    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1924
1925    diff = tcg_pcrel_diff(s, target);
1926    if (in_range_b(diff)) {
1927        tcg_out_b(s, lk, target);
1928    } else {
1929        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1930        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1931    }
1932#else
1933    tcg_out_b(s, lk, target);
1934#endif
1935}
1936
1937static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1938{
1939    tcg_out_call_int(s, LK, target);
1940}
1941
1942static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
1943    [MO_UB] = LBZX,
1944    [MO_UW] = LHZX,
1945    [MO_UL] = LWZX,
1946    [MO_UQ] = LDX,
1947    [MO_SW] = LHAX,
1948    [MO_SL] = LWAX,
1949    [MO_BSWAP | MO_UB] = LBZX,
1950    [MO_BSWAP | MO_UW] = LHBRX,
1951    [MO_BSWAP | MO_UL] = LWBRX,
1952    [MO_BSWAP | MO_UQ] = LDBRX,
1953};
1954
1955static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
1956    [MO_UB] = STBX,
1957    [MO_UW] = STHX,
1958    [MO_UL] = STWX,
1959    [MO_UQ] = STDX,
1960    [MO_BSWAP | MO_UB] = STBX,
1961    [MO_BSWAP | MO_UW] = STHBRX,
1962    [MO_BSWAP | MO_UL] = STWBRX,
1963    [MO_BSWAP | MO_UQ] = STDBRX,
1964};
1965
1966static const uint32_t qemu_exts_opc[4] = {
1967    EXTSB, EXTSH, EXTSW, 0
1968};
1969
1970#if defined (CONFIG_SOFTMMU)
1971/* helper signature: helper_ld_mmu(CPUArchState *env, target_ulong addr,
1972 *                                 MemOpIdx oi, uintptr_t ra)
1973 */
1974static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1975    [MO_UB]   = helper_ret_ldub_mmu,
1976    [MO_LEUW] = helper_le_lduw_mmu,
1977    [MO_LEUL] = helper_le_ldul_mmu,
1978    [MO_LEUQ] = helper_le_ldq_mmu,
1979    [MO_BEUW] = helper_be_lduw_mmu,
1980    [MO_BEUL] = helper_be_ldul_mmu,
1981    [MO_BEUQ] = helper_be_ldq_mmu,
1982};
1983
1984/* helper signature: helper_st_mmu(CPUArchState *env, target_ulong addr,
1985 *                                 uintxx_t val, MemOpIdx oi, uintptr_t ra)
1986 */
1987static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1988    [MO_UB]   = helper_ret_stb_mmu,
1989    [MO_LEUW] = helper_le_stw_mmu,
1990    [MO_LEUL] = helper_le_stl_mmu,
1991    [MO_LEUQ] = helper_le_stq_mmu,
1992    [MO_BEUW] = helper_be_stw_mmu,
1993    [MO_BEUL] = helper_be_stl_mmu,
1994    [MO_BEUQ] = helper_be_stq_mmu,
1995};
1996
1997/* We expect to use a 16-bit negative offset from ENV.  */
1998QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1999QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
2000
2001/* Perform the TLB load and compare.  Places the comparison result in CR7,
2002   loads the TLB entry's addend into R3, and returns the register holding
2003   the guest address (zero-extended into R4).  Clobbers R0, TCG_REG_TMP1. */
2004
2005static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
2006                               TCGReg addrlo, TCGReg addrhi,
2007                               int mem_index, bool is_read)
2008{
2009    int cmp_off
2010        = (is_read
2011           ? offsetof(CPUTLBEntry, addr_read)
2012           : offsetof(CPUTLBEntry, addr_write));
2013    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
2014    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2015    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2016    unsigned s_bits = opc & MO_SIZE;
2017    unsigned a_bits = get_alignment_bits(opc);
2018
2019    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2020    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
2021    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
2022
2023    /* Extract the page index, shifted into place for tlb index.  */
2024    if (TCG_TARGET_REG_BITS == 32) {
2025        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
2026                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
2027    } else {
2028        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
2029                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
2030    }
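    /* R3 = mask & (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)),
       i.e. the byte offset of the TLB entry within the table.  */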
2031    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
2032
2033    /* Load the TLB comparator.  */
2034    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
2035        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
2036                        ? LWZUX : LDUX);
2037        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
2038    } else {
2039        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
2040        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2041            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
2042            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
2043        } else {
2044            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
2045        }
2046    }
2047
2048    /* Load the TLB addend for use on the fast path.  Do this as early
2049       as possible to minimize the load-use delay.  */
2050    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
2051               offsetof(CPUTLBEntry, addend));
2052
2053    /* Clear the non-page, non-alignment bits from the address */
2054    if (TCG_TARGET_REG_BITS == 32) {
2055        /* We don't support unaligned accesses on 32-bit hosts.
2056         * Preserve the bottom bits so that an unaligned access
2057         * triggers a comparison failure.
2058         */
2059        if (a_bits < s_bits) {
2060            a_bits = s_bits;
2061        }
2062        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2063                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
2064    } else {
2065        TCGReg t = addrlo;
2066
2067        /* If the access is unaligned, we need to make sure we fail if we
2068         * cross a page boundary.  The trick is to add the access size-1
2069         * to the address before masking the low bits.  That will make the
2070         * address overflow to the next page if we cross a page boundary,
2071         * which will then force a mismatch of the TLB compare.
2072         */
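        /* For example, an 8-byte access at 0xffd adds 7 to form 0x1004,
           which lands in the next page and forces a TLB mismatch.  */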
2073        if (a_bits < s_bits) {
2074            unsigned a_mask = (1 << a_bits) - 1;
2075            unsigned s_mask = (1 << s_bits) - 1;
2076            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2077            t = TCG_REG_R0;
2078        }
2079
2080        /* Mask the address for the requested alignment.  */
2081        if (TARGET_LONG_BITS == 32) {
2082            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2083                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
2084            /* Zero-extend the address for use in the final address.  */
2085            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
2086            addrlo = TCG_REG_R4;
2087        } else if (a_bits == 0) {
2088            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
2089        } else {
2090            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2091                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
2092            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
2093        }
2094    }
2095
2096    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2097        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
2098                    0, 7, TCG_TYPE_I32);
2099        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
2100        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2101    } else {
2102        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
2103                    0, 7, TCG_TYPE_TL);
2104    }
2105
2106    return addrlo;
2107}
2108
2109/* Record the context of a call to the out-of-line helper code for the slow
2110   path of a load or store, so that we can later generate the correct
2111   helper code.  */
2112static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
2113                                TCGReg datalo_reg, TCGReg datahi_reg,
2114                                TCGReg addrlo_reg, TCGReg addrhi_reg,
2115                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
2116{
2117    TCGLabelQemuLdst *label = new_ldst_label(s);
2118
2119    label->is_ld = is_ld;
2120    label->oi = oi;
2121    label->datalo_reg = datalo_reg;
2122    label->datahi_reg = datahi_reg;
2123    label->addrlo_reg = addrlo_reg;
2124    label->addrhi_reg = addrhi_reg;
2125    label->raddr = tcg_splitwx_to_rx(raddr);
2126    label->label_ptr[0] = lptr;
2127}
2128
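/*
 * Finish the load out of line: patch the TLB-miss branch to land here,
 * marshal env/address/oi into the argument registers (64-bit halves in
 * aligned pairs for SYSV), pass LR (set by the fast path's
 * branch-and-link) as the helper's return address, then branch back to
 * the fast path.
 */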
2129static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2130{
2131    MemOpIdx oi = lb->oi;
2132    MemOp opc = get_memop(oi);
2133    TCGReg hi, lo, arg = TCG_REG_R3;
2134
2135    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2136        return false;
2137    }
2138
2139    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2140
2141    lo = lb->addrlo_reg;
2142    hi = lb->addrhi_reg;
2143    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2144#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2145        arg |= 1;
2146#endif
2147        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2148        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2149    } else {
2150        /* If the address needed to be zero-extended, we'll have already
2151           placed it in R4.  The only remaining case is 64-bit guest.  */
2152        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2153    }
2154
2155    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2156    tcg_out32(s, MFSPR | RT(arg) | LR);
2157
2158    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2159
2160    lo = lb->datalo_reg;
2161    hi = lb->datahi_reg;
2162    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2163        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2164        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2165    } else if (opc & MO_SIGN) {
2166        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2167        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2168    } else {
2169        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2170    }
2171
2172    tcg_out_b(s, 0, lb->raddr);
2173    return true;
2174}
2175
2176static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2177{
2178    MemOpIdx oi = lb->oi;
2179    MemOp opc = get_memop(oi);
2180    MemOp s_bits = opc & MO_SIZE;
2181    TCGReg hi, lo, arg = TCG_REG_R3;
2182
2183    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2184        return false;
2185    }
2186
2187    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2188
2189    lo = lb->addrlo_reg;
2190    hi = lb->addrhi_reg;
2191    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2192#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2193        arg |= 1;
2194#endif
2195        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2196        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2197    } else {
2198        /* If the address needed to be zero-extended, we'll have already
2199           placed it in R4.  The only remaining case is 64-bit guest.  */
2200        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2201    }
2202
2203    lo = lb->datalo_reg;
2204    hi = lb->datahi_reg;
2205    if (TCG_TARGET_REG_BITS == 32) {
2206        switch (s_bits) {
2207        case MO_64:
2208#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2209            arg |= 1;
2210#endif
2211            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2212            /* FALLTHRU */
2213        case MO_32:
2214            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2215            break;
2216        default:
2217            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2218            break;
2219        }
2220    } else {
2221        if (s_bits == MO_64) {
2222            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2223        } else {
2224            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2225        }
2226    }
2227
2228    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2229    tcg_out32(s, MFSPR | RT(arg) | LR);
2230
2231    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2232
2233    tcg_out_b(s, 0, lb->raddr);
2234    return true;
2235}
2236#else
2237
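/*
 * User-only: test the low address bits with andi. (which sets CR0) and
 * branch-and-link to a slow path when any are set; the slow path hands
 * off to the unaligned-access helpers below.
 */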
2238static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
2239                                   TCGReg addrhi, unsigned a_bits)
2240{
2241    unsigned a_mask = (1 << a_bits) - 1;
2242    TCGLabelQemuLdst *label = new_ldst_label(s);
2243
2244    label->is_ld = is_ld;
2245    label->addrlo_reg = addrlo;
2246    label->addrhi_reg = addrhi;
2247
2248    /* We expect a_bits to max out at 7, well below ANDI's 16-bit field. */
2249    tcg_debug_assert(a_bits < 16);
2250    tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
2251
2252    label->label_ptr[0] = s->code_ptr;
2253    tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2254
2255    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
2256}
2257
2258static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
2259{
2260    if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2261        return false;
2262    }
2263
2264    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2265        TCGReg arg = TCG_REG_R4;
2266#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2267        arg |= 1;
2268#endif
2269        if (l->addrlo_reg != arg) {
2270            tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
2271            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
2272        } else if (l->addrhi_reg != arg + 1) {
2273            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
2274            tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
2275        } else {
2276            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
2277            tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
2278            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
2279        }
2280    } else {
2281        tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
2282    }
2283    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
2284
2285    /* "Tail call" to the helper, with the return address back inline. */
2286    tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
2287                                          : helper_unaligned_st));
2288    return true;
2289}
2290
2291static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2292{
2293    return tcg_out_fail_alignment(s, l);
2294}
2295
2296static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2297{
2298    return tcg_out_fail_alignment(s, l);
2299}
2300
2301#endif /* SOFTMMU */
2302
2303static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2304{
2305    TCGReg datalo, datahi, addrlo, rbase;
2306    TCGReg addrhi __attribute__((unused));
2307    MemOpIdx oi;
2308    MemOp opc, s_bits;
2309#ifdef CONFIG_SOFTMMU
2310    int mem_index;
2311    tcg_insn_unit *label_ptr;
2312#else
2313    unsigned a_bits;
2314#endif
2315
2316    datalo = *args++;
2317    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2318    addrlo = *args++;
2319    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2320    oi = *args++;
2321    opc = get_memop(oi);
2322    s_bits = opc & MO_SIZE;
2323
2324#ifdef CONFIG_SOFTMMU
2325    mem_index = get_mmuidx(oi);
2326    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2327
2328    /* Branch-and-link to the slow path on TLB miss; target patched later. */
2329    label_ptr = s->code_ptr;
2330    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2331
2332    rbase = TCG_REG_R3;
2333#else  /* !CONFIG_SOFTMMU */
2334    a_bits = get_alignment_bits(opc);
2335    if (a_bits) {
2336        tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
2337    }
2338    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2339    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2340        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2341        addrlo = TCG_REG_TMP1;
2342    }
2343#endif
2344
2345    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2346        if (opc & MO_BSWAP) {
2347            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2348            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2349            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2350        } else if (rbase != 0) {
2351            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2352            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2353            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2354        } else if (addrlo == datahi) {
2355            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2356            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2357        } else {
2358            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2359            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2360        }
2361    } else {
2362        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2363        if (!have_isa_2_06 && insn == LDBRX) {
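            /* No ldbrx before ISA 2.06: synthesize the 64-bit reversed
               load from two lwbrx, merging the halves with rldimi.  */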
2364            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2365            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2366            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2367            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2368        } else if (insn) {
2369            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2370        } else {
2371            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2372            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2373            insn = qemu_exts_opc[s_bits];
2374            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2375        }
2376    }
2377
2378#ifdef CONFIG_SOFTMMU
2379    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2380                        s->code_ptr, label_ptr);
2381#endif
2382}
2383
2384static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2385{
2386    TCGReg datalo, datahi, addrlo, rbase;
2387    TCGReg addrhi __attribute__((unused));
2388    MemOpIdx oi;
2389    MemOp opc, s_bits;
2390#ifdef CONFIG_SOFTMMU
2391    int mem_index;
2392    tcg_insn_unit *label_ptr;
2393#else
2394    unsigned a_bits;
2395#endif
2396
2397    datalo = *args++;
2398    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2399    addrlo = *args++;
2400    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2401    oi = *args++;
2402    opc = get_memop(oi);
2403    s_bits = opc & MO_SIZE;
2404
2405#ifdef CONFIG_SOFTMMU
2406    mem_index = get_mmuidx(oi);
2407    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2408
2409    /* Branch-and-link to the slow path on TLB miss; target patched later. */
2410    label_ptr = s->code_ptr;
2411    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2412
2413    rbase = TCG_REG_R3;
2414#else  /* !CONFIG_SOFTMMU */
2415    a_bits = get_alignment_bits(opc);
2416    if (a_bits) {
2417        tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
2418    }
2419    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2420    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2421        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2422        addrlo = TCG_REG_TMP1;
2423    }
2424#endif
2425
2426    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2427        if (opc & MO_BSWAP) {
2428            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2429            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2430            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2431        } else if (rbase != 0) {
2432            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2433            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2434            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2435        } else {
2436            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2437            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2438        }
2439    } else {
2440        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2441        if (!have_isa_2_06 && insn == STDBRX) {
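            /* No stdbrx before ISA 2.06: store the low word reversed,
               then the high 32 bits reversed at addrlo + 4.  */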
2442            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2443            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2444            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2445            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2446        } else {
2447            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2448        }
2449    }
2450
2451#ifdef CONFIG_SOFTMMU
2452    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2453                        s->code_ptr, label_ptr);
2454#endif
2455}
2456
2457static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2458{
2459    int i;
2460    for (i = 0; i < count; ++i) {
2461        p[i] = NOP;
2462    }
2463}
2464
2465/* Parameters for function call generation, used in tcg.c.  */
2466#define TCG_TARGET_STACK_ALIGN       16
2467#define TCG_TARGET_EXTEND_ARGS       1
2468
2469#ifdef _CALL_AIX
2470# define LINK_AREA_SIZE                (6 * SZR)
2471# define LR_OFFSET                     (1 * SZR)
2472# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2473#elif defined(_CALL_DARWIN)
2474# define LINK_AREA_SIZE                (6 * SZR)
2475# define LR_OFFSET                     (2 * SZR)
2476#elif TCG_TARGET_REG_BITS == 64
2477# if defined(_CALL_ELF) && _CALL_ELF == 2
2478#  define LINK_AREA_SIZE               (4 * SZR)
2479#  define LR_OFFSET                    (1 * SZR)
2480# endif
2481#else /* TCG_TARGET_REG_BITS == 32 */
2482# if defined(_CALL_SYSV)
2483#  define LINK_AREA_SIZE               (2 * SZR)
2484#  define LR_OFFSET                    (1 * SZR)
2485# endif
2486#endif
2487#ifndef LR_OFFSET
2488# error "Unhandled abi"
2489#endif
2490#ifndef TCG_TARGET_CALL_STACK_OFFSET
2491# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2492#endif
2493
2494#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2495#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2496
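/*
 * Frame layout, from the stack pointer upward: the ABI link area (plus
 * the parameter save area on AIX), TCG_STATIC_CALL_ARGS_SIZE, the TCG
 * temp buffer, and the callee-saved registers at the top of the frame.
 */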
2497#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2498                     + TCG_STATIC_CALL_ARGS_SIZE    \
2499                     + CPU_TEMP_BUF_SIZE            \
2500                     + REG_SAVE_SIZE                \
2501                     + TCG_TARGET_STACK_ALIGN - 1)  \
2502                    & -TCG_TARGET_STACK_ALIGN)
2503
2504#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2505
2506static void tcg_target_qemu_prologue(TCGContext *s)
2507{
2508    int i;
2509
2510#ifdef _CALL_AIX
2511    const void **desc = (const void **)s->code_ptr;
2512    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2513    desc[1] = 0;                            /* environment pointer */
2514    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2515#endif
2516
2517    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2518                  CPU_TEMP_BUF_SIZE);
2519
2520    /* Prologue */
2521    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2522    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2523              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2524
2525    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2526        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2527                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2528    }
2529    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2530
2531#ifndef CONFIG_SOFTMMU
2532    if (guest_base) {
2533        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2534        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2535    }
2536#endif
2537
2538    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2539    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2540    if (USE_REG_TB) {
2541        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2542    }
2543    tcg_out32(s, BCCTR | BO_ALWAYS);
2544
2545    /* Epilogue */
2546    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2547
2548    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2549    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2550        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2551                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2552    }
2553    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2554    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2555    tcg_out32(s, BCLR | BO_ALWAYS);
2556}
2557
2558static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2559                       const TCGArg args[TCG_MAX_OP_ARGS],
2560                       const int const_args[TCG_MAX_OP_ARGS])
2561{
2562    TCGArg a0, a1, a2;
2563
2564    switch (opc) {
2565    case INDEX_op_exit_tb:
2566        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
2567        tcg_out_b(s, 0, tcg_code_gen_epilogue);
2568        break;
2569    case INDEX_op_goto_tb:
2570        if (s->tb_jmp_insn_offset) {
2571            /* Direct jump. */
2572            if (TCG_TARGET_REG_BITS == 64) {
2573                /* Ensure the next insns are 8-byte aligned. */
2574                if ((uintptr_t)s->code_ptr & 7) {
2575                    tcg_out32(s, NOP);
2576                }
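                /* The following addis+addi pair is what
                   tb_target_set_jmp_target rewrites with a single
                   aligned 8-byte store.  */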
2577                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2578                tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2579                tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2580            } else {
2581                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2582                tcg_out32(s, B);
2583                s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
2584                break;
2585            }
2586        } else {
2587            /* Indirect jump. */
2588            tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
2589            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
2590                       (intptr_t)(s->tb_jmp_target_addr + args[0]));
2591        }
2592        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2593        tcg_out32(s, BCCTR | BO_ALWAYS);
2594        set_jmp_reset_offset(s, args[0]);
2595        if (USE_REG_TB) {
2596            /* For the unlinked case, need to reset TCG_REG_TB.  */
2597            tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
2598                             -tcg_current_code_size(s));
2599        }
2600        break;
2601    case INDEX_op_goto_ptr:
2602        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2603        if (USE_REG_TB) {
2604            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2605        }
2606        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2607        tcg_out32(s, BCCTR | BO_ALWAYS);
2608        break;
2609    case INDEX_op_br:
2610        {
2611            TCGLabel *l = arg_label(args[0]);
2612            uint32_t insn = B;
2613
2614            if (l->has_value) {
2615                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2616                                       l->u.value_ptr);
2617            } else {
2618                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2619            }
2620            tcg_out32(s, insn);
2621        }
2622        break;
2623    case INDEX_op_ld8u_i32:
2624    case INDEX_op_ld8u_i64:
2625        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2626        break;
2627    case INDEX_op_ld8s_i32:
2628    case INDEX_op_ld8s_i64:
2629        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2630        tcg_out_ext8s(s, args[0], args[0]);
2631        break;
2632    case INDEX_op_ld16u_i32:
2633    case INDEX_op_ld16u_i64:
2634        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2635        break;
2636    case INDEX_op_ld16s_i32:
2637    case INDEX_op_ld16s_i64:
2638        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2639        break;
2640    case INDEX_op_ld_i32:
2641    case INDEX_op_ld32u_i64:
2642        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2643        break;
2644    case INDEX_op_ld32s_i64:
2645        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2646        break;
2647    case INDEX_op_ld_i64:
2648        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2649        break;
2650    case INDEX_op_st8_i32:
2651    case INDEX_op_st8_i64:
2652        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2653        break;
2654    case INDEX_op_st16_i32:
2655    case INDEX_op_st16_i64:
2656        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2657        break;
2658    case INDEX_op_st_i32:
2659    case INDEX_op_st32_i64:
2660        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2661        break;
2662    case INDEX_op_st_i64:
2663        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2664        break;
2665
2666    case INDEX_op_add_i32:
2667        a0 = args[0], a1 = args[1], a2 = args[2];
2668        if (const_args[2]) {
2669        do_addi_32:
2670            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2671        } else {
2672            tcg_out32(s, ADD | TAB(a0, a1, a2));
2673        }
2674        break;
2675    case INDEX_op_sub_i32:
2676        a0 = args[0], a1 = args[1], a2 = args[2];
2677        if (const_args[1]) {
2678            if (const_args[2]) {
2679                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2680            } else {
2681                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2682            }
2683        } else if (const_args[2]) {
2684            a2 = -a2;
2685            goto do_addi_32;
2686        } else {
2687            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2688        }
2689        break;
2690
2691    case INDEX_op_and_i32:
2692        a0 = args[0], a1 = args[1], a2 = args[2];
2693        if (const_args[2]) {
2694            tcg_out_andi32(s, a0, a1, a2);
2695        } else {
2696            tcg_out32(s, AND | SAB(a1, a0, a2));
2697        }
2698        break;
2699    case INDEX_op_and_i64:
2700        a0 = args[0], a1 = args[1], a2 = args[2];
2701        if (const_args[2]) {
2702            tcg_out_andi64(s, a0, a1, a2);
2703        } else {
2704            tcg_out32(s, AND | SAB(a1, a0, a2));
2705        }
2706        break;
2707    case INDEX_op_or_i64:
2708    case INDEX_op_or_i32:
2709        a0 = args[0], a1 = args[1], a2 = args[2];
2710        if (const_args[2]) {
2711            tcg_out_ori32(s, a0, a1, a2);
2712        } else {
2713            tcg_out32(s, OR | SAB(a1, a0, a2));
2714        }
2715        break;
2716    case INDEX_op_xor_i64:
2717    case INDEX_op_xor_i32:
2718        a0 = args[0], a1 = args[1], a2 = args[2];
2719        if (const_args[2]) {
2720            tcg_out_xori32(s, a0, a1, a2);
2721        } else {
2722            tcg_out32(s, XOR | SAB(a1, a0, a2));
2723        }
2724        break;
2725    case INDEX_op_andc_i32:
2726        a0 = args[0], a1 = args[1], a2 = args[2];
2727        if (const_args[2]) {
2728            tcg_out_andi32(s, a0, a1, ~a2);
2729        } else {
2730            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2731        }
2732        break;
2733    case INDEX_op_andc_i64:
2734        a0 = args[0], a1 = args[1], a2 = args[2];
2735        if (const_args[2]) {
2736            tcg_out_andi64(s, a0, a1, ~a2);
2737        } else {
2738            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2739        }
2740        break;
2741    case INDEX_op_orc_i32:
2742        if (const_args[2]) {
2743            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2744            break;
2745        }
2746        /* FALLTHRU */
2747    case INDEX_op_orc_i64:
2748        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2749        break;
2750    case INDEX_op_eqv_i32:
2751        if (const_args[2]) {
2752            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2753            break;
2754        }
2755        /* FALLTHRU */
2756    case INDEX_op_eqv_i64:
2757        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2758        break;
2759    case INDEX_op_nand_i32:
2760    case INDEX_op_nand_i64:
2761        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2762        break;
2763    case INDEX_op_nor_i32:
2764    case INDEX_op_nor_i64:
2765        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2766        break;
2767
2768    case INDEX_op_clz_i32:
2769        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2770                      args[2], const_args[2]);
2771        break;
2772    case INDEX_op_ctz_i32:
2773        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2774                      args[2], const_args[2]);
2775        break;
2776    case INDEX_op_ctpop_i32:
2777        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2778        break;
2779
2780    case INDEX_op_clz_i64:
2781        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2782                      args[2], const_args[2]);
2783        break;
2784    case INDEX_op_ctz_i64:
2785        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2786                      args[2], const_args[2]);
2787        break;
2788    case INDEX_op_ctpop_i64:
2789        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2790        break;
2791
2792    case INDEX_op_mul_i32:
2793        a0 = args[0], a1 = args[1], a2 = args[2];
2794        if (const_args[2]) {
2795            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2796        } else {
2797            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2798        }
2799        break;
2800
2801    case INDEX_op_div_i32:
2802        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2803        break;
2804
2805    case INDEX_op_divu_i32:
2806        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2807        break;
2808
2809    case INDEX_op_shl_i32:
2810        if (const_args[2]) {
2811            /* Limit immediate shift count lest we create an illegal insn.  */
2812            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2813        } else {
2814            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2815        }
2816        break;
2817    case INDEX_op_shr_i32:
2818        if (const_args[2]) {
2819            /* Limit immediate shift count lest we create an illegal insn.  */
2820            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2821        } else {
2822            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2823        }
2824        break;
2825    case INDEX_op_sar_i32:
2826        if (const_args[2]) {
2827            tcg_out_sari32(s, args[0], args[1], args[2]);
2828        } else {
2829            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2830        }
2831        break;
2832    case INDEX_op_rotl_i32:
2833        if (const_args[2]) {
2834            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2835        } else {
2836            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2837                         | MB(0) | ME(31));
2838        }
2839        break;
2840    case INDEX_op_rotr_i32:
2841        if (const_args[2]) {
2842            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2843        } else {
2844            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2845            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2846                         | MB(0) | ME(31));
2847        }
2848        break;
2849
2850    case INDEX_op_brcond_i32:
2851        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2852                       arg_label(args[3]), TCG_TYPE_I32);
2853        break;
2854    case INDEX_op_brcond_i64:
2855        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2856                       arg_label(args[3]), TCG_TYPE_I64);
2857        break;
2858    case INDEX_op_brcond2_i32:
2859        tcg_out_brcond2(s, args, const_args);
2860        break;
2861
2862    case INDEX_op_neg_i32:
2863    case INDEX_op_neg_i64:
2864        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2865        break;
2866
2867    case INDEX_op_not_i32:
2868    case INDEX_op_not_i64:
2869        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2870        break;
2871
2872    case INDEX_op_add_i64:
2873        a0 = args[0], a1 = args[1], a2 = args[2];
2874        if (const_args[2]) {
2875        do_addi_64:
2876            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2877        } else {
2878            tcg_out32(s, ADD | TAB(a0, a1, a2));
2879        }
2880        break;
2881    case INDEX_op_sub_i64:
2882        a0 = args[0], a1 = args[1], a2 = args[2];
2883        if (const_args[1]) {
2884            if (const_args[2]) {
2885                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2886            } else {
2887                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2888            }
2889        } else if (const_args[2]) {
2890            a2 = -a2;
2891            goto do_addi_64;
2892        } else {
2893            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2894        }
2895        break;
2896
2897    case INDEX_op_shl_i64:
2898        if (const_args[2]) {
2899            /* Limit immediate shift count lest we create an illegal insn.  */
2900            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2901        } else {
2902            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2903        }
2904        break;
2905    case INDEX_op_shr_i64:
2906        if (const_args[2]) {
2907            /* Limit immediate shift count lest we create an illegal insn.  */
2908            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2909        } else {
2910            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2911        }
2912        break;
2913    case INDEX_op_sar_i64:
2914        if (const_args[2]) {
2915            tcg_out_sari64(s, args[0], args[1], args[2]);
2916        } else {
2917            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2918        }
2919        break;
2920    case INDEX_op_rotl_i64:
2921        if (const_args[2]) {
2922            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2923        } else {
2924            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2925        }
2926        break;
2927    case INDEX_op_rotr_i64:
2928        if (const_args[2]) {
2929            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2930        } else {
2931            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2932            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2933        }
2934        break;
2935
2936    case INDEX_op_mul_i64:
2937        a0 = args[0], a1 = args[1], a2 = args[2];
2938        if (const_args[2]) {
2939            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2940        } else {
2941            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2942        }
2943        break;
2944    case INDEX_op_div_i64:
2945        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2946        break;
2947    case INDEX_op_divu_i64:
2948        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2949        break;
2950
2951    case INDEX_op_qemu_ld_i32:
2952        tcg_out_qemu_ld(s, args, false);
2953        break;
2954    case INDEX_op_qemu_ld_i64:
2955        tcg_out_qemu_ld(s, args, true);
2956        break;
2957    case INDEX_op_qemu_st_i32:
2958        tcg_out_qemu_st(s, args, false);
2959        break;
2960    case INDEX_op_qemu_st_i64:
2961        tcg_out_qemu_st(s, args, true);
2962        break;
2963
2964    case INDEX_op_ext8s_i32:
2965    case INDEX_op_ext8s_i64:
2966        tcg_out_ext8s(s, args[0], args[1]);
2967        break;
2968    case INDEX_op_ext16s_i32:
2969    case INDEX_op_ext16s_i64:
2970        tcg_out_ext16s(s, args[0], args[1]);
2971        break;
2972    case INDEX_op_ext_i32_i64:
2973    case INDEX_op_ext32s_i64:
2974        tcg_out_ext32s(s, args[0], args[1]);
2975        break;
2976    case INDEX_op_extu_i32_i64:
2977        tcg_out_ext32u(s, args[0], args[1]);
2978        break;
2979
2980    case INDEX_op_setcond_i32:
2981        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2982                        const_args[2]);
2983        break;
2984    case INDEX_op_setcond_i64:
2985        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2986                        const_args[2]);
2987        break;
2988    case INDEX_op_setcond2_i32:
2989        tcg_out_setcond2(s, args, const_args);
2990        break;
2991
2992    case INDEX_op_bswap16_i32:
2993    case INDEX_op_bswap16_i64:
2994        tcg_out_bswap16(s, args[0], args[1], args[2]);
2995        break;
2996    case INDEX_op_bswap32_i32:
2997        tcg_out_bswap32(s, args[0], args[1], 0);
2998        break;
2999    case INDEX_op_bswap32_i64:
3000        tcg_out_bswap32(s, args[0], args[1], args[2]);
3001        break;
3002    case INDEX_op_bswap64_i64:
3003        tcg_out_bswap64(s, args[0], args[1]);
3004        break;
3005
3006    case INDEX_op_deposit_i32:
3007        if (const_args[2]) {
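            /* Only a constant zero makes sense here, and the constraints
               are expected to enforce that; depositing zero is just
               clearing the field.  */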
3008            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3009            tcg_out_andi32(s, args[0], args[0], ~mask);
3010        } else {
3011            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3012                        32 - args[3] - args[4], 31 - args[3]);
3013        }
3014        break;
3015    case INDEX_op_deposit_i64:
3016        if (const_args[2]) {
3017            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3018            tcg_out_andi64(s, args[0], args[0], ~mask);
3019        } else {
3020            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3021                        64 - args[3] - args[4]);
3022        }
3023        break;
3024
3025    case INDEX_op_extract_i32:
3026        tcg_out_rlw(s, RLWINM, args[0], args[1],
3027                    32 - args[2], 32 - args[3], 31);
3028        break;
3029    case INDEX_op_extract_i64:
3030        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3031        break;
3032
3033    case INDEX_op_movcond_i32:
3034        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3035                        args[3], args[4], const_args[2]);
3036        break;
3037    case INDEX_op_movcond_i64:
3038        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3039                        args[3], args[4], const_args[2]);
3040        break;
3041
3042#if TCG_TARGET_REG_BITS == 64
3043    case INDEX_op_add2_i64:
3044#else
3045    case INDEX_op_add2_i32:
3046#endif
3047        /* Note that the CA bit is defined based on the word size of the
3048           environment.  So in 64-bit mode it's always carry-out of bit 63.
3049           The fallback code using deposit works just as well for 32-bit.  */
3050        a0 = args[0], a1 = args[1];
3051        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3052            a0 = TCG_REG_R0;
3053        }
3054        if (const_args[4]) {
3055            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3056        } else {
3057            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3058        }
3059        if (const_args[5]) {
3060            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3061        } else {
3062            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3063        }
3064        if (a0 != args[0]) {
3065            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3066        }
3067        break;
3068
3069#if TCG_TARGET_REG_BITS == 64
3070    case INDEX_op_sub2_i64:
3071#else
3072    case INDEX_op_sub2_i32:
3073#endif
3074        a0 = args[0], a1 = args[1];
3075        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3076            a0 = TCG_REG_R0;
3077        }
3078        if (const_args[2]) {
3079            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3080        } else {
3081            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3082        }
3083        if (const_args[3]) {
3084            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3085        } else {
3086            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3087        }
3088        if (a0 != args[0]) {
3089            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3090        }
3091        break;
3092
3093    case INDEX_op_muluh_i32:
3094        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3095        break;
3096    case INDEX_op_mulsh_i32:
3097        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3098        break;
3099    case INDEX_op_muluh_i64:
3100        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3101        break;
3102    case INDEX_op_mulsh_i64:
3103        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3104        break;
3105
3106    case INDEX_op_mb:
3107        tcg_out_mb(s, args[0]);
3108        break;
3109
3110    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3111    case INDEX_op_mov_i64:
3112    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3113    default:
3114        tcg_abort();
3115    }
3116}
3117
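/*
 * Return 1 if the vector op is supported directly, -1 if it can be
 * expanded via tcg_expand_vec_op, and 0 if it is not supported.
 */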
3118int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3119{
3120    switch (opc) {
3121    case INDEX_op_and_vec:
3122    case INDEX_op_or_vec:
3123    case INDEX_op_xor_vec:
3124    case INDEX_op_andc_vec:
3125    case INDEX_op_not_vec:
3126    case INDEX_op_nor_vec:
3127    case INDEX_op_eqv_vec:
3128    case INDEX_op_nand_vec:
3129        return 1;
3130    case INDEX_op_orc_vec:
3131        return have_isa_2_07;
3132    case INDEX_op_add_vec:
3133    case INDEX_op_sub_vec:
3134    case INDEX_op_smax_vec:
3135    case INDEX_op_smin_vec:
3136    case INDEX_op_umax_vec:
3137    case INDEX_op_umin_vec:
3138    case INDEX_op_shlv_vec:
3139    case INDEX_op_shrv_vec:
3140    case INDEX_op_sarv_vec:
3141    case INDEX_op_rotlv_vec:
3142        return vece <= MO_32 || have_isa_2_07;
3143    case INDEX_op_ssadd_vec:
3144    case INDEX_op_sssub_vec:
3145    case INDEX_op_usadd_vec:
3146    case INDEX_op_ussub_vec:
3147        return vece <= MO_32;
3148    case INDEX_op_cmp_vec:
3149    case INDEX_op_shli_vec:
3150    case INDEX_op_shri_vec:
3151    case INDEX_op_sari_vec:
3152    case INDEX_op_rotli_vec:
3153        return (vece <= MO_32 || have_isa_2_07) ? -1 : 0;
3154    case INDEX_op_neg_vec:
3155        return vece >= MO_32 && have_isa_3_00;
3156    case INDEX_op_mul_vec:
3157        switch (vece) {
3158        case MO_8:
3159        case MO_16:
3160            return -1;
3161        case MO_32:
3162            return have_isa_2_07 ? 1 : -1;
3163        case MO_64:
3164            return have_isa_3_10;
3165        }
3166        return 0;
3167    case INDEX_op_bitsel_vec:
3168        return have_vsx;
3169    case INDEX_op_rotrv_vec:
3170        return -1;
3171    default:
3172        return 0;
3173    }
3174}
3175
3176static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3177                            TCGReg dst, TCGReg src)
3178{
3179    tcg_debug_assert(dst >= TCG_REG_V0);
3180
3181    /* Splat from integer reg allowed via constraints for v3.00.  */
3182    if (src < TCG_REG_V0) {
3183        tcg_debug_assert(have_isa_3_00);
3184        switch (vece) {
3185        case MO_64:
3186            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3187            return true;
3188        case MO_32:
3189            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3190            return true;
3191        default:
3192            /* Fail, so that we fall back on either dupm or mov+dup.  */
3193            return false;
3194        }
3195    }
3196
3197    /*
3198     * Recall we use (or emulate) VSX integer loads, so the integer is
3199     * right justified within the left (zero-index) double-word.
3200     */
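    /* Element numbers are big-endian, so byte 7, halfword 3 and word 1
       name the low end of doubleword 0.  */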
3201    switch (vece) {
3202    case MO_8:
3203        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3204        break;
3205    case MO_16:
3206        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3207        break;
3208    case MO_32:
3209        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3210        break;
3211    case MO_64:
3212        if (have_vsx) {
3213            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3214            break;
3215        }
3216        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3217        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3218        break;
3219    default:
3220        g_assert_not_reached();
3221    }
3222    return true;
3223}
3224
3225static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3226                             TCGReg out, TCGReg base, intptr_t offset)
3227{
3228    int elt;
3229
3230    tcg_debug_assert(out >= TCG_REG_V0);
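    /*
     * The element index is computed from the offset in big-endian element
     * order; on a little-endian host the in-register order is reversed,
     * hence the XOR with (number of elements - 1) after each load.
     */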
3231    switch (vece) {
3232    case MO_8:
3233        if (have_isa_3_00) {
3234            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3235        } else {
3236            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3237        }
3238        elt = extract32(offset, 0, 4);
3239#if !HOST_BIG_ENDIAN
3240        elt ^= 15;
3241#endif
3242        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3243        break;
3244    case MO_16:
3245        tcg_debug_assert((offset & 1) == 0);
3246        if (have_isa_3_00) {
3247            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3248        } else {
3249            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3250        }
3251        elt = extract32(offset, 1, 3);
3252#if !HOST_BIG_ENDIAN
3253        elt ^= 7;
3254#endif
3255        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3256        break;
3257    case MO_32:
3258        if (have_isa_3_00) {
3259            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3260            break;
3261        }
3262        tcg_debug_assert((offset & 3) == 0);
3263        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3264        elt = extract32(offset, 2, 2);
3265#if !HOST_BIG_ENDIAN
3266        elt ^= 3;
3267#endif
3268        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3269        break;
3270    case MO_64:
3271        if (have_vsx) {
3272            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3273            break;
3274        }
3275        tcg_debug_assert((offset & 7) == 0);
3276        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3277        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3278        elt = extract32(offset, 3, 1);
3279#if !HOST_BIG_ENDIAN
3280        elt = !elt;
3281#endif
3282        if (elt) {
3283            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3284        } else {
3285            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3286        }
3287        break;
3288    default:
3289        g_assert_not_reached();
3290    }
3291    return true;
3292}
3293
3294static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3295                           unsigned vecl, unsigned vece,
3296                           const TCGArg args[TCG_MAX_OP_ARGS],
3297                           const int const_args[TCG_MAX_OP_ARGS])
3298{
3299    static const uint32_t
3300        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3301        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3302        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3303        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3304        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3305        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3306        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3307        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3308        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3309        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3310        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3311        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3312        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3313        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3314        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3315        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3316        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3317        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3318        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3319        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3320        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3321        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3322        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3323        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3324        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3325
3326    TCGType type = vecl + TCG_TYPE_V64;
3327    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3328    uint32_t insn;
3329
3330    switch (opc) {
3331    case INDEX_op_ld_vec:
3332        tcg_out_ld(s, type, a0, a1, a2);
3333        return;
3334    case INDEX_op_st_vec:
3335        tcg_out_st(s, type, a0, a1, a2);
3336        return;
3337    case INDEX_op_dupm_vec:
3338        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3339        return;
3340
3341    case INDEX_op_add_vec:
3342        insn = add_op[vece];
3343        break;
3344    case INDEX_op_sub_vec:
3345        insn = sub_op[vece];
3346        break;
3347    case INDEX_op_neg_vec:
3348        insn = neg_op[vece];
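        /*
         * VNEGW/VNEGD encode the operation in the VRA field, so the
         * single input goes in VRB: shift a1 into a2 and clear a1.
         */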
3349        a2 = a1;
3350        a1 = 0;
3351        break;
3352    case INDEX_op_mul_vec:
3353        insn = mul_op[vece];
3354        break;
3355    case INDEX_op_ssadd_vec:
3356        insn = ssadd_op[vece];
3357        break;
3358    case INDEX_op_sssub_vec:
3359        insn = sssub_op[vece];
3360        break;
3361    case INDEX_op_usadd_vec:
3362        insn = usadd_op[vece];
3363        break;
3364    case INDEX_op_ussub_vec:
3365        insn = ussub_op[vece];
3366        break;
3367    case INDEX_op_smin_vec:
3368        insn = smin_op[vece];
3369        break;
3370    case INDEX_op_umin_vec:
3371        insn = umin_op[vece];
3372        break;
3373    case INDEX_op_smax_vec:
3374        insn = smax_op[vece];
3375        break;
3376    case INDEX_op_umax_vec:
3377        insn = umax_op[vece];
3378        break;
3379    case INDEX_op_shlv_vec:
3380        insn = shlv_op[vece];
3381        break;
3382    case INDEX_op_shrv_vec:
3383        insn = shrv_op[vece];
3384        break;
3385    case INDEX_op_sarv_vec:
3386        insn = sarv_op[vece];
3387        break;
3388    case INDEX_op_and_vec:
3389        insn = VAND;
3390        break;
3391    case INDEX_op_or_vec:
3392        insn = VOR;
3393        break;
3394    case INDEX_op_xor_vec:
3395        insn = VXOR;
3396        break;
3397    case INDEX_op_andc_vec:
3398        insn = VANDC;
3399        break;
3400    case INDEX_op_not_vec:
3401        insn = VNOR;
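        /* not(x) == nor(x, x): use the input for both operands. */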
3402        a2 = a1;
3403        break;
3404    case INDEX_op_orc_vec:
3405        insn = VORC;
3406        break;
3407    case INDEX_op_nand_vec:
3408        insn = VNAND;
3409        break;
3410    case INDEX_op_nor_vec:
3411        insn = VNOR;
3412        break;
3413    case INDEX_op_eqv_vec:
3414        insn = VEQV;
3415        break;
3416
3417    case INDEX_op_cmp_vec:
3418        switch (args[3]) {
3419        case TCG_COND_EQ:
3420            insn = eq_op[vece];
3421            break;
3422        case TCG_COND_NE:
3423            insn = ne_op[vece];
3424            break;
3425        case TCG_COND_GT:
3426            insn = gts_op[vece];
3427            break;
3428        case TCG_COND_GTU:
3429            insn = gtu_op[vece];
3430            break;
3431        default:
3432            g_assert_not_reached();
3433        }
3434        break;
3435
3436    case INDEX_op_bitsel_vec:
3437        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3438        return;
3439
3440    case INDEX_op_dup2_vec:
3441        assert(TCG_TARGET_REG_BITS == 32);
3442        /* With inputs a1 = xLxx, a2 = xHxx  */
3443        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3444        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3445        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3446        return;
3447
3448    case INDEX_op_ppc_mrgh_vec:
3449        insn = mrgh_op[vece];
3450        break;
3451    case INDEX_op_ppc_mrgl_vec:
3452        insn = mrgl_op[vece];
3453        break;
3454    case INDEX_op_ppc_muleu_vec:
3455        insn = muleu_op[vece];
3456        break;
3457    case INDEX_op_ppc_mulou_vec:
3458        insn = mulou_op[vece];
3459        break;
3460    case INDEX_op_ppc_pkum_vec:
3461        insn = pkum_op[vece];
3462        break;
3463    case INDEX_op_rotlv_vec:
3464        insn = rotl_op[vece];
3465        break;
3466    case INDEX_op_ppc_msum_vec:
3467        tcg_debug_assert(vece == MO_16);
3468        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3469        return;
3470
3471    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3472    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3473    default:
3474        g_assert_not_reached();
3475    }
3476
3477    tcg_debug_assert(insn != 0);
3478    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3479}
3480
3481static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3482                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3483{
3484    TCGv_vec t1;
3485
3486    if (vece == MO_32) {
3487        /*
3488         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3489         * So using negative numbers gets us the 4th bit easily.
3490         */
3491        imm = sextract32(imm, 0, 5);
3492    } else {
3493        imm &= (8 << vece) - 1;
3494    }
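    /*
     * For the MO_32 case above: e.g. a shift count of 20 becomes
     * sextract32(20, 0, 5) = -12, which VSPLTISB can encode, and the
     * shift instructions only consult the low five bits of each lane,
     * where -12 & 31 == 20.
     */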
3495
3496    /* Splat the count as bytes, so xxspltib can be used when ISA 2.07 allows MO_64. */
3497    t1 = tcg_constant_vec(type, MO_8, imm);
3498    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3499              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3500}
3501
3502static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3503                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3504{
3505    bool need_swap = false, need_inv = false;
3506
3507    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3508
3509    switch (cond) {
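    /*
     * The hardware comparisons are EQ, GT and GTU (plus NE on ISA 3.00):
     * synthesize LT/LTU by swapping the operands, LE/LEU by inverting
     * the sense of the dual comparison, and GE/GEU by doing both.
     */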
3510    case TCG_COND_EQ:
3511    case TCG_COND_GT:
3512    case TCG_COND_GTU:
3513        break;
3514    case TCG_COND_NE:
3515        if (have_isa_3_00 && vece <= MO_32) {
3516            break;
3517        }
3518        /* fall through */
3519    case TCG_COND_LE:
3520    case TCG_COND_LEU:
3521        need_inv = true;
3522        break;
3523    case TCG_COND_LT:
3524    case TCG_COND_LTU:
3525        need_swap = true;
3526        break;
3527    case TCG_COND_GE:
3528    case TCG_COND_GEU:
3529        need_swap = need_inv = true;
3530        break;
3531    default:
3532        g_assert_not_reached();
3533    }
3534
3535    if (need_inv) {
3536        cond = tcg_invert_cond(cond);
3537    }
3538    if (need_swap) {
3539        TCGv_vec t1;
3540        t1 = v1, v1 = v2, v2 = t1;
3541        cond = tcg_swap_cond(cond);
3542    }
3543
3544    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3545              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3546
3547    if (need_inv) {
3548        tcg_gen_not_vec(vece, v0, v0);
3549    }
3550}
3551
3552static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3553                           TCGv_vec v1, TCGv_vec v2)
3554{
3555    TCGv_vec t1 = tcg_temp_new_vec(type);
3556    TCGv_vec t2 = tcg_temp_new_vec(type);
3557    TCGv_vec c0, c16;
3558
3559    switch (vece) {
3560    case MO_8:
3561    case MO_16:
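        /*
         * No direct byte/halfword multiply: multiply the even and odd
         * lanes at double width, merge the products back into lane
         * order, then pack the modular (low) halves down to the
         * original element size.
         */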
3562        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3563                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3564        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3565                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3566        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3567                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3568        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3569                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3570        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3571                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3572        break;
3573
3574    case MO_32:
3575        tcg_debug_assert(!have_isa_2_07);
3576        /*
3577         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3578         * So using -16 is a quick way to represent 16.
3579         */
3580        c16 = tcg_constant_vec(type, MO_8, -16);
3581        c0 = tcg_constant_vec(type, MO_8, 0);
3582
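        /*
         * Per 32-bit lane, splitting a = v1 and b = v2 into 16-bit
         * halves, the low 32 bits of a * b are formed as:
         *   t1 = rotl(b, 16)          swap b's halfwords
         *   t1 = msum(a, t1, 0)       a_hi * b_lo + a_lo * b_hi
         *   t1 <<= 16
         *   t2 = mulou(a, b)          a_lo * b_lo
         *   v0 = t1 + t2
         */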
3583        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3584                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3585        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3586                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3587        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3588                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3589        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3590                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3591        tcg_gen_add_vec(MO_32, v0, t1, t2);
3592        break;
3593
3594    default:
3595        g_assert_not_reached();
3596    }
3597    tcg_temp_free_vec(t1);
3598    tcg_temp_free_vec(t2);
3599}
3600
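/*
 * Expand vector opcodes that are not emitted directly: immediate
 * shifts and rotates become splat-plus-variable-shift operations,
 * comparisons are rewritten onto the conditions the hardware has,
 * and multiplies are synthesized where no single instruction exists.
 */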
3601void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3602                       TCGArg a0, ...)
3603{
3604    va_list va;
3605    TCGv_vec v0, v1, v2, t0;
3606    TCGArg a2;
3607
3608    va_start(va, a0);
3609    v0 = temp_tcgv_vec(arg_temp(a0));
3610    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3611    a2 = va_arg(va, TCGArg);
3612
3613    switch (opc) {
3614    case INDEX_op_shli_vec:
3615        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3616        break;
3617    case INDEX_op_shri_vec:
3618        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3619        break;
3620    case INDEX_op_sari_vec:
3621        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3622        break;
3623    case INDEX_op_rotli_vec:
3624        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3625        break;
3626    case INDEX_op_cmp_vec:
3627        v2 = temp_tcgv_vec(arg_temp(a2));
3628        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3629        break;
3630    case INDEX_op_mul_vec:
3631        v2 = temp_tcgv_vec(arg_temp(a2));
3632        expand_vec_mul(type, vece, v0, v1, v2);
3633        break;
3634    case INDEX_op_rotrv_vec:
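        /* Only a left rotate is available: rotate right by the negated count. */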
3635        v2 = temp_tcgv_vec(arg_temp(a2));
3636        t0 = tcg_temp_new_vec(type);
3637        tcg_gen_neg_vec(vece, t0, v2);
3638        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3639        tcg_temp_free_vec(t0);
3640        break;
3641    default:
3642        g_assert_not_reached();
3643    }
3644    va_end(va);
3645}
3646
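/* Return the operand constraint set required by each supported opcode. */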
3647static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3648{
3649    switch (op) {
3650    case INDEX_op_goto_ptr:
3651        return C_O0_I1(r);
3652
3653    case INDEX_op_ld8u_i32:
3654    case INDEX_op_ld8s_i32:
3655    case INDEX_op_ld16u_i32:
3656    case INDEX_op_ld16s_i32:
3657    case INDEX_op_ld_i32:
3658    case INDEX_op_ctpop_i32:
3659    case INDEX_op_neg_i32:
3660    case INDEX_op_not_i32:
3661    case INDEX_op_ext8s_i32:
3662    case INDEX_op_ext16s_i32:
3663    case INDEX_op_bswap16_i32:
3664    case INDEX_op_bswap32_i32:
3665    case INDEX_op_extract_i32:
3666    case INDEX_op_ld8u_i64:
3667    case INDEX_op_ld8s_i64:
3668    case INDEX_op_ld16u_i64:
3669    case INDEX_op_ld16s_i64:
3670    case INDEX_op_ld32u_i64:
3671    case INDEX_op_ld32s_i64:
3672    case INDEX_op_ld_i64:
3673    case INDEX_op_ctpop_i64:
3674    case INDEX_op_neg_i64:
3675    case INDEX_op_not_i64:
3676    case INDEX_op_ext8s_i64:
3677    case INDEX_op_ext16s_i64:
3678    case INDEX_op_ext32s_i64:
3679    case INDEX_op_ext_i32_i64:
3680    case INDEX_op_extu_i32_i64:
3681    case INDEX_op_bswap16_i64:
3682    case INDEX_op_bswap32_i64:
3683    case INDEX_op_bswap64_i64:
3684    case INDEX_op_extract_i64:
3685        return C_O1_I1(r, r);
3686
3687    case INDEX_op_st8_i32:
3688    case INDEX_op_st16_i32:
3689    case INDEX_op_st_i32:
3690    case INDEX_op_st8_i64:
3691    case INDEX_op_st16_i64:
3692    case INDEX_op_st32_i64:
3693    case INDEX_op_st_i64:
3694        return C_O0_I2(r, r);
3695
3696    case INDEX_op_add_i32:
3697    case INDEX_op_and_i32:
3698    case INDEX_op_or_i32:
3699    case INDEX_op_xor_i32:
3700    case INDEX_op_andc_i32:
3701    case INDEX_op_orc_i32:
3702    case INDEX_op_eqv_i32:
3703    case INDEX_op_shl_i32:
3704    case INDEX_op_shr_i32:
3705    case INDEX_op_sar_i32:
3706    case INDEX_op_rotl_i32:
3707    case INDEX_op_rotr_i32:
3708    case INDEX_op_setcond_i32:
3709    case INDEX_op_and_i64:
3710    case INDEX_op_andc_i64:
3711    case INDEX_op_shl_i64:
3712    case INDEX_op_shr_i64:
3713    case INDEX_op_sar_i64:
3714    case INDEX_op_rotl_i64:
3715    case INDEX_op_rotr_i64:
3716    case INDEX_op_setcond_i64:
3717        return C_O1_I2(r, r, ri);
3718
3719    case INDEX_op_mul_i32:
3720    case INDEX_op_mul_i64:
3721        return C_O1_I2(r, r, rI);
3722
3723    case INDEX_op_div_i32:
3724    case INDEX_op_divu_i32:
3725    case INDEX_op_nand_i32:
3726    case INDEX_op_nor_i32:
3727    case INDEX_op_muluh_i32:
3728    case INDEX_op_mulsh_i32:
3729    case INDEX_op_orc_i64:
3730    case INDEX_op_eqv_i64:
3731    case INDEX_op_nand_i64:
3732    case INDEX_op_nor_i64:
3733    case INDEX_op_div_i64:
3734    case INDEX_op_divu_i64:
3735    case INDEX_op_mulsh_i64:
3736    case INDEX_op_muluh_i64:
3737        return C_O1_I2(r, r, r);
3738
3739    case INDEX_op_sub_i32:
3740        return C_O1_I2(r, rI, ri);
3741    case INDEX_op_add_i64:
3742        return C_O1_I2(r, r, rT);
3743    case INDEX_op_or_i64:
3744    case INDEX_op_xor_i64:
3745        return C_O1_I2(r, r, rU);
3746    case INDEX_op_sub_i64:
3747        return C_O1_I2(r, rI, rT);
3748    case INDEX_op_clz_i32:
3749    case INDEX_op_ctz_i32:
3750    case INDEX_op_clz_i64:
3751    case INDEX_op_ctz_i64:
3752        return C_O1_I2(r, r, rZW);
3753
3754    case INDEX_op_brcond_i32:
3755    case INDEX_op_brcond_i64:
3756        return C_O0_I2(r, ri);
3757
3758    case INDEX_op_movcond_i32:
3759    case INDEX_op_movcond_i64:
3760        return C_O1_I4(r, r, ri, rZ, rZ);
3761    case INDEX_op_deposit_i32:
3762    case INDEX_op_deposit_i64:
3763        return C_O1_I2(r, 0, rZ);
3764    case INDEX_op_brcond2_i32:
3765        return C_O0_I4(r, r, ri, ri);
3766    case INDEX_op_setcond2_i32:
3767        return C_O1_I4(r, r, r, ri, ri);
3768    case INDEX_op_add2_i64:
3769    case INDEX_op_add2_i32:
3770        return C_O2_I4(r, r, r, r, rI, rZM);
3771    case INDEX_op_sub2_i64:
3772    case INDEX_op_sub2_i32:
3773        return C_O2_I4(r, r, rI, rZM, r, r);
3774
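    /*
     * Operand counts for qemu_ld/st depend on whether the guest address
     * (and, for the i64 forms, the data) spans one or two host registers.
     */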
3775    case INDEX_op_qemu_ld_i32:
3776        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3777                ? C_O1_I1(r, L)
3778                : C_O1_I2(r, L, L));
3779
3780    case INDEX_op_qemu_st_i32:
3781        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3782                ? C_O0_I2(S, S)
3783                : C_O0_I3(S, S, S));
3784
3785    case INDEX_op_qemu_ld_i64:
3786        return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
3787                : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
3788                : C_O2_I2(L, L, L, L));
3789
3790    case INDEX_op_qemu_st_i64:
3791        return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
3792                : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
3793                : C_O0_I4(S, S, S, S));
3794
3795    case INDEX_op_add_vec:
3796    case INDEX_op_sub_vec:
3797    case INDEX_op_mul_vec:
3798    case INDEX_op_and_vec:
3799    case INDEX_op_or_vec:
3800    case INDEX_op_xor_vec:
3801    case INDEX_op_andc_vec:
3802    case INDEX_op_orc_vec:
3803    case INDEX_op_nor_vec:
3804    case INDEX_op_eqv_vec:
3805    case INDEX_op_nand_vec:
3806    case INDEX_op_cmp_vec:
3807    case INDEX_op_ssadd_vec:
3808    case INDEX_op_sssub_vec:
3809    case INDEX_op_usadd_vec:
3810    case INDEX_op_ussub_vec:
3811    case INDEX_op_smax_vec:
3812    case INDEX_op_smin_vec:
3813    case INDEX_op_umax_vec:
3814    case INDEX_op_umin_vec:
3815    case INDEX_op_shlv_vec:
3816    case INDEX_op_shrv_vec:
3817    case INDEX_op_sarv_vec:
3818    case INDEX_op_rotlv_vec:
3819    case INDEX_op_rotrv_vec:
3820    case INDEX_op_ppc_mrgh_vec:
3821    case INDEX_op_ppc_mrgl_vec:
3822    case INDEX_op_ppc_muleu_vec:
3823    case INDEX_op_ppc_mulou_vec:
3824    case INDEX_op_ppc_pkum_vec:
3825    case INDEX_op_dup2_vec:
3826        return C_O1_I2(v, v, v);
3827
3828    case INDEX_op_not_vec:
3829    case INDEX_op_neg_vec:
3830        return C_O1_I1(v, v);
3831
3832    case INDEX_op_dup_vec:
3833        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
3834
3835    case INDEX_op_ld_vec:
3836    case INDEX_op_dupm_vec:
3837        return C_O1_I1(v, r);
3838
3839    case INDEX_op_st_vec:
3840        return C_O0_I2(v, r);
3841
3842    case INDEX_op_bitsel_vec:
3843    case INDEX_op_ppc_msum_vec:
3844        return C_O1_I3(v, v, v, v);
3845
3846    default:
3847        g_assert_not_reached();
3848    }
3849}
3850
3851static void tcg_target_init(TCGContext *s)
3852{
3853    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3854    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3855
3856    have_isa = tcg_isa_base;
3857    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3858        have_isa = tcg_isa_2_06;
3859    }
3860#ifdef PPC_FEATURE2_ARCH_2_07
3861    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3862        have_isa = tcg_isa_2_07;
3863    }
3864#endif
3865#ifdef PPC_FEATURE2_ARCH_3_00
3866    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3867        have_isa = tcg_isa_3_00;
3868    }
3869#endif
3870#ifdef PPC_FEATURE2_ARCH_3_10
3871    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
3872        have_isa = tcg_isa_3_10;
3873    }
3874#endif
3875
3876#ifdef PPC_FEATURE2_HAS_ISEL
3877    /* Prefer the kernel's explicit indication. */
3878    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3879#else
3880    /* Fall back to knowing Power7 (2.06) has ISEL. */
3881    have_isel = have_isa_2_06;
3882#endif
3883
3884    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3885        have_altivec = true;
3886        /* We only care about the portion of VSX that overlaps Altivec. */
3887        if (hwcap & PPC_FEATURE_HAS_VSX) {
3888            have_vsx = true;
3889        }
3890    }
3891
3892    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3893    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3894    if (have_altivec) {
3895        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3896        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3897    }
3898
3899    tcg_target_call_clobber_regs = 0;
3900    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3901    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3902    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3903    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3904    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3905    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3906    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3907    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3908    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3909    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3910    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3911    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3912
3913    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3914    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3915    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3916    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3917    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3918    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3919    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3920    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3921    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3922    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3923    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3924    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3925    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3926    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3927    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3928    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3929    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3930    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3931    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3932    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3933
3934    s->reserved_regs = 0;
3935    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3936    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3937#if defined(_CALL_SYSV)
3938    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3939#endif
3940#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3941    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3942#endif
3943    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3944    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3945    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3946    if (USE_REG_TB) {
3947        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3948    }
3949}
3950
3951#ifdef __ELF__
3952typedef struct {
3953    DebugFrameCIE cie;
3954    DebugFrameFDEHeader fde;
3955    uint8_t fde_def_cfa[4];
3956    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3957} DebugFrame;
3958
3959/* We're expecting a 2 byte uleb128 encoded value.  */
3960QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3961
3962#if TCG_TARGET_REG_BITS == 64
3963# define ELF_HOST_MACHINE EM_PPC64
3964#else
3965# define ELF_HOST_MACHINE EM_PPC
3966#endif
3967
3968static DebugFrame debug_frame = {
3969    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3970    .cie.id = -1,
3971    .cie.version = 1,
3972    .cie.code_align = 1,
3973    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3974    .cie.return_column = 65,
3975
3976    /* Total FDE size does not include the "len" member.  */
3977    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3978
3979    .fde_def_cfa = {
3980        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
3981        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3982        (FRAME_SIZE >> 7)
3983    },
3984    .fde_reg_ofs = {
3985        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
3986        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
3987    }
3988};
3989
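/*
 * Fill in the DW_CFA_offset entries for each callee-saved register,
 * then register the debug frame with the JIT interface so unwinders
 * can walk generated code.
 */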
3990void tcg_register_jit(const void *buf, size_t buf_size)
3991{
3992    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3993    int i;
3994
3995    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
3996        p[0] = 0x80 + tcg_target_callee_save_regs[i];
3997        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
3998    }
3999
4000    debug_frame.fde.func_start = (uintptr_t)buf;
4001    debug_frame.fde.func_len = buf_size;
4002
4003    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4004}
4005#endif /* __ELF__ */
4013