xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision edf64786)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
25#include "elf.h"
26#include "../tcg-pool.c.inc"
27
/*
 * Calling-convention selection.  TCG_TARGET_CALL_DARWIN is defined when the
 * compiler advertises _CALL_DARWIN or __APPLE__; TCG_TARGET_CALL_ALIGN_ARGS
 * is set to 1 when the compiler advertises _CALL_SYSV.
 */
#if defined _CALL_DARWIN || defined __APPLE__
#define TCG_TARGET_CALL_DARWIN
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ALIGN_ARGS   1
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

/* Vector scratch registers; V0 and V1 are kept out of the allocation order. */
#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

/* R31 serves as the TB base register, but only on 64-bit hosts. */
#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

/*
 * Target-specific constant-constraint flags, tested by
 * tcg_target_const_match():
 *   S16/U16  value fits in int16_t / uint16_t
 *   S32/U32  value fits in int32_t / uint32_t
 *   ZERO     value is 0
 *   MONE     value is -1
 *   WSZ      value equals the operation width (32 or 64)
 */
#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000
64
65TCGPowerISA have_isa;
66static bool have_isel;
67bool have_altivec;
68bool have_vsx;
69
70#ifndef CONFIG_SOFTMMU
71#define TCG_GUEST_BASE_REG 30
72#endif
73
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names for debug builds: r0..r31 followed by v0..v31.
 * Each entry holds at most three characters plus the terminating NUL.
 */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif
86
87static const int tcg_target_reg_alloc_order[] = {
88    TCG_REG_R14,  /* call saved registers */
89    TCG_REG_R15,
90    TCG_REG_R16,
91    TCG_REG_R17,
92    TCG_REG_R18,
93    TCG_REG_R19,
94    TCG_REG_R20,
95    TCG_REG_R21,
96    TCG_REG_R22,
97    TCG_REG_R23,
98    TCG_REG_R24,
99    TCG_REG_R25,
100    TCG_REG_R26,
101    TCG_REG_R27,
102    TCG_REG_R28,
103    TCG_REG_R29,
104    TCG_REG_R30,
105    TCG_REG_R31,
106    TCG_REG_R12,  /* call clobbered, non-arguments */
107    TCG_REG_R11,
108    TCG_REG_R2,
109    TCG_REG_R13,
110    TCG_REG_R10,  /* call clobbered, arguments */
111    TCG_REG_R9,
112    TCG_REG_R8,
113    TCG_REG_R7,
114    TCG_REG_R6,
115    TCG_REG_R5,
116    TCG_REG_R4,
117    TCG_REG_R3,
118
119    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
120    TCG_REG_V2,   /* call clobbered, vectors */
121    TCG_REG_V3,
122    TCG_REG_V4,
123    TCG_REG_V5,
124    TCG_REG_V6,
125    TCG_REG_V7,
126    TCG_REG_V8,
127    TCG_REG_V9,
128    TCG_REG_V10,
129    TCG_REG_V11,
130    TCG_REG_V12,
131    TCG_REG_V13,
132    TCG_REG_V14,
133    TCG_REG_V15,
134    TCG_REG_V16,
135    TCG_REG_V17,
136    TCG_REG_V18,
137    TCG_REG_V19,
138};
139
140static const int tcg_target_call_iarg_regs[] = {
141    TCG_REG_R3,
142    TCG_REG_R4,
143    TCG_REG_R5,
144    TCG_REG_R6,
145    TCG_REG_R7,
146    TCG_REG_R8,
147    TCG_REG_R9,
148    TCG_REG_R10
149};
150
151static const int tcg_target_call_oarg_regs[] = {
152    TCG_REG_R3,
153    TCG_REG_R4
154};
155
156static const int tcg_target_callee_save_regs[] = {
157#ifdef TCG_TARGET_CALL_DARWIN
158    TCG_REG_R11,
159#endif
160    TCG_REG_R14,
161    TCG_REG_R15,
162    TCG_REG_R16,
163    TCG_REG_R17,
164    TCG_REG_R18,
165    TCG_REG_R19,
166    TCG_REG_R20,
167    TCG_REG_R21,
168    TCG_REG_R22,
169    TCG_REG_R23,
170    TCG_REG_R24,
171    TCG_REG_R25,
172    TCG_REG_R26,
173    TCG_REG_R27, /* currently used for the global env */
174    TCG_REG_R28,
175    TCG_REG_R29,
176    TCG_REG_R30,
177    TCG_REG_R31
178};
179
180static inline bool in_range_b(tcg_target_long target)
181{
182    return target == sextract64(target, 0, 26);
183}
184
185static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
186			       const tcg_insn_unit *target)
187{
188    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
189    tcg_debug_assert(in_range_b(disp));
190    return disp & 0x3fffffc;
191}
192
193static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
194{
195    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
196    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
197
198    if (in_range_b(disp)) {
199        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
200        return true;
201    }
202    return false;
203}
204
205static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
206			       const tcg_insn_unit *target)
207{
208    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
209    tcg_debug_assert(disp == (int16_t) disp);
210    return disp & 0xfffc;
211}
212
213static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
214{
215    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
216    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
217
218    if (disp == (int16_t) disp) {
219        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
220        return true;
221    }
222    return false;
223}
224
225/* parse target specific constraints */
226static const char *target_parse_constraint(TCGArgConstraint *ct,
227                                           const char *ct_str, TCGType type)
228{
229    switch (*ct_str++) {
230    case 'A': case 'B': case 'C': case 'D':
231        tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
232        break;
233    case 'r':
234        ct->regs = 0xffffffff;
235        break;
236    case 'v':
237        ct->regs = 0xffffffff00000000ull;
238        break;
239    case 'L':                   /* qemu_ld constraint */
240        ct->regs = 0xffffffff;
241        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
242#ifdef CONFIG_SOFTMMU
243        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
244        tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
245#endif
246        break;
247    case 'S':                   /* qemu_st constraint */
248        ct->regs = 0xffffffff;
249        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
250#ifdef CONFIG_SOFTMMU
251        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
252        tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
253        tcg_regset_reset_reg(ct->regs, TCG_REG_R6);
254#endif
255        break;
256    case 'I':
257        ct->ct |= TCG_CT_CONST_S16;
258        break;
259    case 'J':
260        ct->ct |= TCG_CT_CONST_U16;
261        break;
262    case 'M':
263        ct->ct |= TCG_CT_CONST_MONE;
264        break;
265    case 'T':
266        ct->ct |= TCG_CT_CONST_S32;
267        break;
268    case 'U':
269        ct->ct |= TCG_CT_CONST_U32;
270        break;
271    case 'W':
272        ct->ct |= TCG_CT_CONST_WSZ;
273        break;
274    case 'Z':
275        ct->ct |= TCG_CT_CONST_ZERO;
276        break;
277    default:
278        return NULL;
279    }
280    return ct_str;
281}
282
283/* test if a constant matches the constraint */
284static int tcg_target_const_match(tcg_target_long val, TCGType type,
285                                  const TCGArgConstraint *arg_ct)
286{
287    int ct = arg_ct->ct;
288    if (ct & TCG_CT_CONST) {
289        return 1;
290    }
291
292    /* The only 32-bit constraint we use aside from
293       TCG_CT_CONST is TCG_CT_CONST_S16.  */
294    if (type == TCG_TYPE_I32) {
295        val = (int32_t)val;
296    }
297
298    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
299        return 1;
300    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
301        return 1;
302    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
303        return 1;
304    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
305        return 1;
306    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
307        return 1;
308    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
309        return 1;
310    } else if ((ct & TCG_CT_CONST_WSZ)
311               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
312        return 1;
313    }
314    return 0;
315}
316
/*
 * Opcode constructors.  OPCD places the primary opcode in the top six bits
 * of the instruction word; the others combine a fixed primary opcode with
 * an extended opcode at the position used by that instruction form.
 */
#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))
325
/*
 * Integer instruction opcodes.  Each macro is the opcode (plus any fixed
 * extended-opcode bits) with all operand fields zero; callers OR in the
 * operand fields.
 */

/* Branches and byte/halfword/word loads and stores. */
#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

/* Doubleword stores. */
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

/* Doubleword loads and sign-extending word loads. */
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

/* Arithmetic, logical and compare with a 16-bit immediate. */
#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

/* Word load/store with update. */
#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

/* 32-bit rotate-and-mask. */
#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

/* 64-bit rotate-and-mask. */
#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

/* Branch to LR/CTR and condition-register logical operations. */
#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

/* Register-register operations under primary opcode 31. */
#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

/* 64-bit multiply and divide. */
#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)

/* Indexed byte/halfword/word loads and stores. */
#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

/* Memory barriers. */
#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

/* Special-purpose register field for MFSPR/MTSPR. */
#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

/* 32-bit shifts by register. */
#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

/* 64-bit shifts. */
#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

/* Trap word; TRAP sets all five TO bits. */
#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */
465
/*
 * Vector (Altivec) and VSX instruction opcodes.  Trailing comments give
 * the ISA level that introduced an instruction where it is not baseline.
 * The VSX forms pre-set their register-extension bits ("force tx=1" and
 * similar) as noted on each line.
 */

/* Vector loads. */
#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

/* Vector stores. */
#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

/* Add: saturating signed/unsigned and modulo, per element size. */
#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

/* Subtract: saturating signed/unsigned and modulo, per element size. */
#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

/* Negate. */
#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

/* Minimum and maximum. */
#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

/* Comparisons. */
#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

/* Shifts and rotates. */
#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

/* Multiplies. */
#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

/* Merges. */
#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

/* Packs. */
#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

/* Bitwise logic. */
#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

/* Splats from a register element or a small immediate. */
#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

/* VSX permute, select and byte splat. */
#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

/* Moves between general and vector-scalar registers. */
#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
607
/*
 * Operand field encoders: each shifts its argument to the bit position of
 * the named instruction field.  None of the general-register encoders
 * mask their argument, so callers must pass in-range values.
 */
#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

/*
 * Vector register fields.  The argument is reduced modulo 32, so a
 * register number of 32 or above lands in the 5-bit field as (r - 32).
 */
#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

/* Link bit for branch instructions. */
#define LK    1

/* Common three-register and register+16-bit-immediate operand groups. */
#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

/*
 * Condition-register fields.  BF selects CR field n; the others encode
 * bit c of CR field n (bit number c + 4*n) at different field positions.
 */
#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

/* BO field values: branch if CR bit set, if clear, or unconditionally. */
#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)
642
/*
 * Bit index within a 4-bit condition-register field, used as the "c"
 * argument of BI()/BT()/BA()/BB()/BC_().
 */
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
649
650static const uint32_t tcg_to_bc[] = {
651    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
652    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
653    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
654    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
655    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
656    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
657    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
658    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
659    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
660    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
661};
662
663/* The low bit here is set if the RA and RB fields must be inverted.  */
664static const uint32_t tcg_to_isel[] = {
665    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
666    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
667    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
668    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
669    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
670    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
671    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
672    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
673    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
674    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
675};
676
677static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
678                        intptr_t value, intptr_t addend)
679{
680    const tcg_insn_unit *target;
681    int16_t lo;
682    int32_t hi;
683
684    value += addend;
685    target = (const tcg_insn_unit *)value;
686
687    switch (type) {
688    case R_PPC_REL14:
689        return reloc_pc14(code_ptr, target);
690    case R_PPC_REL24:
691        return reloc_pc24(code_ptr, target);
692    case R_PPC_ADDR16:
693        /*
694         * We are (slightly) abusing this relocation type.  In particular,
695         * assert that the low 2 bits are zero, and do not modify them.
696         * That way we can use this with LD et al that have opcode bits
697         * in the low 2 bits of the insn.
698         */
699        if ((value & 3) || value != (int16_t)value) {
700            return false;
701        }
702        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
703        break;
704    case R_PPC_ADDR32:
705        /*
706         * We are abusing this relocation type.  Again, this points to
707         * a pair of insns, lis + load.  This is an absolute address
708         * relocation for PPC32 so the lis cannot be removed.
709         */
710        lo = value;
711        hi = value - lo;
712        if (hi + lo != value) {
713            return false;
714        }
715        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
716        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
717        break;
718    default:
719        g_assert_not_reached();
720    }
721    return true;
722}
723
724static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
725                             TCGReg base, tcg_target_long offset);
726
727static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
728{
729    if (ret == arg) {
730        return true;
731    }
732    switch (type) {
733    case TCG_TYPE_I64:
734        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
735        /* fallthru */
736    case TCG_TYPE_I32:
737        if (ret < TCG_REG_V0) {
738            if (arg < TCG_REG_V0) {
739                tcg_out32(s, OR | SAB(arg, ret, arg));
740                break;
741            } else if (have_isa_2_07) {
742                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
743                          | VRT(arg) | RA(ret));
744                break;
745            } else {
746                /* Altivec does not support vector->integer moves.  */
747                return false;
748            }
749        } else if (arg < TCG_REG_V0) {
750            if (have_isa_2_07) {
751                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
752                          | VRT(ret) | RA(arg));
753                break;
754            } else {
755                /* Altivec does not support integer->vector moves.  */
756                return false;
757            }
758        }
759        /* fallthru */
760    case TCG_TYPE_V64:
761    case TCG_TYPE_V128:
762        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
763        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
764        break;
765    default:
766        g_assert_not_reached();
767    }
768    return true;
769}
770
771static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
772                               int sh, int mb)
773{
774    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
775    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
776    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
777    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
778}
779
780static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
781                               int sh, int mb, int me)
782{
783    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
784}
785
786static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
787{
788    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
789}
790
791static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
792{
793    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
794}
795
796static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
797{
798    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
799}
800
801static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
802{
803    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
804}
805
806static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
807{
808    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
809}
810
811/* Emit a move into ret of arg, if it can be done in one insn.  */
812static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
813{
814    if (arg == (int16_t)arg) {
815        tcg_out32(s, ADDI | TAI(ret, 0, arg));
816        return true;
817    }
818    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
819        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
820        return true;
821    }
822    return false;
823}
824
825static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
826                             tcg_target_long arg, bool in_prologue)
827{
828    intptr_t tb_diff;
829    tcg_target_long tmp;
830    int shift;
831
832    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
833
834    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
835        arg = (int32_t)arg;
836    }
837
838    /* Load 16-bit immediates with one insn.  */
839    if (tcg_out_movi_one(s, ret, arg)) {
840        return;
841    }
842
843    /* Load addresses within the TB with one insn.  */
844    tb_diff = tcg_tbrel_diff(s, (void *)arg);
845    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
846        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
847        return;
848    }
849
850    /* Load 32-bit immediates with two insns.  Note that we've already
851       eliminated bare ADDIS, so we know both insns are required.  */
852    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
853        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
854        tcg_out32(s, ORI | SAI(ret, ret, arg));
855        return;
856    }
857    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
858        tcg_out32(s, ADDI | TAI(ret, 0, arg));
859        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
860        return;
861    }
862
863    /* Load masked 16-bit value.  */
864    if (arg > 0 && (arg & 0x8000)) {
865        tmp = arg | 0x7fff;
866        if ((tmp & (tmp + 1)) == 0) {
867            int mb = clz64(tmp + 1) + 1;
868            tcg_out32(s, ADDI | TAI(ret, 0, arg));
869            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
870            return;
871        }
872    }
873
874    /* Load common masks with 2 insns.  */
875    shift = ctz64(arg);
876    tmp = arg >> shift;
877    if (tmp == (int16_t)tmp) {
878        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
879        tcg_out_shli64(s, ret, ret, shift);
880        return;
881    }
882    shift = clz64(arg);
883    if (tcg_out_movi_one(s, ret, arg << shift)) {
884        tcg_out_shri64(s, ret, ret, shift);
885        return;
886    }
887
888    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
889    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
890        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
891        return;
892    }
893
894    /* Use the constant pool, if possible.  */
895    if (!in_prologue && USE_REG_TB) {
896        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
897                       tcg_tbrel_diff(s, NULL));
898        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
899        return;
900    }
901
902    tmp = arg >> 31 >> 1;
903    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
904    if (tmp) {
905        tcg_out_shli64(s, ret, ret, 32);
906    }
907    if (arg & 0xffff0000) {
908        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
909    }
910    if (arg & 0xffff) {
911        tcg_out32(s, ORI | SAI(ret, ret, arg));
912    }
913}
914
915static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
916                             TCGReg ret, int64_t val)
917{
918    uint32_t load_insn;
919    int rel, low;
920    intptr_t add;
921
922    switch (vece) {
923    case MO_8:
924        low = (int8_t)val;
925        if (low >= -16 && low < 16) {
926            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
927            return;
928        }
929        if (have_isa_3_00) {
930            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
931            return;
932        }
933        break;
934
935    case MO_16:
936        low = (int16_t)val;
937        if (low >= -16 && low < 16) {
938            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
939            return;
940        }
941        break;
942
943    case MO_32:
944        low = (int32_t)val;
945        if (low >= -16 && low < 16) {
946            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
947            return;
948        }
949        break;
950    }
951
952    /*
953     * Otherwise we must load the value from the constant pool.
954     */
955    if (USE_REG_TB) {
956        rel = R_PPC_ADDR16;
957        add = tcg_tbrel_diff(s, NULL);
958    } else {
959        rel = R_PPC_ADDR32;
960        add = 0;
961    }
962
963    if (have_vsx) {
964        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
965        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
966        if (TCG_TARGET_REG_BITS == 64) {
967            new_pool_label(s, val, rel, s->code_ptr, add);
968        } else {
969            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
970        }
971    } else {
972        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
973        if (TCG_TARGET_REG_BITS == 64) {
974            new_pool_l2(s, rel, s->code_ptr, add, val, val);
975        } else {
976            new_pool_l4(s, rel, s->code_ptr, add,
977                        val >> 32, val, val >> 32, val);
978        }
979    }
980
981    if (USE_REG_TB) {
982        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
983        load_insn |= RA(TCG_REG_TB);
984    } else {
985        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
986        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
987    }
988    tcg_out32(s, load_insn);
989}
990
991static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
992                         tcg_target_long arg)
993{
994    switch (type) {
995    case TCG_TYPE_I32:
996    case TCG_TYPE_I64:
997        tcg_debug_assert(ret < TCG_REG_V0);
998        tcg_out_movi_int(s, type, ret, arg, false);
999        break;
1000
1001    default:
1002        g_assert_not_reached();
1003    }
1004}
1005
/*
 * Decide whether the 32-bit constant @c is a single contiguous run of one
 * bits, and if so return its bounds in big-endian bit numbering (bit 0 is
 * the most significant), as taken by tcg_out_rlw().
 *
 * @mb: receives the bit number where the run begins.
 * @me: receives the bit number where the run ends.
 *
 * Returns false for 0, for all-ones, and for any value with more than one
 * run of ones; *mb and *me are left untouched in that case.
 */
static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    /* Adding the lowest set bit collapses the lowest run of ones. */
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    /* test == 0 means the run reached bit 31 and the addition wrapped. */
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}
1029
/*
 * 64-bit counterpart of the mask test, restricted to runs of ones that
 * touch one end of the word:
 *     1..10..0   ->  *MB = 0, *ME = leading-zero count of the lowest set bit
 *     0..01..1   ->  *ME = 63, *MB = clz64(C + 1) + 1
 *
 * Returns true and fills *MB / *ME for those shapes; returns false for
 * zero and for everything else, leaving the outputs untouched.  An
 * all-ones value matches the first shape.
 */
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lowest;
    bool ones_at_top, ones_at_bottom;

    if (c == 0) {
        return false;
    }

    lowest = c & -c;
    /* Everything from the lowest set bit upwards is set.  */
    ones_at_top = (c == -lowest);
    /* Bit 0 is set and C + 1 clears every bit of C.  */
    ones_at_bottom = (lowest == 1 && (c & (c + 1)) == 0);

    if (ones_at_top) {
        *mb = 0;
        *me = clz64(lowest);
        return true;
    }
    if (ones_at_bottom) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}
1053
1054static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1055{
1056    int mb, me;
1057
1058    if (mask_operand(c, &mb, &me)) {
1059        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1060    } else if ((c & 0xffff) == c) {
1061        tcg_out32(s, ANDI | SAI(src, dst, c));
1062        return;
1063    } else if ((c & 0xffff0000) == c) {
1064        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1065        return;
1066    } else {
1067        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1068        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1069    }
1070}
1071
1072static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1073{
1074    int mb, me;
1075
1076    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1077    if (mask64_operand(c, &mb, &me)) {
1078        if (mb == 0) {
1079            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1080        } else {
1081            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1082        }
1083    } else if ((c & 0xffff) == c) {
1084        tcg_out32(s, ANDI | SAI(src, dst, c));
1085        return;
1086    } else if ((c & 0xffff0000) == c) {
1087        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1088        return;
1089    } else {
1090        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1091        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1092    }
1093}
1094
1095static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1096                           int op_lo, int op_hi)
1097{
1098    if (c >> 16) {
1099        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1100        src = dst;
1101    }
1102    if (c & 0xffff) {
1103        tcg_out32(s, op_lo | SAI(src, dst, c));
1104        src = dst;
1105    }
1106}
1107
1108static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1109{
1110    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
1111}
1112
1113static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1114{
1115    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
1116}
1117
1118static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
1119{
1120    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1121    if (in_range_b(disp)) {
1122        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1123    } else {
1124        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1125        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1126        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1127    }
1128}
1129
1130static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
1131                             TCGReg base, tcg_target_long offset)
1132{
1133    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
1134    bool is_int_store = false;
1135    TCGReg rs = TCG_REG_TMP1;
1136
1137    switch (opi) {
1138    case LD: case LWA:
1139        align = 3;
1140        /* FALLTHRU */
1141    default:
1142        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
1143            rs = rt;
1144            break;
1145        }
1146        break;
1147    case LXSD:
1148    case STXSD:
1149        align = 3;
1150        break;
1151    case LXV:
1152    case STXV:
1153        align = 15;
1154        break;
1155    case STD:
1156        align = 3;
1157        /* FALLTHRU */
1158    case STB: case STH: case STW:
1159        is_int_store = true;
1160        break;
1161    }
1162
1163    /* For unaligned, or very large offsets, use the indexed form.  */
1164    if (offset & align || offset != (int32_t)offset || opi == 0) {
1165        if (rs == base) {
1166            rs = TCG_REG_R0;
1167        }
1168        tcg_debug_assert(!is_int_store || rs != rt);
1169        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
1170        tcg_out32(s, opx | TAB(rt & 31, base, rs));
1171        return;
1172    }
1173
1174    l0 = (int16_t)offset;
1175    offset = (offset - l0) >> 16;
1176    l1 = (int16_t)offset;
1177
1178    if (l1 < 0 && orig >= 0) {
1179        extra = 0x4000;
1180        l1 = (int16_t)(offset - 0x4000);
1181    }
1182    if (l1) {
1183        tcg_out32(s, ADDIS | TAI(rs, base, l1));
1184        base = rs;
1185    }
1186    if (extra) {
1187        tcg_out32(s, ADDIS | TAI(rs, base, extra));
1188        base = rs;
1189    }
1190    if (opi != ADDI || base != rt || l0 != 0) {
1191        tcg_out32(s, opi | TAI(rt & 31, base, l0));
1192    }
1193}
1194
1195static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
1196                           TCGReg va, TCGReg vb, int shb)
1197{
1198    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
1199}
1200
1201static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1202                       TCGReg base, intptr_t offset)
1203{
1204    int shift;
1205
1206    switch (type) {
1207    case TCG_TYPE_I32:
1208        if (ret < TCG_REG_V0) {
1209            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
1210            break;
1211        }
1212        if (have_isa_2_07 && have_vsx) {
1213            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
1214            break;
1215        }
1216        tcg_debug_assert((offset & 3) == 0);
1217        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
1218        shift = (offset - 4) & 0xc;
1219        if (shift) {
1220            tcg_out_vsldoi(s, ret, ret, ret, shift);
1221        }
1222        break;
1223    case TCG_TYPE_I64:
1224        if (ret < TCG_REG_V0) {
1225            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1226            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
1227            break;
1228        }
1229        /* fallthru */
1230    case TCG_TYPE_V64:
1231        tcg_debug_assert(ret >= TCG_REG_V0);
1232        if (have_vsx) {
1233            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
1234                             ret, base, offset);
1235            break;
1236        }
1237        tcg_debug_assert((offset & 7) == 0);
1238        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
1239        if (offset & 8) {
1240            tcg_out_vsldoi(s, ret, ret, ret, 8);
1241        }
1242        break;
1243    case TCG_TYPE_V128:
1244        tcg_debug_assert(ret >= TCG_REG_V0);
1245        tcg_debug_assert((offset & 15) == 0);
1246        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
1247                         LVX, ret, base, offset);
1248        break;
1249    default:
1250        g_assert_not_reached();
1251    }
1252}
1253
1254static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1255                              TCGReg base, intptr_t offset)
1256{
1257    int shift;
1258
1259    switch (type) {
1260    case TCG_TYPE_I32:
1261        if (arg < TCG_REG_V0) {
1262            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1263            break;
1264        }
1265        if (have_isa_2_07 && have_vsx) {
1266            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1267            break;
1268        }
1269        assert((offset & 3) == 0);
1270        tcg_debug_assert((offset & 3) == 0);
1271        shift = (offset - 4) & 0xc;
1272        if (shift) {
1273            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1274            arg = TCG_VEC_TMP1;
1275        }
1276        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1277        break;
1278    case TCG_TYPE_I64:
1279        if (arg < TCG_REG_V0) {
1280            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1281            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1282            break;
1283        }
1284        /* fallthru */
1285    case TCG_TYPE_V64:
1286        tcg_debug_assert(arg >= TCG_REG_V0);
1287        if (have_vsx) {
1288            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1289                             STXSDX, arg, base, offset);
1290            break;
1291        }
1292        tcg_debug_assert((offset & 7) == 0);
1293        if (offset & 8) {
1294            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1295            arg = TCG_VEC_TMP1;
1296        }
1297        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1298        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1299        break;
1300    case TCG_TYPE_V128:
1301        tcg_debug_assert(arg >= TCG_REG_V0);
1302        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1303                         STVX, arg, base, offset);
1304        break;
1305    default:
1306        g_assert_not_reached();
1307    }
1308}
1309
1310static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1311                               TCGReg base, intptr_t ofs)
1312{
1313    return false;
1314}
1315
1316static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1317                        int const_arg2, int cr, TCGType type)
1318{
1319    int imm;
1320    uint32_t op;
1321
1322    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1323
1324    /* Simplify the comparisons below wrt CMPI.  */
1325    if (type == TCG_TYPE_I32) {
1326        arg2 = (int32_t)arg2;
1327    }
1328
1329    switch (cond) {
1330    case TCG_COND_EQ:
1331    case TCG_COND_NE:
1332        if (const_arg2) {
1333            if ((int16_t) arg2 == arg2) {
1334                op = CMPI;
1335                imm = 1;
1336                break;
1337            } else if ((uint16_t) arg2 == arg2) {
1338                op = CMPLI;
1339                imm = 1;
1340                break;
1341            }
1342        }
1343        op = CMPL;
1344        imm = 0;
1345        break;
1346
1347    case TCG_COND_LT:
1348    case TCG_COND_GE:
1349    case TCG_COND_LE:
1350    case TCG_COND_GT:
1351        if (const_arg2) {
1352            if ((int16_t) arg2 == arg2) {
1353                op = CMPI;
1354                imm = 1;
1355                break;
1356            }
1357        }
1358        op = CMP;
1359        imm = 0;
1360        break;
1361
1362    case TCG_COND_LTU:
1363    case TCG_COND_GEU:
1364    case TCG_COND_LEU:
1365    case TCG_COND_GTU:
1366        if (const_arg2) {
1367            if ((uint16_t) arg2 == arg2) {
1368                op = CMPLI;
1369                imm = 1;
1370                break;
1371            }
1372        }
1373        op = CMPL;
1374        imm = 0;
1375        break;
1376
1377    default:
1378        tcg_abort();
1379    }
1380    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1381
1382    if (imm) {
1383        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1384    } else {
1385        if (const_arg2) {
1386            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1387            arg2 = TCG_REG_R0;
1388        }
1389        tcg_out32(s, op | RA(arg1) | RB(arg2));
1390    }
1391}
1392
1393static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1394                                TCGReg dst, TCGReg src)
1395{
1396    if (type == TCG_TYPE_I32) {
1397        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1398        tcg_out_shri32(s, dst, dst, 5);
1399    } else {
1400        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1401        tcg_out_shri64(s, dst, dst, 6);
1402    }
1403}
1404
1405static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
1406{
1407    /* X != 0 implies X + -1 generates a carry.  Extra addition
1408       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
1409    if (dst != src) {
1410        tcg_out32(s, ADDIC | TAI(dst, src, -1));
1411        tcg_out32(s, SUBFE | TAB(dst, dst, src));
1412    } else {
1413        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1414        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1415    }
1416}
1417
1418static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1419                                  bool const_arg2)
1420{
1421    if (const_arg2) {
1422        if ((uint32_t)arg2 == arg2) {
1423            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1424        } else {
1425            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1426            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1427        }
1428    } else {
1429        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1430    }
1431    return TCG_REG_R0;
1432}
1433
1434static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1435                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
1436                            int const_arg2)
1437{
1438    int crop, sh;
1439
1440    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1441
1442    /* Ignore high bits of a potential constant arg2.  */
1443    if (type == TCG_TYPE_I32) {
1444        arg2 = (uint32_t)arg2;
1445    }
1446
1447    /* Handle common and trivial cases before handling anything else.  */
1448    if (arg2 == 0) {
1449        switch (cond) {
1450        case TCG_COND_EQ:
1451            tcg_out_setcond_eq0(s, type, arg0, arg1);
1452            return;
1453        case TCG_COND_NE:
1454            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1455                tcg_out_ext32u(s, TCG_REG_R0, arg1);
1456                arg1 = TCG_REG_R0;
1457            }
1458            tcg_out_setcond_ne0(s, arg0, arg1);
1459            return;
1460        case TCG_COND_GE:
1461            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1462            arg1 = arg0;
1463            /* FALLTHRU */
1464        case TCG_COND_LT:
1465            /* Extract the sign bit.  */
1466            if (type == TCG_TYPE_I32) {
1467                tcg_out_shri32(s, arg0, arg1, 31);
1468            } else {
1469                tcg_out_shri64(s, arg0, arg1, 63);
1470            }
1471            return;
1472        default:
1473            break;
1474        }
1475    }
1476
1477    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1478       All other cases below are also at least 3 insns, so speed up the
1479       code generator by not considering them and always using ISEL.  */
1480    if (have_isel) {
1481        int isel, tab;
1482
1483        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1484
1485        isel = tcg_to_isel[cond];
1486
1487        tcg_out_movi(s, type, arg0, 1);
1488        if (isel & 1) {
1489            /* arg0 = (bc ? 0 : 1) */
1490            tab = TAB(arg0, 0, arg0);
1491            isel &= ~1;
1492        } else {
1493            /* arg0 = (bc ? 1 : 0) */
1494            tcg_out_movi(s, type, TCG_REG_R0, 0);
1495            tab = TAB(arg0, arg0, TCG_REG_R0);
1496        }
1497        tcg_out32(s, isel | tab);
1498        return;
1499    }
1500
1501    switch (cond) {
1502    case TCG_COND_EQ:
1503        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1504        tcg_out_setcond_eq0(s, type, arg0, arg1);
1505        return;
1506
1507    case TCG_COND_NE:
1508        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1509        /* Discard the high bits only once, rather than both inputs.  */
1510        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1511            tcg_out_ext32u(s, TCG_REG_R0, arg1);
1512            arg1 = TCG_REG_R0;
1513        }
1514        tcg_out_setcond_ne0(s, arg0, arg1);
1515        return;
1516
1517    case TCG_COND_GT:
1518    case TCG_COND_GTU:
1519        sh = 30;
1520        crop = 0;
1521        goto crtest;
1522
1523    case TCG_COND_LT:
1524    case TCG_COND_LTU:
1525        sh = 29;
1526        crop = 0;
1527        goto crtest;
1528
1529    case TCG_COND_GE:
1530    case TCG_COND_GEU:
1531        sh = 31;
1532        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
1533        goto crtest;
1534
1535    case TCG_COND_LE:
1536    case TCG_COND_LEU:
1537        sh = 31;
1538        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
1539    crtest:
1540        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1541        if (crop) {
1542            tcg_out32(s, crop);
1543        }
1544        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1545        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1546        break;
1547
1548    default:
1549        tcg_abort();
1550    }
1551}
1552
1553static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1554{
1555    if (l->has_value) {
1556        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1557    } else {
1558        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1559    }
1560    tcg_out32(s, bc);
1561}
1562
1563static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1564                           TCGArg arg1, TCGArg arg2, int const_arg2,
1565                           TCGLabel *l, TCGType type)
1566{
1567    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1568    tcg_out_bc(s, tcg_to_bc[cond], l);
1569}
1570
1571static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1572                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1573                            TCGArg v2, bool const_c2)
1574{
1575    /* If for some reason both inputs are zero, don't produce bad code.  */
1576    if (v1 == 0 && v2 == 0) {
1577        tcg_out_movi(s, type, dest, 0);
1578        return;
1579    }
1580
1581    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1582
1583    if (have_isel) {
1584        int isel = tcg_to_isel[cond];
1585
1586        /* Swap the V operands if the operation indicates inversion.  */
1587        if (isel & 1) {
1588            int t = v1;
1589            v1 = v2;
1590            v2 = t;
1591            isel &= ~1;
1592        }
1593        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1594        if (v2 == 0) {
1595            tcg_out_movi(s, type, TCG_REG_R0, 0);
1596        }
1597        tcg_out32(s, isel | TAB(dest, v1, v2));
1598    } else {
1599        if (dest == v2) {
1600            cond = tcg_invert_cond(cond);
1601            v2 = v1;
1602        } else if (dest != v1) {
1603            if (v1 == 0) {
1604                tcg_out_movi(s, type, dest, 0);
1605            } else {
1606                tcg_out_mov(s, type, dest, v1);
1607            }
1608        }
1609        /* Branch forward over one insn */
1610        tcg_out32(s, tcg_to_bc[cond] | 8);
1611        if (v2 == 0) {
1612            tcg_out_movi(s, type, dest, 0);
1613        } else {
1614            tcg_out_mov(s, type, dest, v2);
1615        }
1616    }
1617}
1618
1619static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
1620                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
1621{
1622    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
1623        tcg_out32(s, opc | RA(a0) | RS(a1));
1624    } else {
1625        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
1626        /* Note that the only other valid constant for a2 is 0.  */
1627        if (have_isel) {
1628            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
1629            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
1630        } else if (!const_a2 && a0 == a2) {
1631            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
1632            tcg_out32(s, opc | RA(a0) | RS(a1));
1633        } else {
1634            tcg_out32(s, opc | RA(a0) | RS(a1));
1635            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
1636            if (const_a2) {
1637                tcg_out_movi(s, type, a0, 0);
1638            } else {
1639                tcg_out_mov(s, type, a0, a2);
1640            }
1641        }
1642    }
1643}
1644
1645static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1646                         const int *const_args)
1647{
1648    static const struct { uint8_t bit1, bit2; } bits[] = {
1649        [TCG_COND_LT ] = { CR_LT, CR_LT },
1650        [TCG_COND_LE ] = { CR_LT, CR_GT },
1651        [TCG_COND_GT ] = { CR_GT, CR_GT },
1652        [TCG_COND_GE ] = { CR_GT, CR_LT },
1653        [TCG_COND_LTU] = { CR_LT, CR_LT },
1654        [TCG_COND_LEU] = { CR_LT, CR_GT },
1655        [TCG_COND_GTU] = { CR_GT, CR_GT },
1656        [TCG_COND_GEU] = { CR_GT, CR_LT },
1657    };
1658
1659    TCGCond cond = args[4], cond2;
1660    TCGArg al, ah, bl, bh;
1661    int blconst, bhconst;
1662    int op, bit1, bit2;
1663
1664    al = args[0];
1665    ah = args[1];
1666    bl = args[2];
1667    bh = args[3];
1668    blconst = const_args[2];
1669    bhconst = const_args[3];
1670
1671    switch (cond) {
1672    case TCG_COND_EQ:
1673        op = CRAND;
1674        goto do_equality;
1675    case TCG_COND_NE:
1676        op = CRNAND;
1677    do_equality:
1678        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1679        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1680        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1681        break;
1682
1683    case TCG_COND_LT:
1684    case TCG_COND_LE:
1685    case TCG_COND_GT:
1686    case TCG_COND_GE:
1687    case TCG_COND_LTU:
1688    case TCG_COND_LEU:
1689    case TCG_COND_GTU:
1690    case TCG_COND_GEU:
1691        bit1 = bits[cond].bit1;
1692        bit2 = bits[cond].bit2;
1693        op = (bit1 != bit2 ? CRANDC : CRAND);
1694        cond2 = tcg_unsigned_cond(cond);
1695
1696        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1697        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1698        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1699        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1700        break;
1701
1702    default:
1703        tcg_abort();
1704    }
1705}
1706
1707static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1708                             const int *const_args)
1709{
1710    tcg_out_cmp2(s, args + 1, const_args + 1);
1711    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1712    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
1713}
1714
1715static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
1716                             const int *const_args)
1717{
1718    tcg_out_cmp2(s, args, const_args);
1719    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1720}
1721
1722static void tcg_out_mb(TCGContext *s, TCGArg a0)
1723{
1724    uint32_t insn = HWSYNC;
1725    a0 &= TCG_MO_ALL;
1726    if (a0 == TCG_MO_LD_LD) {
1727        insn = LWSYNC;
1728    } else if (a0 == TCG_MO_ST_ST) {
1729        insn = EIEIO;
1730    }
1731    tcg_out32(s, insn);
1732}
1733
1734void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1735                              uintptr_t jmp_rw, uintptr_t addr)
1736{
1737    if (TCG_TARGET_REG_BITS == 64) {
1738        tcg_insn_unit i1, i2;
1739        intptr_t tb_diff = addr - tc_ptr;
1740        intptr_t br_diff = addr - (jmp_rx + 4);
1741        uint64_t pair;
1742
1743        /* This does not exercise the range of the branch, but we do
1744           still need to be able to load the new value of TCG_REG_TB.
1745           But this does still happen quite often.  */
1746        if (tb_diff == (int16_t)tb_diff) {
1747            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
1748            i2 = B | (br_diff & 0x3fffffc);
1749        } else {
1750            intptr_t lo = (int16_t)tb_diff;
1751            intptr_t hi = (int32_t)(tb_diff - lo);
1752            assert(tb_diff == hi + lo);
1753            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
1754            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
1755        }
1756#ifdef HOST_WORDS_BIGENDIAN
1757        pair = (uint64_t)i1 << 32 | i2;
1758#else
1759        pair = (uint64_t)i2 << 32 | i1;
1760#endif
1761
1762        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
1763           within qatomic_set that would fail to build a ppc32 host.  */
1764        qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
1765        flush_idcache_range(jmp_rx, jmp_rw, 8);
1766    } else {
1767        intptr_t diff = addr - jmp_rx;
1768        tcg_debug_assert(in_range_b(diff));
1769        qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
1770        flush_idcache_range(jmp_rx, jmp_rw, 4);
1771    }
1772}
1773
1774static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1775{
1776#ifdef _CALL_AIX
1777    /* Look through the descriptor.  If the branch is in range, and we
1778       don't have to spend too much effort on building the toc.  */
1779    const void *tgt = ((const void * const *)target)[0];
1780    uintptr_t toc = ((const uintptr_t *)target)[1];
1781    intptr_t diff = tcg_pcrel_diff(s, tgt);
1782
1783    if (in_range_b(diff) && toc == (uint32_t)toc) {
1784        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1785        tcg_out_b(s, LK, tgt);
1786    } else {
1787        /* Fold the low bits of the constant into the addresses below.  */
1788        intptr_t arg = (intptr_t)target;
1789        int ofs = (int16_t)arg;
1790
1791        if (ofs + 8 < 0x8000) {
1792            arg -= ofs;
1793        } else {
1794            ofs = 0;
1795        }
1796        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1797        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1798        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1799        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1800        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1801    }
1802#elif defined(_CALL_ELF) && _CALL_ELF == 2
1803    intptr_t diff;
1804
1805    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1806       address, which the callee uses to compute its TOC address.  */
1807    /* FIXME: when the branch is in range, we could avoid r12 load if we
1808       knew that the destination uses the same TOC, and what its local
1809       entry point offset is.  */
1810    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1811
1812    diff = tcg_pcrel_diff(s, target);
1813    if (in_range_b(diff)) {
1814        tcg_out_b(s, LK, target);
1815    } else {
1816        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1817        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1818    }
1819#else
1820    tcg_out_b(s, LK, target);
1821#endif
1822}
1823
1824static const uint32_t qemu_ldx_opc[16] = {
1825    [MO_UB] = LBZX,
1826    [MO_UW] = LHZX,
1827    [MO_UL] = LWZX,
1828    [MO_Q]  = LDX,
1829    [MO_SW] = LHAX,
1830    [MO_SL] = LWAX,
1831    [MO_BSWAP | MO_UB] = LBZX,
1832    [MO_BSWAP | MO_UW] = LHBRX,
1833    [MO_BSWAP | MO_UL] = LWBRX,
1834    [MO_BSWAP | MO_Q]  = LDBRX,
1835};
1836
1837static const uint32_t qemu_stx_opc[16] = {
1838    [MO_UB] = STBX,
1839    [MO_UW] = STHX,
1840    [MO_UL] = STWX,
1841    [MO_Q]  = STDX,
1842    [MO_BSWAP | MO_UB] = STBX,
1843    [MO_BSWAP | MO_UW] = STHBRX,
1844    [MO_BSWAP | MO_UL] = STWBRX,
1845    [MO_BSWAP | MO_Q]  = STDBRX,
1846};
1847
1848static const uint32_t qemu_exts_opc[4] = {
1849    EXTSB, EXTSH, EXTSW, 0
1850};
1851
1852#if defined (CONFIG_SOFTMMU)
1853#include "../tcg-ldst.c.inc"
1854
1855/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
1856 *                                 int mmu_idx, uintptr_t ra)
1857 */
1858static void * const qemu_ld_helpers[16] = {
1859    [MO_UB]   = helper_ret_ldub_mmu,
1860    [MO_LEUW] = helper_le_lduw_mmu,
1861    [MO_LEUL] = helper_le_ldul_mmu,
1862    [MO_LEQ]  = helper_le_ldq_mmu,
1863    [MO_BEUW] = helper_be_lduw_mmu,
1864    [MO_BEUL] = helper_be_ldul_mmu,
1865    [MO_BEQ]  = helper_be_ldq_mmu,
1866};
1867
1868/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
1869 *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
1870 */
1871static void * const qemu_st_helpers[16] = {
1872    [MO_UB]   = helper_ret_stb_mmu,
1873    [MO_LEUW] = helper_le_stw_mmu,
1874    [MO_LEUL] = helper_le_stl_mmu,
1875    [MO_LEQ]  = helper_le_stq_mmu,
1876    [MO_BEUW] = helper_be_stw_mmu,
1877    [MO_BEUL] = helper_be_stl_mmu,
1878    [MO_BEQ]  = helper_be_stq_mmu,
1879};
1880
1881/* We expect to use a 16-bit negative offset from ENV.  */
1882QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1883QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1884
1885/* Perform the TLB load and compare.  Places the result of the comparison
1886   in CR7, loads the addend of the TLB into R3, and returns the register
1887   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
1888
1889static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
1890                               TCGReg addrlo, TCGReg addrhi,
1891                               int mem_index, bool is_read)
1892{
1893    int cmp_off
1894        = (is_read
1895           ? offsetof(CPUTLBEntry, addr_read)
1896           : offsetof(CPUTLBEntry, addr_write));
1897    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1898    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1899    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1900    unsigned s_bits = opc & MO_SIZE;
1901    unsigned a_bits = get_alignment_bits(opc);
1902
1903    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
1904    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
1905    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
1906
1907    /* Extract the page index, shifted into place for tlb index.  */
1908    if (TCG_TARGET_REG_BITS == 32) {
1909        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
1910                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1911    } else {
1912        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
1913                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1914    }
1915    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
1916
1917    /* Load the TLB comparator.  */
1918    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
1919        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
1920                        ? LWZUX : LDUX);
1921        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
1922    } else {
1923        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
1924        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1925            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
1926            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
1927        } else {
1928            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
1929        }
1930    }
1931
1932    /* Load the TLB addend for use on the fast path.  Do this asap
1933       to minimize any load use delay.  */
1934    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
1935               offsetof(CPUTLBEntry, addend));
1936
1937    /* Clear the non-page, non-alignment bits from the address */
1938    if (TCG_TARGET_REG_BITS == 32) {
1939        /* We don't support unaligned accesses on 32-bits.
1940         * Preserve the bottom bits and thus trigger a comparison
1941         * failure on unaligned accesses.
1942         */
1943        if (a_bits < s_bits) {
1944            a_bits = s_bits;
1945        }
1946        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
1947                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1948    } else {
1949        TCGReg t = addrlo;
1950
1951        /* If the access is unaligned, we need to make sure we fail if we
1952         * cross a page boundary.  The trick is to add the access size-1
1953         * to the address before masking the low bits.  That will make the
1954         * address overflow to the next page if we cross a page boundary,
1955         * which will then force a mismatch of the TLB compare.
1956         */
1957        if (a_bits < s_bits) {
1958            unsigned a_mask = (1 << a_bits) - 1;
1959            unsigned s_mask = (1 << s_bits) - 1;
1960            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
1961            t = TCG_REG_R0;
1962        }
1963
1964        /* Mask the address for the requested alignment.  */
1965        if (TARGET_LONG_BITS == 32) {
1966            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
1967                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1968            /* Zero-extend the address for use in the final address.  */
1969            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
1970            addrlo = TCG_REG_R4;
1971        } else if (a_bits == 0) {
1972            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
1973        } else {
1974            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
1975                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
1976            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
1977        }
1978    }
1979
1980    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1981        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1982                    0, 7, TCG_TYPE_I32);
1983        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
1984        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1985    } else {
1986        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1987                    0, 7, TCG_TYPE_TL);
1988    }
1989
1990    return addrlo;
1991}
1992
1993/* Record the context of a call to the out of line helper code for the slow
1994   path for a load or store, so that we can later generate the correct
1995   helper code.  */
1996static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1997                                TCGReg datalo_reg, TCGReg datahi_reg,
1998                                TCGReg addrlo_reg, TCGReg addrhi_reg,
1999                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
2000{
2001    TCGLabelQemuLdst *label = new_ldst_label(s);
2002
2003    label->is_ld = is_ld;
2004    label->oi = oi;
2005    label->datalo_reg = datalo_reg;
2006    label->datahi_reg = datahi_reg;
2007    label->addrlo_reg = addrlo_reg;
2008    label->addrhi_reg = addrhi_reg;
2009    label->raddr = tcg_splitwx_to_rx(raddr);
2010    label->label_ptr[0] = lptr;
2011}
2012
2013static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2014{
2015    TCGMemOpIdx oi = lb->oi;
2016    MemOp opc = get_memop(oi);
2017    TCGReg hi, lo, arg = TCG_REG_R3;
2018
2019    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2020        return false;
2021    }
2022
2023    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2024
2025    lo = lb->addrlo_reg;
2026    hi = lb->addrhi_reg;
2027    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2028#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2029        arg |= 1;
2030#endif
2031        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2032        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2033    } else {
2034        /* If the address needed to be zero-extended, we'll have already
2035           placed it in R4.  The only remaining case is 64-bit guest.  */
2036        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2037    }
2038
2039    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2040    tcg_out32(s, MFSPR | RT(arg) | LR);
2041
2042    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2043
2044    lo = lb->datalo_reg;
2045    hi = lb->datahi_reg;
2046    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2047        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2048        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2049    } else if (opc & MO_SIGN) {
2050        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2051        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2052    } else {
2053        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2054    }
2055
2056    tcg_out_b(s, 0, lb->raddr);
2057    return true;
2058}
2059
2060static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2061{
2062    TCGMemOpIdx oi = lb->oi;
2063    MemOp opc = get_memop(oi);
2064    MemOp s_bits = opc & MO_SIZE;
2065    TCGReg hi, lo, arg = TCG_REG_R3;
2066
2067    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2068        return false;
2069    }
2070
2071    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2072
2073    lo = lb->addrlo_reg;
2074    hi = lb->addrhi_reg;
2075    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2076#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2077        arg |= 1;
2078#endif
2079        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2080        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2081    } else {
2082        /* If the address needed to be zero-extended, we'll have already
2083           placed it in R4.  The only remaining case is 64-bit guest.  */
2084        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2085    }
2086
2087    lo = lb->datalo_reg;
2088    hi = lb->datahi_reg;
2089    if (TCG_TARGET_REG_BITS == 32) {
2090        switch (s_bits) {
2091        case MO_64:
2092#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2093            arg |= 1;
2094#endif
2095            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2096            /* FALLTHRU */
2097        case MO_32:
2098            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2099            break;
2100        default:
2101            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2102            break;
2103        }
2104    } else {
2105        if (s_bits == MO_64) {
2106            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2107        } else {
2108            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2109        }
2110    }
2111
2112    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2113    tcg_out32(s, MFSPR | RT(arg) | LR);
2114
2115    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2116
2117    tcg_out_b(s, 0, lb->raddr);
2118    return true;
2119}
2120#endif /* SOFTMMU */
2121
2122static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2123{
2124    TCGReg datalo, datahi, addrlo, rbase;
2125    TCGReg addrhi __attribute__((unused));
2126    TCGMemOpIdx oi;
2127    MemOp opc, s_bits;
2128#ifdef CONFIG_SOFTMMU
2129    int mem_index;
2130    tcg_insn_unit *label_ptr;
2131#endif
2132
2133    datalo = *args++;
2134    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2135    addrlo = *args++;
2136    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2137    oi = *args++;
2138    opc = get_memop(oi);
2139    s_bits = opc & MO_SIZE;
2140
2141#ifdef CONFIG_SOFTMMU
2142    mem_index = get_mmuidx(oi);
2143    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2144
2145    /* Load a pointer into the current opcode w/conditional branch-link. */
2146    label_ptr = s->code_ptr;
2147    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2148
2149    rbase = TCG_REG_R3;
2150#else  /* !CONFIG_SOFTMMU */
2151    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2152    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2153        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2154        addrlo = TCG_REG_TMP1;
2155    }
2156#endif
2157
2158    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2159        if (opc & MO_BSWAP) {
2160            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2161            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2162            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2163        } else if (rbase != 0) {
2164            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2165            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2166            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2167        } else if (addrlo == datahi) {
2168            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2169            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2170        } else {
2171            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2172            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2173        }
2174    } else {
2175        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2176        if (!have_isa_2_06 && insn == LDBRX) {
2177            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2178            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2179            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2180            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2181        } else if (insn) {
2182            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2183        } else {
2184            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2185            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2186            insn = qemu_exts_opc[s_bits];
2187            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2188        }
2189    }
2190
2191#ifdef CONFIG_SOFTMMU
2192    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2193                        s->code_ptr, label_ptr);
2194#endif
2195}
2196
2197static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2198{
2199    TCGReg datalo, datahi, addrlo, rbase;
2200    TCGReg addrhi __attribute__((unused));
2201    TCGMemOpIdx oi;
2202    MemOp opc, s_bits;
2203#ifdef CONFIG_SOFTMMU
2204    int mem_index;
2205    tcg_insn_unit *label_ptr;
2206#endif
2207
2208    datalo = *args++;
2209    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2210    addrlo = *args++;
2211    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2212    oi = *args++;
2213    opc = get_memop(oi);
2214    s_bits = opc & MO_SIZE;
2215
2216#ifdef CONFIG_SOFTMMU
2217    mem_index = get_mmuidx(oi);
2218    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2219
2220    /* Load a pointer into the current opcode w/conditional branch-link. */
2221    label_ptr = s->code_ptr;
2222    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2223
2224    rbase = TCG_REG_R3;
2225#else  /* !CONFIG_SOFTMMU */
2226    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2227    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2228        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2229        addrlo = TCG_REG_TMP1;
2230    }
2231#endif
2232
2233    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2234        if (opc & MO_BSWAP) {
2235            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2236            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2237            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2238        } else if (rbase != 0) {
2239            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2240            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2241            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2242        } else {
2243            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2244            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2245        }
2246    } else {
2247        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2248        if (!have_isa_2_06 && insn == STDBRX) {
2249            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2250            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2251            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2252            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2253        } else {
2254            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2255        }
2256    }
2257
2258#ifdef CONFIG_SOFTMMU
2259    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2260                        s->code_ptr, label_ptr);
2261#endif
2262}
2263
2264static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2265{
2266    int i;
2267    for (i = 0; i < count; ++i) {
2268        p[i] = NOP;
2269    }
2270}
2271
/* Parameters for function call generation, used in tcg.c.  */
#define TCG_TARGET_STACK_ALIGN       16
#define TCG_TARGET_EXTEND_ARGS       1

/*
 * Per-ABI frame layout, expressed in multiples of SZR (presumably the
 * host register size in bytes -- defined outside this section):
 *   LINK_AREA_SIZE  size of the linkage area at the bottom of a frame;
 *   LR_OFFSET       offset from the caller's stack pointer at which the
 *                   prologue stores the link register.
 * Only the AIX case overrides TCG_TARGET_CALL_STACK_OFFSET (link area
 * plus 8 further slots); every other ABI gets the default further down.
 * An ABI for which no branch defines LR_OFFSET is rejected at compile
 * time.
 */
#ifdef _CALL_AIX
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
#elif defined(TCG_TARGET_CALL_DARWIN)
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
#  define LINK_AREA_SIZE               (4 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
# if defined(_CALL_SYSV)
#  define LINK_AREA_SIZE               (2 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#endif
#ifndef LR_OFFSET
# error "Unhandled abi"
#endif
/* Default: outgoing call arguments start right above the link area. */
#ifndef TCG_TARGET_CALL_STACK_OFFSET
# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
#endif

/* Byte sizes of the TCG temporary buffer and of the register save area. */
#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)

/*
 * Total frame size: call-argument offset, static call-args area,
 * temporary buffer and register save area, rounded up to a multiple of
 * TCG_TARGET_STACK_ALIGN.
 */
#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
                     + TCG_STATIC_CALL_ARGS_SIZE    \
                     + CPU_TEMP_BUF_SIZE            \
                     + REG_SAVE_SIZE                \
                     + TCG_TARGET_STACK_ALIGN - 1)  \
                    & -TCG_TARGET_STACK_ALIGN)

/* The register save area occupies the topmost REG_SAVE_SIZE bytes. */
#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2312
2313static void tcg_target_qemu_prologue(TCGContext *s)
2314{
2315    int i;
2316
2317#ifdef _CALL_AIX
2318    const void **desc = (const void **)s->code_ptr;
2319    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2320    desc[1] = 0;                            /* environment pointer */
2321    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2322#endif
2323
2324    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2325                  CPU_TEMP_BUF_SIZE);
2326
2327    /* Prologue */
2328    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2329    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2330              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2331
2332    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2333        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2334                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2335    }
2336    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2337
2338#ifndef CONFIG_SOFTMMU
2339    if (guest_base) {
2340        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2341        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2342    }
2343#endif
2344
2345    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2346    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2347    if (USE_REG_TB) {
2348        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2349    }
2350    tcg_out32(s, BCCTR | BO_ALWAYS);
2351
2352    /* Epilogue */
2353    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2354
2355    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2356    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2357        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2358                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2359    }
2360    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2361    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2362    tcg_out32(s, BCLR | BO_ALWAYS);
2363}
2364
2365static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
2366                       const int *const_args)
2367{
2368    TCGArg a0, a1, a2;
2369    int c;
2370
2371    switch (opc) {
2372    case INDEX_op_exit_tb:
2373        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
2374        tcg_out_b(s, 0, tcg_code_gen_epilogue);
2375        break;
2376    case INDEX_op_goto_tb:
2377        if (s->tb_jmp_insn_offset) {
2378            /* Direct jump. */
2379            if (TCG_TARGET_REG_BITS == 64) {
2380                /* Ensure the next insns are 8-byte aligned. */
2381                if ((uintptr_t)s->code_ptr & 7) {
2382                    tcg_out32(s, NOP);
2383                }
2384                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2385                tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2386                tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2387            } else {
2388                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2389                tcg_out32(s, B);
2390                s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
2391                break;
2392            }
2393        } else {
2394            /* Indirect jump. */
2395            tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
2396            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
2397                       (intptr_t)(s->tb_jmp_insn_offset + args[0]));
2398        }
2399        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2400        tcg_out32(s, BCCTR | BO_ALWAYS);
2401        set_jmp_reset_offset(s, args[0]);
2402        if (USE_REG_TB) {
2403            /* For the unlinked case, need to reset TCG_REG_TB.  */
2404            tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
2405                             -tcg_current_code_size(s));
2406        }
2407        break;
2408    case INDEX_op_goto_ptr:
2409        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2410        if (USE_REG_TB) {
2411            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2412        }
2413        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2414        tcg_out32(s, BCCTR | BO_ALWAYS);
2415        break;
2416    case INDEX_op_br:
2417        {
2418            TCGLabel *l = arg_label(args[0]);
2419            uint32_t insn = B;
2420
2421            if (l->has_value) {
2422                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2423                                       l->u.value_ptr);
2424            } else {
2425                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2426            }
2427            tcg_out32(s, insn);
2428        }
2429        break;
2430    case INDEX_op_ld8u_i32:
2431    case INDEX_op_ld8u_i64:
2432        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2433        break;
2434    case INDEX_op_ld8s_i32:
2435    case INDEX_op_ld8s_i64:
2436        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2437        tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
2438        break;
2439    case INDEX_op_ld16u_i32:
2440    case INDEX_op_ld16u_i64:
2441        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2442        break;
2443    case INDEX_op_ld16s_i32:
2444    case INDEX_op_ld16s_i64:
2445        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2446        break;
2447    case INDEX_op_ld_i32:
2448    case INDEX_op_ld32u_i64:
2449        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2450        break;
2451    case INDEX_op_ld32s_i64:
2452        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2453        break;
2454    case INDEX_op_ld_i64:
2455        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2456        break;
2457    case INDEX_op_st8_i32:
2458    case INDEX_op_st8_i64:
2459        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2460        break;
2461    case INDEX_op_st16_i32:
2462    case INDEX_op_st16_i64:
2463        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2464        break;
2465    case INDEX_op_st_i32:
2466    case INDEX_op_st32_i64:
2467        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2468        break;
2469    case INDEX_op_st_i64:
2470        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2471        break;
2472
2473    case INDEX_op_add_i32:
2474        a0 = args[0], a1 = args[1], a2 = args[2];
2475        if (const_args[2]) {
2476        do_addi_32:
2477            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2478        } else {
2479            tcg_out32(s, ADD | TAB(a0, a1, a2));
2480        }
2481        break;
2482    case INDEX_op_sub_i32:
2483        a0 = args[0], a1 = args[1], a2 = args[2];
2484        if (const_args[1]) {
2485            if (const_args[2]) {
2486                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2487            } else {
2488                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2489            }
2490        } else if (const_args[2]) {
2491            a2 = -a2;
2492            goto do_addi_32;
2493        } else {
2494            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2495        }
2496        break;
2497
2498    case INDEX_op_and_i32:
2499        a0 = args[0], a1 = args[1], a2 = args[2];
2500        if (const_args[2]) {
2501            tcg_out_andi32(s, a0, a1, a2);
2502        } else {
2503            tcg_out32(s, AND | SAB(a1, a0, a2));
2504        }
2505        break;
2506    case INDEX_op_and_i64:
2507        a0 = args[0], a1 = args[1], a2 = args[2];
2508        if (const_args[2]) {
2509            tcg_out_andi64(s, a0, a1, a2);
2510        } else {
2511            tcg_out32(s, AND | SAB(a1, a0, a2));
2512        }
2513        break;
2514    case INDEX_op_or_i64:
2515    case INDEX_op_or_i32:
2516        a0 = args[0], a1 = args[1], a2 = args[2];
2517        if (const_args[2]) {
2518            tcg_out_ori32(s, a0, a1, a2);
2519        } else {
2520            tcg_out32(s, OR | SAB(a1, a0, a2));
2521        }
2522        break;
2523    case INDEX_op_xor_i64:
2524    case INDEX_op_xor_i32:
2525        a0 = args[0], a1 = args[1], a2 = args[2];
2526        if (const_args[2]) {
2527            tcg_out_xori32(s, a0, a1, a2);
2528        } else {
2529            tcg_out32(s, XOR | SAB(a1, a0, a2));
2530        }
2531        break;
2532    case INDEX_op_andc_i32:
2533        a0 = args[0], a1 = args[1], a2 = args[2];
2534        if (const_args[2]) {
2535            tcg_out_andi32(s, a0, a1, ~a2);
2536        } else {
2537            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2538        }
2539        break;
2540    case INDEX_op_andc_i64:
2541        a0 = args[0], a1 = args[1], a2 = args[2];
2542        if (const_args[2]) {
2543            tcg_out_andi64(s, a0, a1, ~a2);
2544        } else {
2545            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2546        }
2547        break;
2548    case INDEX_op_orc_i32:
2549        if (const_args[2]) {
2550            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2551            break;
2552        }
2553        /* FALLTHRU */
2554    case INDEX_op_orc_i64:
2555        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2556        break;
2557    case INDEX_op_eqv_i32:
2558        if (const_args[2]) {
2559            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2560            break;
2561        }
2562        /* FALLTHRU */
2563    case INDEX_op_eqv_i64:
2564        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2565        break;
2566    case INDEX_op_nand_i32:
2567    case INDEX_op_nand_i64:
2568        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2569        break;
2570    case INDEX_op_nor_i32:
2571    case INDEX_op_nor_i64:
2572        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2573        break;
2574
2575    case INDEX_op_clz_i32:
2576        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2577                      args[2], const_args[2]);
2578        break;
2579    case INDEX_op_ctz_i32:
2580        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2581                      args[2], const_args[2]);
2582        break;
2583    case INDEX_op_ctpop_i32:
2584        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2585        break;
2586
2587    case INDEX_op_clz_i64:
2588        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2589                      args[2], const_args[2]);
2590        break;
2591    case INDEX_op_ctz_i64:
2592        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2593                      args[2], const_args[2]);
2594        break;
2595    case INDEX_op_ctpop_i64:
2596        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2597        break;
2598
2599    case INDEX_op_mul_i32:
2600        a0 = args[0], a1 = args[1], a2 = args[2];
2601        if (const_args[2]) {
2602            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2603        } else {
2604            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2605        }
2606        break;
2607
2608    case INDEX_op_div_i32:
2609        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2610        break;
2611
2612    case INDEX_op_divu_i32:
2613        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2614        break;
2615
2616    case INDEX_op_shl_i32:
2617        if (const_args[2]) {
2618            /* Limit immediate shift count lest we create an illegal insn.  */
2619            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2620        } else {
2621            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2622        }
2623        break;
2624    case INDEX_op_shr_i32:
2625        if (const_args[2]) {
2626            /* Limit immediate shift count lest we create an illegal insn.  */
2627            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2628        } else {
2629            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2630        }
2631        break;
2632    case INDEX_op_sar_i32:
2633        if (const_args[2]) {
2634            /* Limit immediate shift count lest we create an illegal insn.  */
2635            tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31));
2636        } else {
2637            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2638        }
2639        break;
2640    case INDEX_op_rotl_i32:
2641        if (const_args[2]) {
2642            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2643        } else {
2644            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2645                         | MB(0) | ME(31));
2646        }
2647        break;
2648    case INDEX_op_rotr_i32:
2649        if (const_args[2]) {
2650            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2651        } else {
2652            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2653            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2654                         | MB(0) | ME(31));
2655        }
2656        break;
2657
2658    case INDEX_op_brcond_i32:
2659        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2660                       arg_label(args[3]), TCG_TYPE_I32);
2661        break;
2662    case INDEX_op_brcond_i64:
2663        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2664                       arg_label(args[3]), TCG_TYPE_I64);
2665        break;
2666    case INDEX_op_brcond2_i32:
2667        tcg_out_brcond2(s, args, const_args);
2668        break;
2669
2670    case INDEX_op_neg_i32:
2671    case INDEX_op_neg_i64:
2672        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2673        break;
2674
2675    case INDEX_op_not_i32:
2676    case INDEX_op_not_i64:
2677        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2678        break;
2679
2680    case INDEX_op_add_i64:
2681        a0 = args[0], a1 = args[1], a2 = args[2];
2682        if (const_args[2]) {
2683        do_addi_64:
2684            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2685        } else {
2686            tcg_out32(s, ADD | TAB(a0, a1, a2));
2687        }
2688        break;
2689    case INDEX_op_sub_i64:
2690        a0 = args[0], a1 = args[1], a2 = args[2];
2691        if (const_args[1]) {
2692            if (const_args[2]) {
2693                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2694            } else {
2695                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2696            }
2697        } else if (const_args[2]) {
2698            a2 = -a2;
2699            goto do_addi_64;
2700        } else {
2701            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2702        }
2703        break;
2704
2705    case INDEX_op_shl_i64:
2706        if (const_args[2]) {
2707            /* Limit immediate shift count lest we create an illegal insn.  */
2708            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2709        } else {
2710            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2711        }
2712        break;
2713    case INDEX_op_shr_i64:
2714        if (const_args[2]) {
2715            /* Limit immediate shift count lest we create an illegal insn.  */
2716            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2717        } else {
2718            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2719        }
2720        break;
2721    case INDEX_op_sar_i64:
2722        if (const_args[2]) {
2723            int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
2724            tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
2725        } else {
2726            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2727        }
2728        break;
2729    case INDEX_op_rotl_i64:
2730        if (const_args[2]) {
2731            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2732        } else {
2733            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2734        }
2735        break;
2736    case INDEX_op_rotr_i64:
2737        if (const_args[2]) {
2738            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2739        } else {
2740            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2741            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2742        }
2743        break;
2744
2745    case INDEX_op_mul_i64:
2746        a0 = args[0], a1 = args[1], a2 = args[2];
2747        if (const_args[2]) {
2748            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2749        } else {
2750            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2751        }
2752        break;
2753    case INDEX_op_div_i64:
2754        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2755        break;
2756    case INDEX_op_divu_i64:
2757        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2758        break;
2759
2760    case INDEX_op_qemu_ld_i32:
2761        tcg_out_qemu_ld(s, args, false);
2762        break;
2763    case INDEX_op_qemu_ld_i64:
2764        tcg_out_qemu_ld(s, args, true);
2765        break;
2766    case INDEX_op_qemu_st_i32:
2767        tcg_out_qemu_st(s, args, false);
2768        break;
2769    case INDEX_op_qemu_st_i64:
2770        tcg_out_qemu_st(s, args, true);
2771        break;
2772
2773    case INDEX_op_ext8s_i32:
2774    case INDEX_op_ext8s_i64:
2775        c = EXTSB;
2776        goto gen_ext;
2777    case INDEX_op_ext16s_i32:
2778    case INDEX_op_ext16s_i64:
2779        c = EXTSH;
2780        goto gen_ext;
2781    case INDEX_op_ext_i32_i64:
2782    case INDEX_op_ext32s_i64:
2783        c = EXTSW;
2784        goto gen_ext;
2785    gen_ext:
2786        tcg_out32(s, c | RS(args[1]) | RA(args[0]));
2787        break;
2788    case INDEX_op_extu_i32_i64:
2789        tcg_out_ext32u(s, args[0], args[1]);
2790        break;
2791
2792    case INDEX_op_setcond_i32:
2793        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2794                        const_args[2]);
2795        break;
2796    case INDEX_op_setcond_i64:
2797        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2798                        const_args[2]);
2799        break;
2800    case INDEX_op_setcond2_i32:
2801        tcg_out_setcond2(s, args, const_args);
2802        break;
2803
2804    case INDEX_op_bswap16_i32:
2805    case INDEX_op_bswap16_i64:
2806        a0 = args[0], a1 = args[1];
2807        /* a1 = abcd */
2808        if (a0 != a1) {
2809            /* a0 = (a1 r<< 24) & 0xff # 000c */
2810            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2811            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
2812            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
2813        } else {
2814            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
2815            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
2816            /* a0 = (a1 r<< 24) & 0xff # 000c */
2817            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2818            /* a0 = a0 | r0 # 00dc */
2819            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
2820        }
2821        break;
2822
2823    case INDEX_op_bswap32_i32:
2824    case INDEX_op_bswap32_i64:
2825        /* Stolen from gcc's builtin_bswap32 */
2826        a1 = args[1];
2827        a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
2828
2829        /* a1 = args[1] # abcd */
2830        /* a0 = rotate_left (a1, 8) # bcda */
2831        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2832        /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
2833        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2834        /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
2835        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2836
2837        if (a0 == TCG_REG_R0) {
2838            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2839        }
2840        break;
2841
2842    case INDEX_op_bswap64_i64:
2843        a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
2844        if (a0 == a1) {
2845            a0 = TCG_REG_R0;
2846            a2 = a1;
2847        }
2848
2849        /* a1 = # abcd efgh */
2850        /* a0 = rl32(a1, 8) # 0000 fghe */
2851        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2852        /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
2853        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2854        /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
2855        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2856
2857        /* a0 = rl64(a0, 32) # hgfe 0000 */
2858        /* a2 = rl64(a1, 32) # efgh abcd */
2859        tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
2860        tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
2861
2862        /* a0 = dep(a0, rl32(a2, 8), 0xffffffff)  # hgfe bcda */
2863        tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
2864        /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
2865        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
2866        /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
2867        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
2868
2869        if (a0 == 0) {
2870            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2871        }
2872        break;
2873
2874    case INDEX_op_deposit_i32:
2875        if (const_args[2]) {
2876            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
2877            tcg_out_andi32(s, args[0], args[0], ~mask);
2878        } else {
2879            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
2880                        32 - args[3] - args[4], 31 - args[3]);
2881        }
2882        break;
2883    case INDEX_op_deposit_i64:
2884        if (const_args[2]) {
2885            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
2886            tcg_out_andi64(s, args[0], args[0], ~mask);
2887        } else {
2888            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
2889                        64 - args[3] - args[4]);
2890        }
2891        break;
2892
2893    case INDEX_op_extract_i32:
2894        tcg_out_rlw(s, RLWINM, args[0], args[1],
2895                    32 - args[2], 32 - args[3], 31);
2896        break;
2897    case INDEX_op_extract_i64:
2898        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
2899        break;
2900
2901    case INDEX_op_movcond_i32:
2902        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
2903                        args[3], args[4], const_args[2]);
2904        break;
2905    case INDEX_op_movcond_i64:
2906        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
2907                        args[3], args[4], const_args[2]);
2908        break;
2909
2910#if TCG_TARGET_REG_BITS == 64
2911    case INDEX_op_add2_i64:
2912#else
2913    case INDEX_op_add2_i32:
2914#endif
2915        /* Note that the CA bit is defined based on the word size of the
2916           environment.  So in 64-bit mode it's always carry-out of bit 63.
2917           The fallback code using deposit works just as well for 32-bit.  */
2918        a0 = args[0], a1 = args[1];
2919        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
2920            a0 = TCG_REG_R0;
2921        }
2922        if (const_args[4]) {
2923            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
2924        } else {
2925            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
2926        }
2927        if (const_args[5]) {
2928            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
2929        } else {
2930            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
2931        }
2932        if (a0 != args[0]) {
2933            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2934        }
2935        break;
2936
2937#if TCG_TARGET_REG_BITS == 64
2938    case INDEX_op_sub2_i64:
2939#else
2940    case INDEX_op_sub2_i32:
2941#endif
2942        a0 = args[0], a1 = args[1];
2943        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
2944            a0 = TCG_REG_R0;
2945        }
2946        if (const_args[2]) {
2947            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
2948        } else {
2949            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
2950        }
2951        if (const_args[3]) {
2952            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
2953        } else {
2954            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
2955        }
2956        if (a0 != args[0]) {
2957            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2958        }
2959        break;
2960
2961    case INDEX_op_muluh_i32:
2962        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
2963        break;
2964    case INDEX_op_mulsh_i32:
2965        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
2966        break;
2967    case INDEX_op_muluh_i64:
2968        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
2969        break;
2970    case INDEX_op_mulsh_i64:
2971        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
2972        break;
2973
2974    case INDEX_op_mb:
2975        tcg_out_mb(s, args[0]);
2976        break;
2977
2978    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
2979    case INDEX_op_mov_i64:
2980    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
2981    default:
2982        tcg_abort();
2983    }
2984}
2985
2986int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2987{
2988    switch (opc) {
2989    case INDEX_op_and_vec:
2990    case INDEX_op_or_vec:
2991    case INDEX_op_xor_vec:
2992    case INDEX_op_andc_vec:
2993    case INDEX_op_not_vec:
2994        return 1;
2995    case INDEX_op_orc_vec:
2996        return have_isa_2_07;
2997    case INDEX_op_add_vec:
2998    case INDEX_op_sub_vec:
2999    case INDEX_op_smax_vec:
3000    case INDEX_op_smin_vec:
3001    case INDEX_op_umax_vec:
3002    case INDEX_op_umin_vec:
3003    case INDEX_op_shlv_vec:
3004    case INDEX_op_shrv_vec:
3005    case INDEX_op_sarv_vec:
3006    case INDEX_op_rotlv_vec:
3007        return vece <= MO_32 || have_isa_2_07;
3008    case INDEX_op_ssadd_vec:
3009    case INDEX_op_sssub_vec:
3010    case INDEX_op_usadd_vec:
3011    case INDEX_op_ussub_vec:
3012        return vece <= MO_32;
3013    case INDEX_op_cmp_vec:
3014    case INDEX_op_shli_vec:
3015    case INDEX_op_shri_vec:
3016    case INDEX_op_sari_vec:
3017    case INDEX_op_rotli_vec:
3018        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3019    case INDEX_op_neg_vec:
3020        return vece >= MO_32 && have_isa_3_00;
3021    case INDEX_op_mul_vec:
3022        switch (vece) {
3023        case MO_8:
3024        case MO_16:
3025            return -1;
3026        case MO_32:
3027            return have_isa_2_07 ? 1 : -1;
3028        case MO_64:
3029            return have_isa_3_10;
3030        }
3031        return 0;
3032    case INDEX_op_bitsel_vec:
3033        return have_vsx;
3034    case INDEX_op_rotrv_vec:
3035        return -1;
3036    default:
3037        return 0;
3038    }
3039}
3040
3041static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3042                            TCGReg dst, TCGReg src)
3043{
3044    tcg_debug_assert(dst >= TCG_REG_V0);
3045
3046    /* Splat from integer reg allowed via constraints for v3.00.  */
3047    if (src < TCG_REG_V0) {
3048        tcg_debug_assert(have_isa_3_00);
3049        switch (vece) {
3050        case MO_64:
3051            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3052            return true;
3053        case MO_32:
3054            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3055            return true;
3056        default:
3057            /* Fail, so that we fall back on either dupm or mov+dup.  */
3058            return false;
3059        }
3060    }
3061
3062    /*
3063     * Recall we use (or emulate) VSX integer loads, so the integer is
3064     * right justified within the left (zero-index) double-word.
3065     */
3066    switch (vece) {
3067    case MO_8:
3068        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3069        break;
3070    case MO_16:
3071        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3072        break;
3073    case MO_32:
3074        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3075        break;
3076    case MO_64:
3077        if (have_vsx) {
3078            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3079            break;
3080        }
3081        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3082        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3083        break;
3084    default:
3085        g_assert_not_reached();
3086    }
3087    return true;
3088}
3089
3090static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3091                             TCGReg out, TCGReg base, intptr_t offset)
3092{
3093    int elt;
3094
3095    tcg_debug_assert(out >= TCG_REG_V0);
3096    switch (vece) {
3097    case MO_8:
3098        if (have_isa_3_00) {
3099            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3100        } else {
3101            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3102        }
3103        elt = extract32(offset, 0, 4);
3104#ifndef HOST_WORDS_BIGENDIAN
3105        elt ^= 15;
3106#endif
3107        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3108        break;
3109    case MO_16:
3110        tcg_debug_assert((offset & 1) == 0);
3111        if (have_isa_3_00) {
3112            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3113        } else {
3114            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3115        }
3116        elt = extract32(offset, 1, 3);
3117#ifndef HOST_WORDS_BIGENDIAN
3118        elt ^= 7;
3119#endif
3120        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3121        break;
3122    case MO_32:
3123        if (have_isa_3_00) {
3124            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3125            break;
3126        }
3127        tcg_debug_assert((offset & 3) == 0);
3128        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3129        elt = extract32(offset, 2, 2);
3130#ifndef HOST_WORDS_BIGENDIAN
3131        elt ^= 3;
3132#endif
3133        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3134        break;
3135    case MO_64:
3136        if (have_vsx) {
3137            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3138            break;
3139        }
3140        tcg_debug_assert((offset & 7) == 0);
3141        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3142        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3143        elt = extract32(offset, 3, 1);
3144#ifndef HOST_WORDS_BIGENDIAN
3145        elt = !elt;
3146#endif
3147        if (elt) {
3148            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3149        } else {
3150            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3151        }
3152        break;
3153    default:
3154        g_assert_not_reached();
3155    }
3156    return true;
3157}
3158
3159static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3160                           unsigned vecl, unsigned vece,
3161                           const TCGArg *args, const int *const_args)
3162{
3163    static const uint32_t
3164        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3165        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3166        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3167        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3168        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3169        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3170        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3171        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3172        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3173        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3174        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3175        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3176        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3177        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3178        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3179        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3180        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3181        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3182        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3183        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3184        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3185        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3186        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3187        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3188        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3189
3190    TCGType type = vecl + TCG_TYPE_V64;
3191    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3192    uint32_t insn;
3193
3194    switch (opc) {
3195    case INDEX_op_ld_vec:
3196        tcg_out_ld(s, type, a0, a1, a2);
3197        return;
3198    case INDEX_op_st_vec:
3199        tcg_out_st(s, type, a0, a1, a2);
3200        return;
3201    case INDEX_op_dupm_vec:
3202        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3203        return;
3204
3205    case INDEX_op_add_vec:
3206        insn = add_op[vece];
3207        break;
3208    case INDEX_op_sub_vec:
3209        insn = sub_op[vece];
3210        break;
3211    case INDEX_op_neg_vec:
3212        insn = neg_op[vece];
3213        a2 = a1;
3214        a1 = 0;
3215        break;
3216    case INDEX_op_mul_vec:
3217        insn = mul_op[vece];
3218        break;
3219    case INDEX_op_ssadd_vec:
3220        insn = ssadd_op[vece];
3221        break;
3222    case INDEX_op_sssub_vec:
3223        insn = sssub_op[vece];
3224        break;
3225    case INDEX_op_usadd_vec:
3226        insn = usadd_op[vece];
3227        break;
3228    case INDEX_op_ussub_vec:
3229        insn = ussub_op[vece];
3230        break;
3231    case INDEX_op_smin_vec:
3232        insn = smin_op[vece];
3233        break;
3234    case INDEX_op_umin_vec:
3235        insn = umin_op[vece];
3236        break;
3237    case INDEX_op_smax_vec:
3238        insn = smax_op[vece];
3239        break;
3240    case INDEX_op_umax_vec:
3241        insn = umax_op[vece];
3242        break;
3243    case INDEX_op_shlv_vec:
3244        insn = shlv_op[vece];
3245        break;
3246    case INDEX_op_shrv_vec:
3247        insn = shrv_op[vece];
3248        break;
3249    case INDEX_op_sarv_vec:
3250        insn = sarv_op[vece];
3251        break;
3252    case INDEX_op_and_vec:
3253        insn = VAND;
3254        break;
3255    case INDEX_op_or_vec:
3256        insn = VOR;
3257        break;
3258    case INDEX_op_xor_vec:
3259        insn = VXOR;
3260        break;
3261    case INDEX_op_andc_vec:
3262        insn = VANDC;
3263        break;
3264    case INDEX_op_not_vec:
3265        insn = VNOR;
3266        a2 = a1;
3267        break;
3268    case INDEX_op_orc_vec:
3269        insn = VORC;
3270        break;
3271
3272    case INDEX_op_cmp_vec:
3273        switch (args[3]) {
3274        case TCG_COND_EQ:
3275            insn = eq_op[vece];
3276            break;
3277        case TCG_COND_NE:
3278            insn = ne_op[vece];
3279            break;
3280        case TCG_COND_GT:
3281            insn = gts_op[vece];
3282            break;
3283        case TCG_COND_GTU:
3284            insn = gtu_op[vece];
3285            break;
3286        default:
3287            g_assert_not_reached();
3288        }
3289        break;
3290
3291    case INDEX_op_bitsel_vec:
3292        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3293        return;
3294
3295    case INDEX_op_dup2_vec:
3296        assert(TCG_TARGET_REG_BITS == 32);
3297        /* With inputs a1 = xLxx, a2 = xHxx  */
3298        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3299        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3300        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3301        return;
3302
3303    case INDEX_op_ppc_mrgh_vec:
3304        insn = mrgh_op[vece];
3305        break;
3306    case INDEX_op_ppc_mrgl_vec:
3307        insn = mrgl_op[vece];
3308        break;
3309    case INDEX_op_ppc_muleu_vec:
3310        insn = muleu_op[vece];
3311        break;
3312    case INDEX_op_ppc_mulou_vec:
3313        insn = mulou_op[vece];
3314        break;
3315    case INDEX_op_ppc_pkum_vec:
3316        insn = pkum_op[vece];
3317        break;
3318    case INDEX_op_rotlv_vec:
3319        insn = rotl_op[vece];
3320        break;
3321    case INDEX_op_ppc_msum_vec:
3322        tcg_debug_assert(vece == MO_16);
3323        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3324        return;
3325
3326    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3327    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3328    default:
3329        g_assert_not_reached();
3330    }
3331
3332    tcg_debug_assert(insn != 0);
3333    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3334}
3335
3336static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3337                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3338{
3339    TCGv_vec t1;
3340
3341    if (vece == MO_32) {
3342        /*
3343         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3344         * So using negative numbers gets us the 4th bit easily.
3345         */
3346        imm = sextract32(imm, 0, 5);
3347    } else {
3348        imm &= (8 << vece) - 1;
3349    }
3350
3351    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
3352    t1 = tcg_constant_vec(type, MO_8, imm);
3353    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3354              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3355}
3356
3357static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3358                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3359{
3360    bool need_swap = false, need_inv = false;
3361
3362    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3363
3364    switch (cond) {
3365    case TCG_COND_EQ:
3366    case TCG_COND_GT:
3367    case TCG_COND_GTU:
3368        break;
3369    case TCG_COND_NE:
3370        if (have_isa_3_00 && vece <= MO_32) {
3371            break;
3372        }
3373        /* fall through */
3374    case TCG_COND_LE:
3375    case TCG_COND_LEU:
3376        need_inv = true;
3377        break;
3378    case TCG_COND_LT:
3379    case TCG_COND_LTU:
3380        need_swap = true;
3381        break;
3382    case TCG_COND_GE:
3383    case TCG_COND_GEU:
3384        need_swap = need_inv = true;
3385        break;
3386    default:
3387        g_assert_not_reached();
3388    }
3389
3390    if (need_inv) {
3391        cond = tcg_invert_cond(cond);
3392    }
3393    if (need_swap) {
3394        TCGv_vec t1;
3395        t1 = v1, v1 = v2, v2 = t1;
3396        cond = tcg_swap_cond(cond);
3397    }
3398
3399    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3400              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3401
3402    if (need_inv) {
3403        tcg_gen_not_vec(vece, v0, v0);
3404    }
3405}
3406
3407static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3408                           TCGv_vec v1, TCGv_vec v2)
3409{
3410    TCGv_vec t1 = tcg_temp_new_vec(type);
3411    TCGv_vec t2 = tcg_temp_new_vec(type);
3412    TCGv_vec c0, c16;
3413
3414    switch (vece) {
3415    case MO_8:
3416    case MO_16:
3417        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3418                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3419        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3420                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3421        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3422                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3423        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3424                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3425        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3426                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3427	break;
3428
3429    case MO_32:
3430        tcg_debug_assert(!have_isa_2_07);
3431        /*
3432         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3433         * So using -16 is a quick way to represent 16.
3434         */
3435        c16 = tcg_constant_vec(type, MO_8, -16);
3436        c0 = tcg_constant_vec(type, MO_8, 0);
3437
3438        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3439                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3440        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3441                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3442        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3443                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3444        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3445                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3446        tcg_gen_add_vec(MO_32, v0, t1, t2);
3447        break;
3448
3449    default:
3450        g_assert_not_reached();
3451    }
3452    tcg_temp_free_vec(t1);
3453    tcg_temp_free_vec(t2);
3454}
3455
3456void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3457                       TCGArg a0, ...)
3458{
3459    va_list va;
3460    TCGv_vec v0, v1, v2, t0;
3461    TCGArg a2;
3462
3463    va_start(va, a0);
3464    v0 = temp_tcgv_vec(arg_temp(a0));
3465    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3466    a2 = va_arg(va, TCGArg);
3467
3468    switch (opc) {
3469    case INDEX_op_shli_vec:
3470        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3471        break;
3472    case INDEX_op_shri_vec:
3473        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3474        break;
3475    case INDEX_op_sari_vec:
3476        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3477        break;
3478    case INDEX_op_rotli_vec:
3479        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3480        break;
3481    case INDEX_op_cmp_vec:
3482        v2 = temp_tcgv_vec(arg_temp(a2));
3483        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3484        break;
3485    case INDEX_op_mul_vec:
3486        v2 = temp_tcgv_vec(arg_temp(a2));
3487        expand_vec_mul(type, vece, v0, v1, v2);
3488        break;
3489    case INDEX_op_rotlv_vec:
3490        v2 = temp_tcgv_vec(arg_temp(a2));
3491        t0 = tcg_temp_new_vec(type);
3492        tcg_gen_neg_vec(vece, t0, v2);
3493        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3494        tcg_temp_free_vec(t0);
3495        break;
3496    default:
3497        g_assert_not_reached();
3498    }
3499    va_end(va);
3500}
3501
3502static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
3503{
3504    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
3505    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
3506    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
3507    static const TCGTargetOpDef S_S = { .args_ct_str = { "S", "S" } };
3508    static const TCGTargetOpDef r_ri = { .args_ct_str = { "r", "ri" } };
3509    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
3510    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
3511    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
3512    static const TCGTargetOpDef S_S_S = { .args_ct_str = { "S", "S", "S" } };
3513    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
3514    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
3515    static const TCGTargetOpDef r_r_rT = { .args_ct_str = { "r", "r", "rT" } };
3516    static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } };
3517    static const TCGTargetOpDef r_rI_ri
3518        = { .args_ct_str = { "r", "rI", "ri" } };
3519    static const TCGTargetOpDef r_rI_rT
3520        = { .args_ct_str = { "r", "rI", "rT" } };
3521    static const TCGTargetOpDef r_r_rZW
3522        = { .args_ct_str = { "r", "r", "rZW" } };
3523    static const TCGTargetOpDef L_L_L_L
3524        = { .args_ct_str = { "L", "L", "L", "L" } };
3525    static const TCGTargetOpDef S_S_S_S
3526        = { .args_ct_str = { "S", "S", "S", "S" } };
3527    static const TCGTargetOpDef movc
3528        = { .args_ct_str = { "r", "r", "ri", "rZ", "rZ" } };
3529    static const TCGTargetOpDef dep
3530        = { .args_ct_str = { "r", "0", "rZ" } };
3531    static const TCGTargetOpDef br2
3532        = { .args_ct_str = { "r", "r", "ri", "ri" } };
3533    static const TCGTargetOpDef setc2
3534        = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
3535    static const TCGTargetOpDef add2
3536        = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
3537    static const TCGTargetOpDef sub2
3538        = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
3539    static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
3540    static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
3541    static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
3542    static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
3543    static const TCGTargetOpDef v_v_v_v
3544        = { .args_ct_str = { "v", "v", "v", "v" } };
3545
3546    switch (op) {
3547    case INDEX_op_goto_ptr:
3548        return &r;
3549
3550    case INDEX_op_ld8u_i32:
3551    case INDEX_op_ld8s_i32:
3552    case INDEX_op_ld16u_i32:
3553    case INDEX_op_ld16s_i32:
3554    case INDEX_op_ld_i32:
3555    case INDEX_op_st8_i32:
3556    case INDEX_op_st16_i32:
3557    case INDEX_op_st_i32:
3558    case INDEX_op_ctpop_i32:
3559    case INDEX_op_neg_i32:
3560    case INDEX_op_not_i32:
3561    case INDEX_op_ext8s_i32:
3562    case INDEX_op_ext16s_i32:
3563    case INDEX_op_bswap16_i32:
3564    case INDEX_op_bswap32_i32:
3565    case INDEX_op_extract_i32:
3566    case INDEX_op_ld8u_i64:
3567    case INDEX_op_ld8s_i64:
3568    case INDEX_op_ld16u_i64:
3569    case INDEX_op_ld16s_i64:
3570    case INDEX_op_ld32u_i64:
3571    case INDEX_op_ld32s_i64:
3572    case INDEX_op_ld_i64:
3573    case INDEX_op_st8_i64:
3574    case INDEX_op_st16_i64:
3575    case INDEX_op_st32_i64:
3576    case INDEX_op_st_i64:
3577    case INDEX_op_ctpop_i64:
3578    case INDEX_op_neg_i64:
3579    case INDEX_op_not_i64:
3580    case INDEX_op_ext8s_i64:
3581    case INDEX_op_ext16s_i64:
3582    case INDEX_op_ext32s_i64:
3583    case INDEX_op_ext_i32_i64:
3584    case INDEX_op_extu_i32_i64:
3585    case INDEX_op_bswap16_i64:
3586    case INDEX_op_bswap32_i64:
3587    case INDEX_op_bswap64_i64:
3588    case INDEX_op_extract_i64:
3589        return &r_r;
3590
3591    case INDEX_op_add_i32:
3592    case INDEX_op_and_i32:
3593    case INDEX_op_or_i32:
3594    case INDEX_op_xor_i32:
3595    case INDEX_op_andc_i32:
3596    case INDEX_op_orc_i32:
3597    case INDEX_op_eqv_i32:
3598    case INDEX_op_shl_i32:
3599    case INDEX_op_shr_i32:
3600    case INDEX_op_sar_i32:
3601    case INDEX_op_rotl_i32:
3602    case INDEX_op_rotr_i32:
3603    case INDEX_op_setcond_i32:
3604    case INDEX_op_and_i64:
3605    case INDEX_op_andc_i64:
3606    case INDEX_op_shl_i64:
3607    case INDEX_op_shr_i64:
3608    case INDEX_op_sar_i64:
3609    case INDEX_op_rotl_i64:
3610    case INDEX_op_rotr_i64:
3611    case INDEX_op_setcond_i64:
3612        return &r_r_ri;
3613    case INDEX_op_mul_i32:
3614    case INDEX_op_mul_i64:
3615        return &r_r_rI;
3616    case INDEX_op_div_i32:
3617    case INDEX_op_divu_i32:
3618    case INDEX_op_nand_i32:
3619    case INDEX_op_nor_i32:
3620    case INDEX_op_muluh_i32:
3621    case INDEX_op_mulsh_i32:
3622    case INDEX_op_orc_i64:
3623    case INDEX_op_eqv_i64:
3624    case INDEX_op_nand_i64:
3625    case INDEX_op_nor_i64:
3626    case INDEX_op_div_i64:
3627    case INDEX_op_divu_i64:
3628    case INDEX_op_mulsh_i64:
3629    case INDEX_op_muluh_i64:
3630        return &r_r_r;
3631    case INDEX_op_sub_i32:
3632        return &r_rI_ri;
3633    case INDEX_op_add_i64:
3634        return &r_r_rT;
3635    case INDEX_op_or_i64:
3636    case INDEX_op_xor_i64:
3637        return &r_r_rU;
3638    case INDEX_op_sub_i64:
3639        return &r_rI_rT;
3640    case INDEX_op_clz_i32:
3641    case INDEX_op_ctz_i32:
3642    case INDEX_op_clz_i64:
3643    case INDEX_op_ctz_i64:
3644        return &r_r_rZW;
3645
3646    case INDEX_op_brcond_i32:
3647    case INDEX_op_brcond_i64:
3648        return &r_ri;
3649
3650    case INDEX_op_movcond_i32:
3651    case INDEX_op_movcond_i64:
3652        return &movc;
3653    case INDEX_op_deposit_i32:
3654    case INDEX_op_deposit_i64:
3655        return &dep;
3656    case INDEX_op_brcond2_i32:
3657        return &br2;
3658    case INDEX_op_setcond2_i32:
3659        return &setc2;
3660    case INDEX_op_add2_i64:
3661    case INDEX_op_add2_i32:
3662        return &add2;
3663    case INDEX_op_sub2_i64:
3664    case INDEX_op_sub2_i32:
3665        return &sub2;
3666
3667    case INDEX_op_qemu_ld_i32:
3668        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3669                ? &r_L : &r_L_L);
3670    case INDEX_op_qemu_st_i32:
3671        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3672                ? &S_S : &S_S_S);
3673    case INDEX_op_qemu_ld_i64:
3674        return (TCG_TARGET_REG_BITS == 64 ? &r_L
3675                : TARGET_LONG_BITS == 32 ? &L_L_L : &L_L_L_L);
3676    case INDEX_op_qemu_st_i64:
3677        return (TCG_TARGET_REG_BITS == 64 ? &S_S
3678                : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
3679
3680    case INDEX_op_add_vec:
3681    case INDEX_op_sub_vec:
3682    case INDEX_op_mul_vec:
3683    case INDEX_op_and_vec:
3684    case INDEX_op_or_vec:
3685    case INDEX_op_xor_vec:
3686    case INDEX_op_andc_vec:
3687    case INDEX_op_orc_vec:
3688    case INDEX_op_cmp_vec:
3689    case INDEX_op_ssadd_vec:
3690    case INDEX_op_sssub_vec:
3691    case INDEX_op_usadd_vec:
3692    case INDEX_op_ussub_vec:
3693    case INDEX_op_smax_vec:
3694    case INDEX_op_smin_vec:
3695    case INDEX_op_umax_vec:
3696    case INDEX_op_umin_vec:
3697    case INDEX_op_shlv_vec:
3698    case INDEX_op_shrv_vec:
3699    case INDEX_op_sarv_vec:
3700    case INDEX_op_rotlv_vec:
3701    case INDEX_op_rotrv_vec:
3702    case INDEX_op_ppc_mrgh_vec:
3703    case INDEX_op_ppc_mrgl_vec:
3704    case INDEX_op_ppc_muleu_vec:
3705    case INDEX_op_ppc_mulou_vec:
3706    case INDEX_op_ppc_pkum_vec:
3707    case INDEX_op_dup2_vec:
3708        return &v_v_v;
3709    case INDEX_op_not_vec:
3710    case INDEX_op_neg_vec:
3711        return &v_v;
3712    case INDEX_op_dup_vec:
3713        return have_isa_3_00 ? &v_vr : &v_v;
3714    case INDEX_op_ld_vec:
3715    case INDEX_op_st_vec:
3716    case INDEX_op_dupm_vec:
3717        return &v_r;
3718    case INDEX_op_bitsel_vec:
3719    case INDEX_op_ppc_msum_vec:
3720        return &v_v_v_v;
3721
3722    default:
3723        return NULL;
3724    }
3725}
3726
3727static void tcg_target_init(TCGContext *s)
3728{
3729    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3730    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3731
3732    have_isa = tcg_isa_base;
3733    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3734        have_isa = tcg_isa_2_06;
3735    }
3736#ifdef PPC_FEATURE2_ARCH_2_07
3737    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3738        have_isa = tcg_isa_2_07;
3739    }
3740#endif
3741#ifdef PPC_FEATURE2_ARCH_3_00
3742    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3743        have_isa = tcg_isa_3_00;
3744    }
3745#endif
3746#ifdef PPC_FEATURE2_ARCH_3_10
3747    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
3748        have_isa = tcg_isa_3_10;
3749    }
3750#endif
3751
3752#ifdef PPC_FEATURE2_HAS_ISEL
3753    /* Prefer explicit instruction from the kernel. */
3754    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3755#else
3756    /* Fall back to knowing Power7 (2.06) has ISEL. */
3757    have_isel = have_isa_2_06;
3758#endif
3759
3760    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3761        have_altivec = true;
3762        /* We only care about the portion of VSX that overlaps Altivec. */
3763        if (hwcap & PPC_FEATURE_HAS_VSX) {
3764            have_vsx = true;
3765        }
3766    }
3767
3768    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3769    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3770    if (have_altivec) {
3771        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3772        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3773    }
3774
3775    tcg_target_call_clobber_regs = 0;
3776    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3777    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3778    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3779    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3780    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3781    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3782    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3783    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3784    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3785    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3786    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3787    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3788
3789    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3790    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3791    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3792    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3793    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3794    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3795    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3796    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3797    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3798    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3799    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3800    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3801    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3802    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3803    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3804    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3805    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3806    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3807    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3808    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3809
3810    s->reserved_regs = 0;
3811    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3812    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3813#if defined(_CALL_SYSV)
3814    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3815#endif
3816#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3817    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3818#endif
3819    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3820    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3821    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3822    if (USE_REG_TB) {
3823        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3824    }
3825}
3826
3827#ifdef __ELF__
3828typedef struct {
3829    DebugFrameCIE cie;
3830    DebugFrameFDEHeader fde;
3831    uint8_t fde_def_cfa[4];
3832    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3833} DebugFrame;
3834
3835/* We're expecting a 2 byte uleb128 encoded value.  */
3836QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3837
3838#if TCG_TARGET_REG_BITS == 64
3839# define ELF_HOST_MACHINE EM_PPC64
3840#else
3841# define ELF_HOST_MACHINE EM_PPC
3842#endif
3843
3844static DebugFrame debug_frame = {
3845    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3846    .cie.id = -1,
3847    .cie.version = 1,
3848    .cie.code_align = 1,
3849    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3850    .cie.return_column = 65,
3851
3852    /* Total FDE size does not include the "len" member.  */
3853    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3854
3855    .fde_def_cfa = {
3856        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
3857        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3858        (FRAME_SIZE >> 7)
3859    },
3860    .fde_reg_ofs = {
3861        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
3862        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
3863    }
3864};
3865
3866void tcg_register_jit(const void *buf, size_t buf_size)
3867{
3868    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3869    int i;
3870
3871    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
3872        p[0] = 0x80 + tcg_target_callee_save_regs[i];
3873        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
3874    }
3875
3876    debug_frame.fde.func_start = (uintptr_t)buf;
3877    debug_frame.fde.func_len = buf_size;
3878
3879    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3880}
3881#endif /* __ELF__ */
3882