xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision 587adaca)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"

#if defined _CALL_DARWIN || defined __APPLE__
#define TCG_TARGET_CALL_DARWIN
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ALIGN_ARGS   1
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLOAD_REGS \
    (ALL_GENERAL_REGS & \
     ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
#define ALL_QSTORE_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
                          (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
#else
#define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
#define ALL_QSTORE_REGS ALL_QLOAD_REGS
#endif

TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4
};

static const int tcg_target_callee_save_regs[] = {
#ifdef TCG_TARGET_CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}
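
/*
 * Worked example (illustrative): patching a "b" insn to a target 0x1000
 * bytes ahead.  disp = 0x1000 passes in_range_b(), so reloc_pc24 rewrites
 * the LI field (bits 2-25), leaving the opcode and AA/LK bits alone:
 *     0x48000000 | (0x1000 & 0x3fffffc) = 0x48001000, i.e. "b .+0x1000".
 * A displacement outside the signed 26-bit (+/-32MB) range makes
 * reloc_pc24 return false instead of patching.
 */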

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))
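
/*
 * These form macros place the primary opcode in the top 6 bits of the
 * insn and any extended opcode in its architected field.  For example,
 * XO31(266) builds the skeleton of "add":
 *     (31 << 26) | (266 << 1) = 0x7c000214
 * which lacks only the RT/RA/RB register fields defined further below.
 */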

#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
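
/*
 * Worked encoding example: ADDI | TAI(TCG_REG_R3, 0, 1) is
 * "addi r3, 0, 1", the canonical "li r3, 1" (RA=0 in addi means the
 * literal zero, not r0):
 *     OPCD(14) | RT(3) | RA(0) | 1 = 0x38000000 | 0x00600000 | 0x1
 *                                  = 0x38600001
 */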

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
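
/*
 * Each entry is a conditional branch on one bit of CR field 7, with a
 * zero displacement to be patched by tcg_out_bc below.  For example,
 * the TCG_COND_EQ entry is "beq cr7, +0":
 *     BC | BI(7, CR_EQ) | BO_COND_TRUE
 *       = (16 << 26) | (30 << 16) | (12 << 21) = 0x419e0000
 */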

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
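
/*
 * Worked example for the R_PPC_ADDR32 case above: value = 0x12348000
 * splits into lo = (int16_t)0x8000 = -0x8000 and hi = 0x12350000.
 * The lis gets 0x1235 and the load gets D = 0x8000; since the load
 * sign-extends its displacement, 0x12350000 - 0x8000 = 0x12348000.
 */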

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
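
/*
 * Note that the integer-to-integer case above uses the classic "mr"
 * idiom, OR with both source fields equal.  E.g. OR | SAB(4, 3, 4) is
 * "or r3, r4, r4", i.e. "mr r3, r4" = 0x7c832378.
 */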

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = tcg_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       tcg_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
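
/*
 * Worked example of the final, worst-case path (5 insns) for
 * arg = 0x123456789abcdef0:
 *     lis  ret, 0x1234        # tmp = arg >> 32, via tcg_out_movi
 *     ori  ret, ret, 0x5678
 *     sldi ret, ret, 32
 *     oris ret, ret, 0x9abc
 *     ori  ret, ret, 0xdef0
 */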

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = tcg_tbrel_diff(s, NULL);
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}
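
/*
 * Worked examples: c = 0x00ff0000 yields lsb = 0x00010000 (me = 15) and
 * test = 0x01000000 (mb = 8), i.e. the RLWINM mask MB=8, ME=15 in IBM
 * bit numbering.  For c = 0xff000000, test wraps to 0 in 32 bits,
 * giving mb = 0, me = 7.  A value with more than one run of ones,
 * e.g. 0x00ff00ff, fails the power-of-two test and is rejected.
 */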

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
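
/*
 * Worked example: opi = LWZ, offset = 0x12345678 splits into l0 = 0x5678
 * and l1 = 0x1234, emitting "addis rs, base, 0x1234" followed by
 * "lwz rt, 0x5678(rs)".  The 0x4000 "extra" addis handles offsets such
 * as 0x7fff8000, where the high adjustment 0x8000 no longer fits the
 * signed 16-bit addis field:
 *     addis rs, base, 0x4000; addis rs, rs, 0x4000; lwz rt, -0x8000(rs)
 */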

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        tcg_abort();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
{
    /* X != 0 implies X + -1 generates a carry.  The SUBFE then
       computes: R = ~(X-1) + X + CA = -X + X + CA = CA.  */
    if (dst != src) {
        tcg_out32(s, ADDIC | TAI(dst, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, dst, src));
    } else {
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    }
}
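
/*
 * Numeric check of the carry trick above, for X = 5: ADDIC leaves
 * dst = 4 with CA = 1, and SUBFE computes ~4 + 5 + 1 = 1.  For X = 0:
 * dst = -1 with CA = 0, and ~(-1) + 0 + 0 = 0.
 */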

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        if (crop) {
            tcg_out32(s, crop);
        }
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
{
    if (l->has_value) {
        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out32(s, bc);
}

static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
{
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
    tcg_out_bc(s, tcg_to_bc[cond], l);
}

static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
                            TCGArg v2, bool const_c2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out32(s, tcg_to_bc[cond] | 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}

static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                         const int *const_args)
{
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond = args[4], cond2;
    TCGArg al, ah, bl, bh;
    int blconst, bhconst;
    int op, bit1, bit2;

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];
    blconst = const_args[2];
    bhconst = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
        break;

    default:
        tcg_abort();
    }
}
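
/*
 * Illustration of the ordered cases: for TCG_COND_LTU, bit1 = bit2 =
 * CR_LT, so after "cmpl cr6, ah, bh" and "cmpl cr7, al, bl" we compute
 * CR7[EQ] = CR6[EQ] & CR7[LT] (high parts equal, low part decides),
 * then CR7[EQ] |= CR6[LT] (high part already decides).  The setcond2
 * and brcond2 helpers below then need only test CR7[EQ].
 */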

static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args + 1, const_args + 1);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
}

static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args)
{
    tcg_out_cmp2(s, args, const_args);
    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
}

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    uint32_t insn = HWSYNC;
    a0 &= TCG_MO_ALL;
    if (a0 == TCG_MO_LD_LD) {
        insn = LWSYNC;
    } else if (a0 == TCG_MO_ST_ST) {
        insn = EIEIO;
    }
    tcg_out32(s, insn);
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_insn_unit i1, i2;
        intptr_t tb_diff = addr - tc_ptr;
        intptr_t br_diff = addr - (jmp_rx + 4);
        uint64_t pair;

        /* This does not exercise the full range of the branch, but we
           still need to be able to load the new value of TCG_REG_TB;
           that case does happen quite often.  */
        if (tb_diff == (int16_t)tb_diff) {
            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
            i2 = B | (br_diff & 0x3fffffc);
        } else {
            intptr_t lo = (int16_t)tb_diff;
            intptr_t hi = (int32_t)(tb_diff - lo);
            assert(tb_diff == hi + lo);
            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
        }
#ifdef HOST_WORDS_BIGENDIAN
        pair = (uint64_t)i1 << 32 | i2;
#else
        pair = (uint64_t)i2 << 32 | i1;
#endif

        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
           within qatomic_set that would fail to build on a ppc32 host.  */
        qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
        flush_idcache_range(jmp_rx, jmp_rw, 8);
    } else {
        intptr_t diff = addr - jmp_rx;
        tcg_debug_assert(in_range_b(diff));
        qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
        flush_idcache_range(jmp_rx, jmp_rw, 4);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the function descriptor, and branch directly if the
       target is in range and the TOC value is cheap to build.  */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, LK, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid the r12 load if
       we knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
1765    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1766
1767    diff = tcg_pcrel_diff(s, target);
1768    if (in_range_b(diff)) {
1769        tcg_out_b(s, LK, target);
1770    } else {
1771        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1772        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1773    }
1774#else
1775    tcg_out_b(s, LK, target);
1776#endif
1777}
1778
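/*
 * Indexed load/store opcodes keyed by MemOp (size | sign | byteswap).
 * A zero entry means no single insn exists (e.g. sign-extending
 * byte-reversed loads); callers synthesize those, e.g. LHBRX + EXTSH.
 */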
1779static const uint32_t qemu_ldx_opc[16] = {
1780    [MO_UB] = LBZX,
1781    [MO_UW] = LHZX,
1782    [MO_UL] = LWZX,
1783    [MO_Q]  = LDX,
1784    [MO_SW] = LHAX,
1785    [MO_SL] = LWAX,
1786    [MO_BSWAP | MO_UB] = LBZX,
1787    [MO_BSWAP | MO_UW] = LHBRX,
1788    [MO_BSWAP | MO_UL] = LWBRX,
1789    [MO_BSWAP | MO_Q]  = LDBRX,
1790};
1791
1792static const uint32_t qemu_stx_opc[16] = {
1793    [MO_UB] = STBX,
1794    [MO_UW] = STHX,
1795    [MO_UL] = STWX,
1796    [MO_Q]  = STDX,
1797    [MO_BSWAP | MO_UB] = STBX,
1798    [MO_BSWAP | MO_UW] = STHBRX,
1799    [MO_BSWAP | MO_UL] = STWBRX,
1800    [MO_BSWAP | MO_Q]  = STDBRX,
1801};
1802
1803static const uint32_t qemu_exts_opc[4] = {
1804    EXTSB, EXTSH, EXTSW, 0
1805};
1806
1807#if defined (CONFIG_SOFTMMU)
1808#include "../tcg-ldst.c.inc"
1809
1810/* helper signature: helper_ld_mmu(CPUArchState *env, target_ulong addr,
1811 *                                 TCGMemOpIdx oi, uintptr_t ra)
1812 */
1813static void * const qemu_ld_helpers[16] = {
1814    [MO_UB]   = helper_ret_ldub_mmu,
1815    [MO_LEUW] = helper_le_lduw_mmu,
1816    [MO_LEUL] = helper_le_ldul_mmu,
1817    [MO_LEQ]  = helper_le_ldq_mmu,
1818    [MO_BEUW] = helper_be_lduw_mmu,
1819    [MO_BEUL] = helper_be_ldul_mmu,
1820    [MO_BEQ]  = helper_be_ldq_mmu,
1821};
1822
1823/* helper signature: helper_st_mmu(CPUArchState *env, target_ulong addr,
1824 *                                 uintxx_t val, TCGMemOpIdx oi, uintptr_t ra)
1825 */
1826static void * const qemu_st_helpers[16] = {
1827    [MO_UB]   = helper_ret_stb_mmu,
1828    [MO_LEUW] = helper_le_stw_mmu,
1829    [MO_LEUL] = helper_le_stl_mmu,
1830    [MO_LEQ]  = helper_le_stq_mmu,
1831    [MO_BEUW] = helper_be_stw_mmu,
1832    [MO_BEUL] = helper_be_stl_mmu,
1833    [MO_BEQ]  = helper_be_stq_mmu,
1834};
1835
1836/* We expect to use a 16-bit negative offset from ENV.  */
1837QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1838QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1839
1840/* Perform the TLB load and compare.  Places the result of the comparison
1841   in CR7, loads the TLB addend into R3, and returns the register holding
1842   the guest address (zero-extended into R4).  Clobbers R0 and TCG_REG_TMP1. */
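/*
 * For a 64-bit guest on a 64-bit host, the emitted fast path is
 * roughly (a sketch, not the literal output):
 *
 *     ld     r3, mask_off(env)      # tlb_mask[mmu_idx]
 *     ld     r4, table_off(env)     # tlb_table[mmu_idx]
 *     srdi   tmp1, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS
 *     and    r3, r3, tmp1           # offset of the TLB entry
 *     ldux   tmp1, r3, r4           # comparator; r3 = &entry
 *     ld     r3, addend_off(r3)     # host address addend
 *     rldicr r0, addr, 0, 63 - TARGET_PAGE_BITS
 *     cmpd   cr7, r0, tmp1
 */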
1843
1844static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
1845                               TCGReg addrlo, TCGReg addrhi,
1846                               int mem_index, bool is_read)
1847{
1848    int cmp_off
1849        = (is_read
1850           ? offsetof(CPUTLBEntry, addr_read)
1851           : offsetof(CPUTLBEntry, addr_write));
1852    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1853    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1854    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1855    unsigned s_bits = opc & MO_SIZE;
1856    unsigned a_bits = get_alignment_bits(opc);
1857
1858    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
1859    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
1860    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
1861
1862    /* Extract the page index, shifted into place for tlb index.  */
1863    if (TCG_TARGET_REG_BITS == 32) {
1864        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
1865                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1866    } else {
1867        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
1868                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1869    }
1870    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
1871
1872    /* Load the TLB comparator.  */
1873    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
1874        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
1875                        ? LWZUX : LDUX);
1876        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
1877    } else {
1878        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
1879        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1880            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
1881            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
1882        } else {
1883            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
1884        }
1885    }
1886
1887    /* Load the TLB addend for use on the fast path.  Do this as early
1888       as possible to minimize the load-use delay.  */
1889    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
1890               offsetof(CPUTLBEntry, addend));
1891
1892    /* Clear the non-page, non-alignment bits from the address */
1893    if (TCG_TARGET_REG_BITS == 32) {
1894        /* We don't support unaligned accesses on 32-bit hosts.
1895         * Preserve the bottom bits so that an unaligned access
1896         * forces a comparison failure against the TLB entry.
1897         */
1898        if (a_bits < s_bits) {
1899            a_bits = s_bits;
1900        }
1901        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
1902                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1903    } else {
1904        TCGReg t = addrlo;
1905
1906        /* If the access is unaligned, we need to make sure we fail if we
1907         * cross a page boundary.  The trick is to add the access size-1
1908         * to the address before masking the low bits.  That will make the
1909         * address overflow to the next page if we cross a page boundary,
1910         * which will then force a mismatch of the TLB compare.
1911         */
1912        if (a_bits < s_bits) {
1913            unsigned a_mask = (1 << a_bits) - 1;
1914            unsigned s_mask = (1 << s_bits) - 1;
1915            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
1916            t = TCG_REG_R0;
1917        }
1918
1919        /* Mask the address for the requested alignment.  */
1920        if (TARGET_LONG_BITS == 32) {
1921            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
1922                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1923            /* Zero-extend the guest address for use in the final host address.  */
1924            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
1925            addrlo = TCG_REG_R4;
1926        } else if (a_bits == 0) {
1927            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
1928        } else {
1929            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
1930                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
1931            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
1932        }
1933    }
1934
1935    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1936        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1937                    0, 7, TCG_TYPE_I32);
1938        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
1939        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1940    } else {
1941        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1942                    0, 7, TCG_TYPE_TL);
1943    }
1944
1945    return addrlo;
1946}
1947
1948/* Record the context of a call to the out-of-line helper code for the
1949   slow path of a load or store, so that we can later generate the
1950   correct helper call.  */
1951static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1952                                TCGReg datalo_reg, TCGReg datahi_reg,
1953                                TCGReg addrlo_reg, TCGReg addrhi_reg,
1954                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
1955{
1956    TCGLabelQemuLdst *label = new_ldst_label(s);
1957
1958    label->is_ld = is_ld;
1959    label->oi = oi;
1960    label->datalo_reg = datalo_reg;
1961    label->datahi_reg = datahi_reg;
1962    label->addrlo_reg = addrlo_reg;
1963    label->addrhi_reg = addrhi_reg;
1964    label->raddr = tcg_splitwx_to_rx(raddr);
1965    label->label_ptr[0] = lptr;
1966}
1967
1968static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1969{
1970    TCGMemOpIdx oi = lb->oi;
1971    MemOp opc = get_memop(oi);
1972    TCGReg hi, lo, arg = TCG_REG_R3;
1973
1974    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1975        return false;
1976    }
1977
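    /*
     * Marshal the helper arguments: env, the guest address (split
     * across two registers on 32-bit hosts), oi, and the return
     * address, which is still in LR because the fast path entered
     * the slow path with a branch-and-link.
     */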
1978    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
1979
1980    lo = lb->addrlo_reg;
1981    hi = lb->addrhi_reg;
1982    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1983#ifdef TCG_TARGET_CALL_ALIGN_ARGS
1984        arg |= 1;
1985#endif
1986        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
1987        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
1988    } else {
1989        /* If the address needed to be zero-extended, we'll have already
1990           placed it in R4.  The only remaining case is 64-bit guest.  */
1991        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
1992    }
1993
1994    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
1995    tcg_out32(s, MFSPR | RT(arg) | LR);
1996
1997    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1998
1999    lo = lb->datalo_reg;
2000    hi = lb->datahi_reg;
2001    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2002        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2003        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2004    } else if (opc & MO_SIGN) {
2005        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2006        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2007    } else {
2008        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2009    }
2010
2011    tcg_out_b(s, 0, lb->raddr);
2012    return true;
2013}
2014
2015static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2016{
2017    TCGMemOpIdx oi = lb->oi;
2018    MemOp opc = get_memop(oi);
2019    MemOp s_bits = opc & MO_SIZE;
2020    TCGReg hi, lo, arg = TCG_REG_R3;
2021
2022    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2023        return false;
2024    }
2025
2026    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2027
2028    lo = lb->addrlo_reg;
2029    hi = lb->addrhi_reg;
2030    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2031#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2032        arg |= 1;
2033#endif
2034        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2035        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2036    } else {
2037        /* If the address needed to be zero-extended, we'll have already
2038           placed it in R4.  The only remaining case is 64-bit guest.  */
2039        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2040    }
2041
2042    lo = lb->datalo_reg;
2043    hi = lb->datahi_reg;
2044    if (TCG_TARGET_REG_BITS == 32) {
2045        switch (s_bits) {
2046        case MO_64:
2047#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2048            arg |= 1;
2049#endif
2050            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2051            /* FALLTHRU */
2052        case MO_32:
2053            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2054            break;
2055        default:
2056            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2057            break;
2058        }
2059    } else {
2060        if (s_bits == MO_64) {
2061            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2062        } else {
2063            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2064        }
2065    }
2066
2067    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2068    tcg_out32(s, MFSPR | RT(arg) | LR);
2069
2070    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2071
2072    tcg_out_b(s, 0, lb->raddr);
2073    return true;
2074}
2075#endif /* SOFTMMU */
2076
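/*
 * Generate code for a guest load.  The incoming args are, in order:
 * datalo, (datahi on 32-bit hosts for 64-bit data,) addrlo,
 * (addrhi when the guest address needs two host registers,) oi.
 * tcg_out_qemu_st below unpacks the same layout.
 */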
2077static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2078{
2079    TCGReg datalo, datahi, addrlo, rbase;
2080    TCGReg addrhi __attribute__((unused));
2081    TCGMemOpIdx oi;
2082    MemOp opc, s_bits;
2083#ifdef CONFIG_SOFTMMU
2084    int mem_index;
2085    tcg_insn_unit *label_ptr;
2086#endif
2087
2088    datalo = *args++;
2089    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2090    addrlo = *args++;
2091    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2092    oi = *args++;
2093    opc = get_memop(oi);
2094    s_bits = opc & MO_SIZE;
2095
2096#ifdef CONFIG_SOFTMMU
2097    mem_index = get_mmuidx(oi);
2098    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2099
2100    /* Emit a conditional branch-and-link, patched later to the slow path.  */
2101    label_ptr = s->code_ptr;
2102    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2103
2104    rbase = TCG_REG_R3;
2105#else  /* !CONFIG_SOFTMMU */
2106    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2107    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2108        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2109        addrlo = TCG_REG_TMP1;
2110    }
2111#endif
2112
2113    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2114        if (opc & MO_BSWAP) {
2115            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2116            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2117            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2118        } else if (rbase != 0) {
2119            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2120            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2121            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2122        } else if (addrlo == datahi) {
2123            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2124            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2125        } else {
2126            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2127            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2128        }
2129    } else {
2130        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
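        /*
         * LDBRX is new in ISA 2.06; older cores synthesize the
         * byte-reversed 64-bit load from two LWBRX, merging the
         * halves with RLDIMI.
         */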
2131        if (!have_isa_2_06 && insn == LDBRX) {
2132            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2133            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2134            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2135            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2136        } else if (insn) {
2137            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2138        } else {
2139            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2140            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2141            insn = qemu_exts_opc[s_bits];
2142            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2143        }
2144    }
2145
2146#ifdef CONFIG_SOFTMMU
2147    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2148                        s->code_ptr, label_ptr);
2149#endif
2150}
2151
2152static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2153{
2154    TCGReg datalo, datahi, addrlo, rbase;
2155    TCGReg addrhi __attribute__((unused));
2156    TCGMemOpIdx oi;
2157    MemOp opc, s_bits;
2158#ifdef CONFIG_SOFTMMU
2159    int mem_index;
2160    tcg_insn_unit *label_ptr;
2161#endif
2162
2163    datalo = *args++;
2164    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2165    addrlo = *args++;
2166    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2167    oi = *args++;
2168    opc = get_memop(oi);
2169    s_bits = opc & MO_SIZE;
2170
2171#ifdef CONFIG_SOFTMMU
2172    mem_index = get_mmuidx(oi);
2173    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2174
2175    /* Emit a conditional branch-and-link, patched later to the slow path.  */
2176    label_ptr = s->code_ptr;
2177    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2178
2179    rbase = TCG_REG_R3;
2180#else  /* !CONFIG_SOFTMMU */
2181    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2182    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2183        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2184        addrlo = TCG_REG_TMP1;
2185    }
2186#endif
2187
2188    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2189        if (opc & MO_BSWAP) {
2190            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2191            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2192            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2193        } else if (rbase != 0) {
2194            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2195            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2196            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2197        } else {
2198            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2199            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2200        }
2201    } else {
2202        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
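        /*
         * Likewise STDBRX is new in ISA 2.06; older cores store the
         * two byte-reversed words separately.
         */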
2203        if (!have_isa_2_06 && insn == STDBRX) {
2204            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2205            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2206            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2207            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2208        } else {
2209            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2210        }
2211    }
2212
2213#ifdef CONFIG_SOFTMMU
2214    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2215                        s->code_ptr, label_ptr);
2216#endif
2217}
2218
2219static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2220{
2221    int i;
2222    for (i = 0; i < count; ++i) {
2223        p[i] = NOP;
2224    }
2225}
2226
2227/* Parameters for function call generation, used in tcg.c.  */
2228#define TCG_TARGET_STACK_ALIGN       16
2229#define TCG_TARGET_EXTEND_ARGS       1
2230
2231#ifdef _CALL_AIX
2232# define LINK_AREA_SIZE                (6 * SZR)
2233# define LR_OFFSET                     (1 * SZR)
2234# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2235#elif defined(TCG_TARGET_CALL_DARWIN)
2236# define LINK_AREA_SIZE                (6 * SZR)
2237# define LR_OFFSET                     (2 * SZR)
2238#elif TCG_TARGET_REG_BITS == 64
2239# if defined(_CALL_ELF) && _CALL_ELF == 2
2240#  define LINK_AREA_SIZE               (4 * SZR)
2241#  define LR_OFFSET                    (1 * SZR)
2242# endif
2243#else /* TCG_TARGET_REG_BITS == 32 */
2244# if defined(_CALL_SYSV)
2245#  define LINK_AREA_SIZE               (2 * SZR)
2246#  define LR_OFFSET                    (1 * SZR)
2247# endif
2248#endif
2249#ifndef LR_OFFSET
2250# error "Unhandled ABI"
2251#endif
2252#ifndef TCG_TARGET_CALL_STACK_OFFSET
2253# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2254#endif
2255
2256#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2257#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2258
2259#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2260                     + TCG_STATIC_CALL_ARGS_SIZE    \
2261                     + CPU_TEMP_BUF_SIZE            \
2262                     + REG_SAVE_SIZE                \
2263                     + TCG_TARGET_STACK_ALIGN - 1)  \
2264                    & -TCG_TARGET_STACK_ALIGN)
2265
2266#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2267
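/*
 * Frame layout, from high to low addresses (a sketch):
 *   caller's SP
 *     callee-saved registers    (REG_SAVE_SIZE, ending at FRAME_SIZE)
 *     CPU_TEMP_BUF              (registered with tcg_set_frame)
 *     TCG_STATIC_CALL_ARGS_SIZE
 *     ABI link area             (TCG_TARGET_CALL_STACK_OFFSET)
 *   our SP, where STDU/STWU stores the back-chain
 */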
2268static void tcg_target_qemu_prologue(TCGContext *s)
2269{
2270    int i;
2271
2272#ifdef _CALL_AIX
2273    const void **desc = (const void **)s->code_ptr;
2274    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2275    desc[1] = 0;                            /* environment pointer */
2276    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2277#endif
2278
2279    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2280                  CPU_TEMP_BUF_SIZE);
2281
2282    /* Prologue */
2283    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2284    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2285              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2286
2287    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2288        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2289                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2290    }
2291    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2292
2293#ifndef CONFIG_SOFTMMU
2294    if (guest_base) {
2295        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2296        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2297    }
2298#endif
2299
2300    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2301    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2302    if (USE_REG_TB) {
2303        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2304    }
2305    tcg_out32(s, BCCTR | BO_ALWAYS);
2306
2307    /* Epilogue */
2308    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2309
2310    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2311    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2312        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2313                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2314    }
2315    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2316    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2317    tcg_out32(s, BCLR | BO_ALWAYS);
2318}
2319
2320static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2321                       const TCGArg args[TCG_MAX_OP_ARGS],
2322                       const int const_args[TCG_MAX_OP_ARGS])
2323{
2324    TCGArg a0, a1, a2;
2325    int c;
2326
2327    switch (opc) {
2328    case INDEX_op_exit_tb:
2329        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
2330        tcg_out_b(s, 0, tcg_code_gen_epilogue);
2331        break;
2332    case INDEX_op_goto_tb:
2333        if (s->tb_jmp_insn_offset) {
2334            /* Direct jump. */
2335            if (TCG_TARGET_REG_BITS == 64) {
2336                /* Ensure the next insns are 8-byte aligned. */
2337                if ((uintptr_t)s->code_ptr & 7) {
2338                    tcg_out32(s, NOP);
2339                }
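                /*
                 * Reserve an 8-byte-aligned ADDIS/ADDI pair; as emitted
                 * it leaves TCG_REG_TB unchanged, and the alignment lets
                 * tb_target_set_jmp_target rewrite both insns with a
                 * single atomic store.
                 */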
2340                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2341                tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2342                tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2343            } else {
2344                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2345                tcg_out32(s, B);
2346                s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
2347                break;
2348            }
2349        } else {
2350            /* Indirect jump. */
2351            tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
2352            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
2353                       (intptr_t)(s->tb_jmp_target_addr + args[0]));
2354        }
2355        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2356        tcg_out32(s, BCCTR | BO_ALWAYS);
2357        set_jmp_reset_offset(s, args[0]);
2358        if (USE_REG_TB) {
2359            /* For the unlinked case, need to reset TCG_REG_TB.  */
2360            tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
2361                             -tcg_current_code_size(s));
2362        }
2363        break;
2364    case INDEX_op_goto_ptr:
2365        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2366        if (USE_REG_TB) {
2367            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2368        }
2369        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2370        tcg_out32(s, BCCTR | BO_ALWAYS);
2371        break;
2372    case INDEX_op_br:
2373        {
2374            TCGLabel *l = arg_label(args[0]);
2375            uint32_t insn = B;
2376
2377            if (l->has_value) {
2378                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2379                                       l->u.value_ptr);
2380            } else {
2381                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2382            }
2383            tcg_out32(s, insn);
2384        }
2385        break;
2386    case INDEX_op_ld8u_i32:
2387    case INDEX_op_ld8u_i64:
2388        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2389        break;
2390    case INDEX_op_ld8s_i32:
2391    case INDEX_op_ld8s_i64:
2392        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2393        tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
2394        break;
2395    case INDEX_op_ld16u_i32:
2396    case INDEX_op_ld16u_i64:
2397        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2398        break;
2399    case INDEX_op_ld16s_i32:
2400    case INDEX_op_ld16s_i64:
2401        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2402        break;
2403    case INDEX_op_ld_i32:
2404    case INDEX_op_ld32u_i64:
2405        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2406        break;
2407    case INDEX_op_ld32s_i64:
2408        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2409        break;
2410    case INDEX_op_ld_i64:
2411        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2412        break;
2413    case INDEX_op_st8_i32:
2414    case INDEX_op_st8_i64:
2415        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2416        break;
2417    case INDEX_op_st16_i32:
2418    case INDEX_op_st16_i64:
2419        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2420        break;
2421    case INDEX_op_st_i32:
2422    case INDEX_op_st32_i64:
2423        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2424        break;
2425    case INDEX_op_st_i64:
2426        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2427        break;
2428
2429    case INDEX_op_add_i32:
2430        a0 = args[0], a1 = args[1], a2 = args[2];
2431        if (const_args[2]) {
2432        do_addi_32:
2433            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2434        } else {
2435            tcg_out32(s, ADD | TAB(a0, a1, a2));
2436        }
2437        break;
2438    case INDEX_op_sub_i32:
2439        a0 = args[0], a1 = args[1], a2 = args[2];
2440        if (const_args[1]) {
2441            if (const_args[2]) {
2442                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2443            } else {
2444                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2445            }
2446        } else if (const_args[2]) {
2447            a2 = -a2;
2448            goto do_addi_32;
2449        } else {
2450            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2451        }
2452        break;
2453
2454    case INDEX_op_and_i32:
2455        a0 = args[0], a1 = args[1], a2 = args[2];
2456        if (const_args[2]) {
2457            tcg_out_andi32(s, a0, a1, a2);
2458        } else {
2459            tcg_out32(s, AND | SAB(a1, a0, a2));
2460        }
2461        break;
2462    case INDEX_op_and_i64:
2463        a0 = args[0], a1 = args[1], a2 = args[2];
2464        if (const_args[2]) {
2465            tcg_out_andi64(s, a0, a1, a2);
2466        } else {
2467            tcg_out32(s, AND | SAB(a1, a0, a2));
2468        }
2469        break;
2470    case INDEX_op_or_i64:
2471    case INDEX_op_or_i32:
2472        a0 = args[0], a1 = args[1], a2 = args[2];
2473        if (const_args[2]) {
2474            tcg_out_ori32(s, a0, a1, a2);
2475        } else {
2476            tcg_out32(s, OR | SAB(a1, a0, a2));
2477        }
2478        break;
2479    case INDEX_op_xor_i64:
2480    case INDEX_op_xor_i32:
2481        a0 = args[0], a1 = args[1], a2 = args[2];
2482        if (const_args[2]) {
2483            tcg_out_xori32(s, a0, a1, a2);
2484        } else {
2485            tcg_out32(s, XOR | SAB(a1, a0, a2));
2486        }
2487        break;
2488    case INDEX_op_andc_i32:
2489        a0 = args[0], a1 = args[1], a2 = args[2];
2490        if (const_args[2]) {
2491            tcg_out_andi32(s, a0, a1, ~a2);
2492        } else {
2493            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2494        }
2495        break;
2496    case INDEX_op_andc_i64:
2497        a0 = args[0], a1 = args[1], a2 = args[2];
2498        if (const_args[2]) {
2499            tcg_out_andi64(s, a0, a1, ~a2);
2500        } else {
2501            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2502        }
2503        break;
2504    case INDEX_op_orc_i32:
2505        if (const_args[2]) {
2506            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2507            break;
2508        }
2509        /* FALLTHRU */
2510    case INDEX_op_orc_i64:
2511        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2512        break;
2513    case INDEX_op_eqv_i32:
2514        if (const_args[2]) {
2515            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2516            break;
2517        }
2518        /* FALLTHRU */
2519    case INDEX_op_eqv_i64:
2520        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2521        break;
2522    case INDEX_op_nand_i32:
2523    case INDEX_op_nand_i64:
2524        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2525        break;
2526    case INDEX_op_nor_i32:
2527    case INDEX_op_nor_i64:
2528        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2529        break;
2530
2531    case INDEX_op_clz_i32:
2532        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2533                      args[2], const_args[2]);
2534        break;
2535    case INDEX_op_ctz_i32:
2536        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2537                      args[2], const_args[2]);
2538        break;
2539    case INDEX_op_ctpop_i32:
2540        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2541        break;
2542
2543    case INDEX_op_clz_i64:
2544        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2545                      args[2], const_args[2]);
2546        break;
2547    case INDEX_op_ctz_i64:
2548        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2549                      args[2], const_args[2]);
2550        break;
2551    case INDEX_op_ctpop_i64:
2552        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2553        break;
2554
2555    case INDEX_op_mul_i32:
2556        a0 = args[0], a1 = args[1], a2 = args[2];
2557        if (const_args[2]) {
2558            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2559        } else {
2560            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2561        }
2562        break;
2563
2564    case INDEX_op_div_i32:
2565        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2566        break;
2567
2568    case INDEX_op_divu_i32:
2569        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2570        break;
2571
2572    case INDEX_op_shl_i32:
2573        if (const_args[2]) {
2574            /* Limit immediate shift count lest we create an illegal insn.  */
2575            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2576        } else {
2577            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2578        }
2579        break;
2580    case INDEX_op_shr_i32:
2581        if (const_args[2]) {
2582            /* Limit immediate shift count lest we create an illegal insn.  */
2583            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2584        } else {
2585            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2586        }
2587        break;
2588    case INDEX_op_sar_i32:
2589        if (const_args[2]) {
2590            /* Limit immediate shift count lest we create an illegal insn.  */
2591            tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31));
2592        } else {
2593            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2594        }
2595        break;
2596    case INDEX_op_rotl_i32:
2597        if (const_args[2]) {
2598            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2599        } else {
2600            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2601                         | MB(0) | ME(31));
2602        }
2603        break;
2604    case INDEX_op_rotr_i32:
2605        if (const_args[2]) {
2606            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2607        } else {
2608            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2609            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2610                         | MB(0) | ME(31));
2611        }
2612        break;
2613
2614    case INDEX_op_brcond_i32:
2615        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2616                       arg_label(args[3]), TCG_TYPE_I32);
2617        break;
2618    case INDEX_op_brcond_i64:
2619        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2620                       arg_label(args[3]), TCG_TYPE_I64);
2621        break;
2622    case INDEX_op_brcond2_i32:
2623        tcg_out_brcond2(s, args, const_args);
2624        break;
2625
2626    case INDEX_op_neg_i32:
2627    case INDEX_op_neg_i64:
2628        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2629        break;
2630
2631    case INDEX_op_not_i32:
2632    case INDEX_op_not_i64:
2633        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2634        break;
2635
2636    case INDEX_op_add_i64:
2637        a0 = args[0], a1 = args[1], a2 = args[2];
2638        if (const_args[2]) {
2639        do_addi_64:
2640            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2641        } else {
2642            tcg_out32(s, ADD | TAB(a0, a1, a2));
2643        }
2644        break;
2645    case INDEX_op_sub_i64:
2646        a0 = args[0], a1 = args[1], a2 = args[2];
2647        if (const_args[1]) {
2648            if (const_args[2]) {
2649                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2650            } else {
2651                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2652            }
2653        } else if (const_args[2]) {
2654            a2 = -a2;
2655            goto do_addi_64;
2656        } else {
2657            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2658        }
2659        break;
2660
2661    case INDEX_op_shl_i64:
2662        if (const_args[2]) {
2663            /* Limit immediate shift count lest we create an illegal insn.  */
2664            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2665        } else {
2666            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2667        }
2668        break;
2669    case INDEX_op_shr_i64:
2670        if (const_args[2]) {
2671            /* Limit immediate shift count lest we create an illegal insn.  */
2672            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2673        } else {
2674            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2675        }
2676        break;
2677    case INDEX_op_sar_i64:
2678        if (const_args[2]) {
2679            int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
2680            tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
2681        } else {
2682            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2683        }
2684        break;
2685    case INDEX_op_rotl_i64:
2686        if (const_args[2]) {
2687            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2688        } else {
2689            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2690        }
2691        break;
2692    case INDEX_op_rotr_i64:
2693        if (const_args[2]) {
2694            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2695        } else {
2696            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2697            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2698        }
2699        break;
2700
2701    case INDEX_op_mul_i64:
2702        a0 = args[0], a1 = args[1], a2 = args[2];
2703        if (const_args[2]) {
2704            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2705        } else {
2706            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2707        }
2708        break;
2709    case INDEX_op_div_i64:
2710        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2711        break;
2712    case INDEX_op_divu_i64:
2713        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2714        break;
2715
2716    case INDEX_op_qemu_ld_i32:
2717        tcg_out_qemu_ld(s, args, false);
2718        break;
2719    case INDEX_op_qemu_ld_i64:
2720        tcg_out_qemu_ld(s, args, true);
2721        break;
2722    case INDEX_op_qemu_st_i32:
2723        tcg_out_qemu_st(s, args, false);
2724        break;
2725    case INDEX_op_qemu_st_i64:
2726        tcg_out_qemu_st(s, args, true);
2727        break;
2728
2729    case INDEX_op_ext8s_i32:
2730    case INDEX_op_ext8s_i64:
2731        c = EXTSB;
2732        goto gen_ext;
2733    case INDEX_op_ext16s_i32:
2734    case INDEX_op_ext16s_i64:
2735        c = EXTSH;
2736        goto gen_ext;
2737    case INDEX_op_ext_i32_i64:
2738    case INDEX_op_ext32s_i64:
2739        c = EXTSW;
2740        goto gen_ext;
2741    gen_ext:
2742        tcg_out32(s, c | RS(args[1]) | RA(args[0]));
2743        break;
2744    case INDEX_op_extu_i32_i64:
2745        tcg_out_ext32u(s, args[0], args[1]);
2746        break;
2747
2748    case INDEX_op_setcond_i32:
2749        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2750                        const_args[2]);
2751        break;
2752    case INDEX_op_setcond_i64:
2753        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2754                        const_args[2]);
2755        break;
2756    case INDEX_op_setcond2_i32:
2757        tcg_out_setcond2(s, args, const_args);
2758        break;
2759
2760    case INDEX_op_bswap16_i32:
2761    case INDEX_op_bswap16_i64:
2762        a0 = args[0], a1 = args[1];
2763        /* a1 = abcd */
2764        if (a0 != a1) {
2765            /* a0 = (a1 r<< 24) & 0xff # 000c */
2766            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2767            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
2768            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
2769        } else {
2770            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
2771            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
2772            /* a0 = (a1 r<< 24) & 0xff # 000c */
2773            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2774            /* a0 = a0 | r0 # 00dc */
2775            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
2776        }
2777        break;
2778
2779    case INDEX_op_bswap32_i32:
2780    case INDEX_op_bswap32_i64:
2781        /* Stolen from gcc's builtin_bswap32 */
2782        a1 = args[1];
2783        a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
2784
2785        /* a1 = args[1] # abcd */
2786        /* a0 = rotate_left (a1, 8) # bcda */
2787        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2788        /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
2789        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2790        /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
2791        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2792
2793        if (a0 == TCG_REG_R0) {
2794            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2795        }
2796        break;
2797
2798    case INDEX_op_bswap64_i64:
2799        a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
2800        if (a0 == a1) {
2801            a0 = TCG_REG_R0;
2802            a2 = a1;
2803        }
2804
2805        /* a1 = # abcd efgh */
2806        /* a0 = rl32(a1, 8) # 0000 fghe */
2807        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2808        /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
2809        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2810        /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
2811        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2812
2813        /* a0 = rl64(a0, 32) # hgfe 0000 */
2814        /* a2 = rl64(a1, 32) # efgh abcd */
2815        tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
2816        tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
2817
2818        /* a0 = dep(a0, rl32(a2, 8), 0xffffffff)  # hgfe bcda */
2819        tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
2820        /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
2821        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
2822        /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
2823        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
2824
2825        if (a0 == TCG_REG_R0) {
2826            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2827        }
2828        break;
2829
2830    case INDEX_op_deposit_i32:
2831        if (const_args[2]) {
2832            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
2833            tcg_out_andi32(s, args[0], args[0], ~mask);
2834        } else {
2835            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
2836                        32 - args[3] - args[4], 31 - args[3]);
2837        }
2838        break;
2839    case INDEX_op_deposit_i64:
2840        if (const_args[2]) {
2841            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
2842            tcg_out_andi64(s, args[0], args[0], ~mask);
2843        } else {
2844            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
2845                        64 - args[3] - args[4]);
2846        }
2847        break;
2848
2849    case INDEX_op_extract_i32:
2850        tcg_out_rlw(s, RLWINM, args[0], args[1],
2851                    32 - args[2], 32 - args[3], 31);
2852        break;
2853    case INDEX_op_extract_i64:
2854        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
2855        break;
2856
2857    case INDEX_op_movcond_i32:
2858        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
2859                        args[3], args[4], const_args[2]);
2860        break;
2861    case INDEX_op_movcond_i64:
2862        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
2863                        args[3], args[4], const_args[2]);
2864        break;
2865
2866#if TCG_TARGET_REG_BITS == 64
2867    case INDEX_op_add2_i64:
2868#else
2869    case INDEX_op_add2_i32:
2870#endif
2871        /* Note that the CA bit is defined based on the word size of the
2872           environment.  So in 64-bit mode it's always carry-out of bit 63.
2873           The fallback code using deposit works just as well for 32-bit.  */
2874        a0 = args[0], a1 = args[1];
2875        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
2876            a0 = TCG_REG_R0;
2877        }
2878        if (const_args[4]) {
2879            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
2880        } else {
2881            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
2882        }
2883        if (const_args[5]) {
2884            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
2885        } else {
2886            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
2887        }
2888        if (a0 != args[0]) {
2889            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2890        }
2891        break;
2892
2893#if TCG_TARGET_REG_BITS == 64
2894    case INDEX_op_sub2_i64:
2895#else
2896    case INDEX_op_sub2_i32:
2897#endif
2898        a0 = args[0], a1 = args[1];
2899        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
2900            a0 = TCG_REG_R0;
2901        }
2902        if (const_args[2]) {
2903            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
2904        } else {
2905            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
2906        }
2907        if (const_args[3]) {
2908            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
2909        } else {
2910            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
2911        }
2912        if (a0 != args[0]) {
2913            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2914        }
2915        break;
2916
2917    case INDEX_op_muluh_i32:
2918        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
2919        break;
2920    case INDEX_op_mulsh_i32:
2921        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
2922        break;
2923    case INDEX_op_muluh_i64:
2924        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
2925        break;
2926    case INDEX_op_mulsh_i64:
2927        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
2928        break;
2929
2930    case INDEX_op_mb:
2931        tcg_out_mb(s, args[0]);
2932        break;
2933
2934    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
2935    case INDEX_op_mov_i64:
2936    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
2937    default:
2938        tcg_abort();
2939    }
2940}
2941
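/*
 * Report whether a vector operation can be emitted: 1 if supported
 * directly for the given element size, 0 if unsupported, and -1 if
 * it can be synthesized from other operations via tcg_expand_vec_op.
 */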
2942int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2943{
2944    switch (opc) {
2945    case INDEX_op_and_vec:
2946    case INDEX_op_or_vec:
2947    case INDEX_op_xor_vec:
2948    case INDEX_op_andc_vec:
2949    case INDEX_op_not_vec:
2950        return 1;
2951    case INDEX_op_orc_vec:
2952        return have_isa_2_07;
2953    case INDEX_op_add_vec:
2954    case INDEX_op_sub_vec:
2955    case INDEX_op_smax_vec:
2956    case INDEX_op_smin_vec:
2957    case INDEX_op_umax_vec:
2958    case INDEX_op_umin_vec:
2959    case INDEX_op_shlv_vec:
2960    case INDEX_op_shrv_vec:
2961    case INDEX_op_sarv_vec:
2962    case INDEX_op_rotlv_vec:
2963        return vece <= MO_32 || have_isa_2_07;
2964    case INDEX_op_ssadd_vec:
2965    case INDEX_op_sssub_vec:
2966    case INDEX_op_usadd_vec:
2967    case INDEX_op_ussub_vec:
2968        return vece <= MO_32;
2969    case INDEX_op_cmp_vec:
2970    case INDEX_op_shli_vec:
2971    case INDEX_op_shri_vec:
2972    case INDEX_op_sari_vec:
2973    case INDEX_op_rotli_vec:
2974        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
2975    case INDEX_op_neg_vec:
2976        return vece >= MO_32 && have_isa_3_00;
2977    case INDEX_op_mul_vec:
2978        switch (vece) {
2979        case MO_8:
2980        case MO_16:
2981            return -1;
2982        case MO_32:
2983            return have_isa_2_07 ? 1 : -1;
2984        case MO_64:
2985            return have_isa_3_10;
2986        }
2987        return 0;
2988    case INDEX_op_bitsel_vec:
2989        return have_vsx;
2990    case INDEX_op_rotrv_vec:
2991        return -1;
2992    default:
2993        return 0;
2994    }
2995}
2996
2997static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2998                            TCGReg dst, TCGReg src)
2999{
3000    tcg_debug_assert(dst >= TCG_REG_V0);
3001
3002    /* Splat from integer reg allowed via constraints for v3.00.  */
3003    if (src < TCG_REG_V0) {
3004        tcg_debug_assert(have_isa_3_00);
3005        switch (vece) {
3006        case MO_64:
3007            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3008            return true;
3009        case MO_32:
3010            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3011            return true;
3012        default:
3013            /* Fail, so that we fall back on either dupm or mov+dup.  */
3014            return false;
3015        }
3016    }
3017
3018    /*
3019     * Recall we use (or emulate) VSX integer loads, so the integer is
3020     * right justified within the left (zero-index) double-word.
3021     */
3022    switch (vece) {
3023    case MO_8:
3024        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3025        break;
3026    case MO_16:
3027        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3028        break;
3029    case MO_32:
3030        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3031        break;
3032    case MO_64:
3033        if (have_vsx) {
3034            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3035            break;
3036        }
3037        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3038        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3039        break;
3040    default:
3041        g_assert_not_reached();
3042    }
3043    return true;
3044}
3045
3046static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3047                             TCGReg out, TCGReg base, intptr_t offset)
3048{
3049    int elt;
3050
3051    tcg_debug_assert(out >= TCG_REG_V0);
3052    switch (vece) {
3053    case MO_8:
3054        if (have_isa_3_00) {
3055            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3056        } else {
3057            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3058        }
3059        elt = extract32(offset, 0, 4);
3060#ifndef HOST_WORDS_BIGENDIAN
3061        elt ^= 15;
3062#endif
3063        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3064        break;
3065    case MO_16:
3066        tcg_debug_assert((offset & 1) == 0);
3067        if (have_isa_3_00) {
3068            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3069        } else {
3070            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3071        }
3072        elt = extract32(offset, 1, 3);
3073#ifndef HOST_WORDS_BIGENDIAN
3074        elt ^= 7;
3075#endif
3076        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3077        break;
3078    case MO_32:
3079        if (have_isa_3_00) {
3080            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3081            break;
3082        }
3083        tcg_debug_assert((offset & 3) == 0);
3084        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3085        elt = extract32(offset, 2, 2);
3086#ifndef HOST_WORDS_BIGENDIAN
3087        elt ^= 3;
3088#endif
3089        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3090        break;
3091    case MO_64:
3092        if (have_vsx) {
3093            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3094            break;
3095        }
3096        tcg_debug_assert((offset & 7) == 0);
3097        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3098        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3099        elt = extract32(offset, 3, 1);
3100#ifndef HOST_WORDS_BIGENDIAN
3101        elt = !elt;
3102#endif
3103        if (elt) {
3104            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3105        } else {
3106            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3107        }
3108        break;
3109    default:
3110        g_assert_not_reached();
3111    }
3112    return true;
3113}
3114
3115static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3116                           unsigned vecl, unsigned vece,
3117                           const TCGArg args[TCG_MAX_OP_ARGS],
3118                           const int const_args[TCG_MAX_OP_ARGS])
3119{
3120    static const uint32_t
3121        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3122        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3123        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3124        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3125        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3126        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3127        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3128        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3129        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3130        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3131        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3132        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3133        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3134        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3135        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3136        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3137        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3138        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3139        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3140        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3141        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3142        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3143        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3144        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3145        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3146
3147    TCGType type = vecl + TCG_TYPE_V64;
3148    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3149    uint32_t insn;
3150
3151    switch (opc) {
3152    case INDEX_op_ld_vec:
3153        tcg_out_ld(s, type, a0, a1, a2);
3154        return;
3155    case INDEX_op_st_vec:
3156        tcg_out_st(s, type, a0, a1, a2);
3157        return;
3158    case INDEX_op_dupm_vec:
3159        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3160        return;
3161
3162    case INDEX_op_add_vec:
3163        insn = add_op[vece];
3164        break;
3165    case INDEX_op_sub_vec:
3166        insn = sub_op[vece];
3167        break;
3168    case INDEX_op_neg_vec:
3169        insn = neg_op[vece];
3170        a2 = a1;
3171        a1 = 0;
3172        break;
3173    case INDEX_op_mul_vec:
3174        insn = mul_op[vece];
3175        break;
3176    case INDEX_op_ssadd_vec:
3177        insn = ssadd_op[vece];
3178        break;
3179    case INDEX_op_sssub_vec:
3180        insn = sssub_op[vece];
3181        break;
3182    case INDEX_op_usadd_vec:
3183        insn = usadd_op[vece];
3184        break;
3185    case INDEX_op_ussub_vec:
3186        insn = ussub_op[vece];
3187        break;
3188    case INDEX_op_smin_vec:
3189        insn = smin_op[vece];
3190        break;
3191    case INDEX_op_umin_vec:
3192        insn = umin_op[vece];
3193        break;
3194    case INDEX_op_smax_vec:
3195        insn = smax_op[vece];
3196        break;
3197    case INDEX_op_umax_vec:
3198        insn = umax_op[vece];
3199        break;
3200    case INDEX_op_shlv_vec:
3201        insn = shlv_op[vece];
3202        break;
3203    case INDEX_op_shrv_vec:
3204        insn = shrv_op[vece];
3205        break;
3206    case INDEX_op_sarv_vec:
3207        insn = sarv_op[vece];
3208        break;
3209    case INDEX_op_and_vec:
3210        insn = VAND;
3211        break;
3212    case INDEX_op_or_vec:
3213        insn = VOR;
3214        break;
3215    case INDEX_op_xor_vec:
3216        insn = VXOR;
3217        break;
3218    case INDEX_op_andc_vec:
3219        insn = VANDC;
3220        break;
3221    case INDEX_op_not_vec:
3222        insn = VNOR;
3223        a2 = a1;
3224        break;
3225    case INDEX_op_orc_vec:
3226        insn = VORC;
3227        break;
3228
3229    case INDEX_op_cmp_vec:
3230        switch (args[3]) {
3231        case TCG_COND_EQ:
3232            insn = eq_op[vece];
3233            break;
3234        case TCG_COND_NE:
3235            insn = ne_op[vece];
3236            break;
3237        case TCG_COND_GT:
3238            insn = gts_op[vece];
3239            break;
3240        case TCG_COND_GTU:
3241            insn = gtu_op[vece];
3242            break;
3243        default:
3244            g_assert_not_reached();
3245        }
3246        break;
3247
3248    case INDEX_op_bitsel_vec:
3249        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3250        return;
3251
3252    case INDEX_op_dup2_vec:
3253        assert(TCG_TARGET_REG_BITS == 32);
3254        /* With inputs a1 = xLxx, a2 = xHxx  */
3255        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3256        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3257        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3258        return;
3259
3260    case INDEX_op_ppc_mrgh_vec:
3261        insn = mrgh_op[vece];
3262        break;
3263    case INDEX_op_ppc_mrgl_vec:
3264        insn = mrgl_op[vece];
3265        break;
3266    case INDEX_op_ppc_muleu_vec:
3267        insn = muleu_op[vece];
3268        break;
3269    case INDEX_op_ppc_mulou_vec:
3270        insn = mulou_op[vece];
3271        break;
3272    case INDEX_op_ppc_pkum_vec:
3273        insn = pkum_op[vece];
3274        break;
3275    case INDEX_op_rotlv_vec:
3276        insn = rotl_op[vece];
3277        break;
3278    case INDEX_op_ppc_msum_vec:
3279        tcg_debug_assert(vece == MO_16);
3280        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3281        return;
3282
3283    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3284    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3285    default:
3286        g_assert_not_reached();
3287    }
3288
3289    tcg_debug_assert(insn != 0);
3290    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3291}
3292
3293static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3294                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3295{
3296    TCGv_vec t1;
3297
3298    if (vece == MO_32) {
3299        /*
3300         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3301         * So using negative numbers gets us the 4th bit easily.
3302         */
3303        imm = sextract32(imm, 0, 5);
3304    } else {
3305        imm &= (8 << vece) - 1;
3306    }
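
        /*
         * Worked example (illustrative): for vece == MO_32, a shift count of
         * 20 becomes sextract32(20, 0, 5) == -12.  Only the low 5 bits reach
         * the hardware, where -12 and 20 agree, and -12 fits the -16..15
         * range that VSPLTISB can encode.
         */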
3307
3308    /* Splat as bytes, so xxspltib can materialize the count once ISA 2.07 allows MO_64. */
3309    t1 = tcg_constant_vec(type, MO_8, imm);
3310    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3311              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3312}
3313
3314static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3315                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3316{
3317    bool need_swap = false, need_inv = false;
3318
3319    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3320
3321    switch (cond) {
3322    case TCG_COND_EQ:
3323    case TCG_COND_GT:
3324    case TCG_COND_GTU:
3325        break;
3326    case TCG_COND_NE:
3327        if (have_isa_3_00 && vece <= MO_32) {
3328            break;
3329        }
3330        /* fall through */
3331    case TCG_COND_LE:
3332    case TCG_COND_LEU:
3333        need_inv = true;
3334        break;
3335    case TCG_COND_LT:
3336    case TCG_COND_LTU:
3337        need_swap = true;
3338        break;
3339    case TCG_COND_GE:
3340    case TCG_COND_GEU:
3341        need_swap = need_inv = true;
3342        break;
3343    default:
3344        g_assert_not_reached();
3345    }
3346
3347    if (need_inv) {
3348        cond = tcg_invert_cond(cond);
3349    }
3350    if (need_swap) {
3351        TCGv_vec t1;
3352        t1 = v1, v1 = v2, v2 = t1;
3353        cond = tcg_swap_cond(cond);
3354    }
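
        /*
         * For example, LT(a, b) is emitted by swapping to GT(b, a), and
         * GE(a, b) by inverting to LT(a, b), swapping to GT(b, a), and
         * complementing the result below.
         */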
3355
3356    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3357              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3358
3359    if (need_inv) {
3360        tcg_gen_not_vec(vece, v0, v0);
3361    }
3362}
3363
3364static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3365                           TCGv_vec v1, TCGv_vec v2)
3366{
3367    TCGv_vec t1 = tcg_temp_new_vec(type);
3368    TCGv_vec t2 = tcg_temp_new_vec(type);
3369    TCGv_vec c0, c16;
3370
3371    switch (vece) {
3372    case MO_8:
3373    case MO_16:
3374        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3375                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3376        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3377                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3378        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3379                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3380        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3381                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3382        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3383                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3384        break;
3385
3386    case MO_32:
3387        tcg_debug_assert(!have_isa_2_07);
3388        /*
3389         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3390         * So using -16 is a quick way to represent 16.
3391         */
3392        c16 = tcg_constant_vec(type, MO_8, -16);
3393        c0 = tcg_constant_vec(type, MO_8, 0);
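
            /*
             * Per 32-bit lane, with 16-bit halves a = ah:al and b = bh:bl,
             * the sequence below computes (a * b) mod 2^32 as:
             *   t1 = rotl(b, 16)              -- bl:bh
             *   t2 = al * bl                  -- VMULOUH
             *   t1 = ah * bl + al * bh        -- VMSUMUHM, addend c0 = 0
             *   v0 = (t1 << 16) + t2
             */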
3394
3395        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3396                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3397        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3398                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3399        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3400                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3401        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3402                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3403        tcg_gen_add_vec(MO_32, v0, t1, t2);
3404        break;
3405
3406    default:
3407        g_assert_not_reached();
3408    }
3409    tcg_temp_free_vec(t1);
3410    tcg_temp_free_vec(t2);
3411}
3412
3413void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3414                       TCGArg a0, ...)
3415{
3416    va_list va;
3417    TCGv_vec v0, v1, v2, t0;
3418    TCGArg a2;
3419
3420    va_start(va, a0);
3421    v0 = temp_tcgv_vec(arg_temp(a0));
3422    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3423    a2 = va_arg(va, TCGArg);
3424
3425    switch (opc) {
3426    case INDEX_op_shli_vec:
3427        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3428        break;
3429    case INDEX_op_shri_vec:
3430        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3431        break;
3432    case INDEX_op_sari_vec:
3433        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3434        break;
3435    case INDEX_op_rotli_vec:
3436        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3437        break;
3438    case INDEX_op_cmp_vec:
3439        v2 = temp_tcgv_vec(arg_temp(a2));
3440        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3441        break;
3442    case INDEX_op_mul_vec:
3443        v2 = temp_tcgv_vec(arg_temp(a2));
3444        expand_vec_mul(type, vece, v0, v1, v2);
3445        break;
3446    case INDEX_op_rotrv_vec:
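            /* There is no variable rotate-right; rotate left by the negated count. */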
3447        v2 = temp_tcgv_vec(arg_temp(a2));
3448        t0 = tcg_temp_new_vec(type);
3449        tcg_gen_neg_vec(vece, t0, v2);
3450        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3451        tcg_temp_free_vec(t0);
3452        break;
3453    default:
3454        g_assert_not_reached();
3455    }
3456    va_end(va);
3457}
3458
3459static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3460{
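        /*
         * A brief key to the C_Ox_Iy(...) shorthand (defined by the common
         * TCG constraint machinery, not in this file): x output constraints
         * followed by y input constraints, in order.  'r' accepts any GPR and
         * 'v' any vector register; the remaining letters name backend-specific
         * classes of immediates and of registers reserved for the qemu_ld/st
         * helpers.
         */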
3461    switch (op) {
3462    case INDEX_op_goto_ptr:
3463        return C_O0_I1(r);
3464
3465    case INDEX_op_ld8u_i32:
3466    case INDEX_op_ld8s_i32:
3467    case INDEX_op_ld16u_i32:
3468    case INDEX_op_ld16s_i32:
3469    case INDEX_op_ld_i32:
3470    case INDEX_op_ctpop_i32:
3471    case INDEX_op_neg_i32:
3472    case INDEX_op_not_i32:
3473    case INDEX_op_ext8s_i32:
3474    case INDEX_op_ext16s_i32:
3475    case INDEX_op_bswap16_i32:
3476    case INDEX_op_bswap32_i32:
3477    case INDEX_op_extract_i32:
3478    case INDEX_op_ld8u_i64:
3479    case INDEX_op_ld8s_i64:
3480    case INDEX_op_ld16u_i64:
3481    case INDEX_op_ld16s_i64:
3482    case INDEX_op_ld32u_i64:
3483    case INDEX_op_ld32s_i64:
3484    case INDEX_op_ld_i64:
3485    case INDEX_op_ctpop_i64:
3486    case INDEX_op_neg_i64:
3487    case INDEX_op_not_i64:
3488    case INDEX_op_ext8s_i64:
3489    case INDEX_op_ext16s_i64:
3490    case INDEX_op_ext32s_i64:
3491    case INDEX_op_ext_i32_i64:
3492    case INDEX_op_extu_i32_i64:
3493    case INDEX_op_bswap16_i64:
3494    case INDEX_op_bswap32_i64:
3495    case INDEX_op_bswap64_i64:
3496    case INDEX_op_extract_i64:
3497        return C_O1_I1(r, r);
3498
3499    case INDEX_op_st8_i32:
3500    case INDEX_op_st16_i32:
3501    case INDEX_op_st_i32:
3502    case INDEX_op_st8_i64:
3503    case INDEX_op_st16_i64:
3504    case INDEX_op_st32_i64:
3505    case INDEX_op_st_i64:
3506        return C_O0_I2(r, r);
3507
3508    case INDEX_op_add_i32:
3509    case INDEX_op_and_i32:
3510    case INDEX_op_or_i32:
3511    case INDEX_op_xor_i32:
3512    case INDEX_op_andc_i32:
3513    case INDEX_op_orc_i32:
3514    case INDEX_op_eqv_i32:
3515    case INDEX_op_shl_i32:
3516    case INDEX_op_shr_i32:
3517    case INDEX_op_sar_i32:
3518    case INDEX_op_rotl_i32:
3519    case INDEX_op_rotr_i32:
3520    case INDEX_op_setcond_i32:
3521    case INDEX_op_and_i64:
3522    case INDEX_op_andc_i64:
3523    case INDEX_op_shl_i64:
3524    case INDEX_op_shr_i64:
3525    case INDEX_op_sar_i64:
3526    case INDEX_op_rotl_i64:
3527    case INDEX_op_rotr_i64:
3528    case INDEX_op_setcond_i64:
3529        return C_O1_I2(r, r, ri);
3530
3531    case INDEX_op_mul_i32:
3532    case INDEX_op_mul_i64:
3533        return C_O1_I2(r, r, rI);
3534
3535    case INDEX_op_div_i32:
3536    case INDEX_op_divu_i32:
3537    case INDEX_op_nand_i32:
3538    case INDEX_op_nor_i32:
3539    case INDEX_op_muluh_i32:
3540    case INDEX_op_mulsh_i32:
3541    case INDEX_op_orc_i64:
3542    case INDEX_op_eqv_i64:
3543    case INDEX_op_nand_i64:
3544    case INDEX_op_nor_i64:
3545    case INDEX_op_div_i64:
3546    case INDEX_op_divu_i64:
3547    case INDEX_op_mulsh_i64:
3548    case INDEX_op_muluh_i64:
3549        return C_O1_I2(r, r, r);
3550
3551    case INDEX_op_sub_i32:
3552        return C_O1_I2(r, rI, ri);
3553    case INDEX_op_add_i64:
3554        return C_O1_I2(r, r, rT);
3555    case INDEX_op_or_i64:
3556    case INDEX_op_xor_i64:
3557        return C_O1_I2(r, r, rU);
3558    case INDEX_op_sub_i64:
3559        return C_O1_I2(r, rI, rT);
3560    case INDEX_op_clz_i32:
3561    case INDEX_op_ctz_i32:
3562    case INDEX_op_clz_i64:
3563    case INDEX_op_ctz_i64:
3564        return C_O1_I2(r, r, rZW);
3565
3566    case INDEX_op_brcond_i32:
3567    case INDEX_op_brcond_i64:
3568        return C_O0_I2(r, ri);
3569
3570    case INDEX_op_movcond_i32:
3571    case INDEX_op_movcond_i64:
3572        return C_O1_I4(r, r, ri, rZ, rZ);
3573    case INDEX_op_deposit_i32:
3574    case INDEX_op_deposit_i64:
3575        return C_O1_I2(r, 0, rZ);
3576    case INDEX_op_brcond2_i32:
3577        return C_O0_I4(r, r, ri, ri);
3578    case INDEX_op_setcond2_i32:
3579        return C_O1_I4(r, r, r, ri, ri);
3580    case INDEX_op_add2_i64:
3581    case INDEX_op_add2_i32:
3582        return C_O2_I4(r, r, r, r, rI, rZM);
3583    case INDEX_op_sub2_i64:
3584    case INDEX_op_sub2_i32:
3585        return C_O2_I4(r, r, rI, rZM, r, r);
3586
3587    case INDEX_op_qemu_ld_i32:
3588        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3589                ? C_O1_I1(r, L)
3590                : C_O1_I2(r, L, L));
3591
3592    case INDEX_op_qemu_st_i32:
3593        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3594                ? C_O0_I2(S, S)
3595                : C_O0_I3(S, S, S));
3596
3597    case INDEX_op_qemu_ld_i64:
3598        return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
3599                : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
3600                : C_O2_I2(L, L, L, L));
3601
3602    case INDEX_op_qemu_st_i64:
3603        return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
3604                : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
3605                : C_O0_I4(S, S, S, S));
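
        /*
         * In the qemu_ld/st cases above, operands double up whenever a value
         * does not fit in one host register: a 64-bit access with a 64-bit
         * guest address on a 32-bit host needs two address and two data
         * registers, hence C_O0_I4(S, S, S, S) for the store.
         */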
3606
3607    case INDEX_op_add_vec:
3608    case INDEX_op_sub_vec:
3609    case INDEX_op_mul_vec:
3610    case INDEX_op_and_vec:
3611    case INDEX_op_or_vec:
3612    case INDEX_op_xor_vec:
3613    case INDEX_op_andc_vec:
3614    case INDEX_op_orc_vec:
3615    case INDEX_op_cmp_vec:
3616    case INDEX_op_ssadd_vec:
3617    case INDEX_op_sssub_vec:
3618    case INDEX_op_usadd_vec:
3619    case INDEX_op_ussub_vec:
3620    case INDEX_op_smax_vec:
3621    case INDEX_op_smin_vec:
3622    case INDEX_op_umax_vec:
3623    case INDEX_op_umin_vec:
3624    case INDEX_op_shlv_vec:
3625    case INDEX_op_shrv_vec:
3626    case INDEX_op_sarv_vec:
3627    case INDEX_op_rotlv_vec:
3628    case INDEX_op_rotrv_vec:
3629    case INDEX_op_ppc_mrgh_vec:
3630    case INDEX_op_ppc_mrgl_vec:
3631    case INDEX_op_ppc_muleu_vec:
3632    case INDEX_op_ppc_mulou_vec:
3633    case INDEX_op_ppc_pkum_vec:
3634    case INDEX_op_dup2_vec:
3635        return C_O1_I2(v, v, v);
3636
3637    case INDEX_op_not_vec:
3638    case INDEX_op_neg_vec:
3639        return C_O1_I1(v, v);
3640
3641    case INDEX_op_dup_vec:
3642        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
3643
3644    case INDEX_op_ld_vec:
3645    case INDEX_op_dupm_vec:
3646        return C_O1_I1(v, r);
3647
3648    case INDEX_op_st_vec:
3649        return C_O0_I2(v, r);
3650
3651    case INDEX_op_bitsel_vec:
3652    case INDEX_op_ppc_msum_vec:
3653        return C_O1_I3(v, v, v, v);
3654
3655    default:
3656        g_assert_not_reached();
3657    }
3658}
3659
3660static void tcg_target_init(TCGContext *s)
3661{
3662    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3663    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3664
3665    have_isa = tcg_isa_base;
3666    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3667        have_isa = tcg_isa_2_06;
3668    }
3669#ifdef PPC_FEATURE2_ARCH_2_07
3670    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3671        have_isa = tcg_isa_2_07;
3672    }
3673#endif
3674#ifdef PPC_FEATURE2_ARCH_3_00
3675    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3676        have_isa = tcg_isa_3_00;
3677    }
3678#endif
3679#ifdef PPC_FEATURE2_ARCH_3_10
3680    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
3681        have_isa = tcg_isa_3_10;
3682    }
3683#endif
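
        /*
         * These checks are cumulative: the kernel is expected to report every
         * ISA level a CPU implements (a POWER9 host sets both the 2.07 and
         * 3.00 hwcap2 bits, for instance), so the newest matching level wins.
         */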
3684
3685#ifdef PPC_FEATURE2_HAS_ISEL
3686    /* Prefer the kernel's explicit indication of ISEL support. */
3687    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3688#else
3689    /* Fall back to knowing Power7 (2.06) has ISEL. */
3690    have_isel = have_isa_2_06;
3691#endif
3692
3693    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3694        have_altivec = true;
3695        /* We only care about the portion of VSX that overlaps Altivec. */
3696        if (hwcap & PPC_FEATURE_HAS_VSX) {
3697            have_vsx = true;
3698        }
3699    }
3700
3701    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3702    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3703    if (have_altivec) {
3704        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3705        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3706    }
3707
3708    tcg_target_call_clobber_regs = 0;
3709    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3710    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3711    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3712    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3713    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3714    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3715    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3716    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3717    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3718    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3719    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3720    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3721
3722    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3723    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3724    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3725    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3726    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3727    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3728    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3729    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3730    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3731    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3732    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3733    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3734    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3735    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3736    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3737    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3738    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3739    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3740    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3741    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3742
3743    s->reserved_regs = 0;
3744    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3745    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3746#if defined(_CALL_SYSV)
3747    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3748#endif
3749#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3750    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3751#endif
3752    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3753    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3754    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3755    if (USE_REG_TB) {
3756        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3757    }
3758}
3759
3760#ifdef __ELF__
3761typedef struct {
3762    DebugFrameCIE cie;
3763    DebugFrameFDEHeader fde;
3764    uint8_t fde_def_cfa[4];
3765    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3766} DebugFrame;
3767
3768/* We're expecting a 2-byte uleb128 encoded value.  */
3769QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3770
3771#if TCG_TARGET_REG_BITS == 64
3772# define ELF_HOST_MACHINE EM_PPC64
3773#else
3774# define ELF_HOST_MACHINE EM_PPC
3775#endif
3776
3777static DebugFrame debug_frame = {
3778    .cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
3779    .cie.id = -1,
3780    .cie.version = 1,
3781    .cie.code_align = 1,
3782    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3783    .cie.return_column = 65,
3784
3785    /* Total FDE size does not include the "len" member.  */
3786    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3787
3788    .fde_def_cfa = {
3789        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
3790        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3791        (FRAME_SIZE >> 7)
3792    },
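        /*
         * For reference, 12 is DW_CFA_def_cfa and the offset is uleb128
         * encoded: with FRAME_SIZE below the 1 << 14 bound asserted above,
         * the low seven bits carry the continuation flag 0x80 and the next
         * byte holds FRAME_SIZE >> 7.
         */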
3793    .fde_reg_ofs = {
3794        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
3795        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
3796    }
3797};
3798
3799void tcg_register_jit(const void *buf, size_t buf_size)
3800{
3801    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3802    int i;
3803
3804    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
3805        p[0] = 0x80 + tcg_target_callee_save_regs[i];
3806        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
3807    }
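
        /*
         * Each pair written above is a DW_CFA_offset: 0x80 | regno, then the
         * save slot's distance from the CFA as a uleb128 count scaled by the
         * data alignment factor -SZR.
         */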
3808
3809    debug_frame.fde.func_start = (uintptr_t)buf;
3810    debug_frame.fde.func_len = buf_size;
3811
3812    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3813}
3814#endif /* __ELF__ */
3815